In [1]:
import pandas as pd
import itertools
def read_fasta(fn):
    """Read a two-lines-per-record FASTA-style file into a DataFrame.

    The file is consumed in pairs of lines. The first line of each pair is the
    header: it is stripped and its first character (expected to be the ``>``
    marker) is dropped. The second line is the sequence, stripped of
    surrounding whitespace. Sequences wrapped over several lines are not
    supported.

    Parameters
    ----------
    fn : str or path-like
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per record, with the columns ``name`` and ``seq``. The columns
        are present even when the file holds no record.
    """
    records = []
    with open(fn) as f:
        for header, sequence in grouper(f, 2, ''):
            header, sequence = header.strip(), sequence.strip()
            if not header and not sequence:
                # A pair made only of blank lines / padding (typically a
                # trailing empty line) is not a record; keeping it would add
                # an empty name with an empty sequence.
                continue
            records.append((header[1:], sequence))
    return pd.DataFrame(records, columns=["name", "seq"])

def grouper(iterable, n, fillvalue=None):
    """Split ``iterable`` into consecutive tuples of length ``n``.

    The last tuple is padded with ``fillvalue`` when the input runs out
    before it is complete. The result is a lazy iterator.
    """
    shared = iter(iterable)
    # Handing the *same* iterator to zip_longest n times makes every output
    # tuple pull n successive items from the input.
    return itertools.zip_longest(*(shared for _ in range(n)), fillvalue=fillvalue)


In [2]:
import ihor

# Load the model from the "onegene" test files: parameters, marginals and the
# V / J CDR3 anchor tables.
model = ihor.ModelVDJ.load_model(
    "tests/models/onegene/model_params.txt",
    "tests/models/onegene/model_marginals.txt",
    "tests/models/onegene/V_gene_CDR3_anchors.csv",
    "tests/models/onegene/J_gene_CDR3_anchors.csv",
)

align_params = ihor.AlignmentParameters(min_score_v=40, min_score_j=10, max_error_d=20)

df = read_fasta("tests/models/seq_onegene.txt")

# Align every sequence of the file against the model; `seq` holds one
# alignment result per row of `df`, in the same order.
seq = [
    ihor.SequenceVDJ.align_sequence(ihor.Dna.from_string(raw.strip()), model, align_params)
    for raw in df.seq
]


#

In [18]:
# Show the "name" column of the frame returned by read_fasta (the header of
# each record); row i here corresponds to seq[i].
df.name

0       nodel_noins
1      pal_ins_v, 2
2     pal_ins_d5, 1
3     pal_ins_d3, 1
4      pal_ins_j, 2
5          del_v, 4
6         del_d5, 3
7         del_d3, 1
8          del_j, 5
9       ins_vd, CCC
10      ins_vd, CCT
11      ins_dj, AAA
Name: name, dtype: object

In [33]:
# Inference thresholds used by this exploratory cell, stored in the
# notebook-global `infer_params`.
infer_params = ihor.InferenceParameters(min_likelihood=1e-20, min_likelihood_error=1e-10)
# seq follows the row order of df, so seq[3] is the record listed as
# "pal_ins_d3, 1" in the df.name output. The first argument (5) presumably caps
# the number of scenarios returned — the recorded `res` output has five entries.
res = ihor.most_likely_recombinations(5, seq[3],model, infer_params)
# Each entry of res is a (float, EventVDJ) pair; read the deld3 attribute of the
# event in the second entry.
res[1][1].deld3

0

In [23]:
# Display the whole list returned by ihor.most_likely_recombinations: pairs of
# (float, EventVDJ). NOTE(review): the float is presumably the scenario
# likelihood — confirm against the ihor documentation.
res

[(4.744677799578172e-11, <EventVDJ at 0x7fa4b42f7710>),
 (2.372338899789086e-11, <EventVDJ at 0x7fa4b4349670>),
 (1.1861694498945433e-11, <EventVDJ at 0x7fa4b42b6990>),
 (1.186169449894543e-11, <EventVDJ at 0x7fa4b42b6b70>),
 (1.1861694498945426e-11, <EventVDJ at 0x7fa4b42b6df0>)]

In [8]:
def infer_model(sequences, model, min_likelihood=1e-20, nb_rounds=6, min_likelihood_error=1e-40):
    """Refine ``model`` over several rounds of feature inference.

    Each round infers the features of every sequence with the current model,
    averages them with ``ihor.FeaturesVDJ.average`` and passes the average to
    ``model.update``.

    Parameters
    ----------
    sequences : iterable
        Aligned sequences, as accepted by ``ihor.infer_features``.
    model
        Model to refine; it is modified through ``model.update``.
    min_likelihood : float
        Forwarded to ``ihor.InferenceParameters``.
    nb_rounds : int
        Number of inference/update rounds.
    min_likelihood_error : float
        Forwarded to ``ihor.InferenceParameters``.

    Returns
    -------
    The same ``model`` object that was passed in.
    """
    # Build the settings from the arguments instead of reading a notebook-global
    # `infer_params`, so the function works on a fresh kernel and the two
    # threshold parameters actually take effect.
    params = ihor.InferenceParameters(min_likelihood=min_likelihood,
                                      min_likelihood_error=min_likelihood_error)
    # Materialise once: a generator argument must survive every round.
    sequences = list(sequences)

    for _ in range(nb_rounds):
        features = [ihor.infer_features(s, model, params) for s in sequences]
        model.update(ihor.FeaturesVDJ.average(features))
    return model

In [9]:
# Inspect the d_genes attribute of the first aligned sequence (a list of
# DAlignment objects in the recorded output).
seq[0].d_genes

[<DAlignment at 0x7f822868bab0>,
 <DAlignment at 0x7f822868b750>,
 <DAlignment at 0x7f822868bc30>]

In [5]:
# Inspect the model's p_ins_vd array. NOTE(review): presumably the distribution
# of the number of VD insertions — confirm against the ihor documentation.
model.p_ins_vd

array([0.75, 0.25])

In [8]:
# Nucleotide string of the first entry of model.seg_ds. NOTE(review): seg_ds
# presumably lists the model's D gene segments — confirm.
model.seg_ds[0].seq.get_string()

'GGGACAGGGGGC'

In [19]:
# errors attribute of the first V alignment of the first aligned sequence.
seq[0].v_genes[0].errors

[0]

In [14]:
# Length of a hard-coded nucleotide string. NOTE(review): the notebook does not
# record where this sequence comes from — add its provenance.
len('GACACAGCTGTTTCCCAGACTCCAAAATACCTGGTCACACAGATGGGAAACGACAAGTCCATTAAATGTGAACAAAATCTGGGCCATGATACTATGTATTGGTATAAACAGGACTCTAAGAAATTTCTGAAGATAATGTTTAGCTACAATAACAAGGAGATCATTATAAATGAAACAGTTCCAAATCGATTCTCACCTAAATCTCCAGACAAAGCTAAATTAAATCTTCACATCAATTCCCTGGAGCTTGGTGACTCTGCTGTGTATTTCTGTGCCAGCAGCCAAGA')

287