In [1]:
import pandas as pd
import itertools
def read_fasta(fn):
    """Read a two-lines-per-record FASTA-style file into a DataFrame.

    The file is consumed two lines at a time: the first line of each pair is
    the header, whose first character (normally ``>``) is dropped, and the
    second line is the sequence. Both are stripped of surrounding whitespace.
    Sequences wrapped over several lines are NOT supported.

    If the file has an odd number of lines, the last record gets an empty
    sequence. Pairs in which both lines are blank (e.g. caused by trailing
    empty lines) are skipped instead of producing an empty row.

    Parameters
    ----------
    fn : str or path-like
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``seq``, one row per record, in file order.
    """
    names, seqs = [], []
    with open(fn) as f:
        lines = iter(f)
        for header_line in lines:
            # Pull the partner line from the same iterator; '' pads an odd tail.
            seq_line = next(lines, '')
            header, sequence = header_line.strip(), seq_line.strip()
            if not header and not sequence:
                # Entirely blank pair: nothing to record.
                continue
            names.append(header[1:])
            seqs.append(sequence)
    return pd.DataFrame({"name": names, "seq": seqs})

def grouper(iterable, n, fillvalue=None):
    """Collect items from ``iterable`` into consecutive tuples of length ``n``.

    When the input runs out part-way through a tuple, the remaining slots
    are filled with ``fillvalue``. The result is a lazy
    ``itertools.zip_longest`` iterator.
    """
    # One shared iterator referenced n times: every tuple slot pulls the
    # next item from the same underlying stream.
    shared = iter(iterable)
    slots = (shared,) * n
    return itertools.zip_longest(*slots, fillvalue=fillvalue)


In [2]:
import ihor

# Load the VDJ model from the four files under ihor/tests/models/onegene.
model = ihor.ModelVDJ.load_model(
    "ihor/tests/models/onegene/model_params.txt",
    "ihor/tests/models/onegene/model_marginals.txt",
    "ihor/tests/models/onegene/V_gene_CDR3_anchors.csv",
    "ihor/tests/models/onegene/J_gene_CDR3_anchors.csv",
)

# Settings handed to every align_sequence call below.
align_params = ihor.AlignmentParameters(
    min_score_v=40,
    min_score_j=10,
    max_error_d=20,
)

df = read_fasta('ihor/tests/models/seq_onegene.txt')

# One aligned SequenceVDJ result per row of the `seq` column, in file order.
seq = [
    ihor.SequenceVDJ.align_sequence(ihor.Dna.from_string(raw.strip()), model, align_params)
    for raw in df.seq
]


#

In [3]:
# Display the loaded model's `range_del_v` attribute.
model.range_del_v

(-2, 5)

In [10]:
# Likelihood thresholds passed to the ihor inference call below.
infer_params = ihor.InferenceParameters(min_likelihood=1e-20, min_likelihood_error=1e-10)
# First argument is 5 — presumably the number of scenarios to return for the
# first aligned sequence; TODO confirm against the ihor documentation.
res = ihor.most_likely_recombinations(5, seq[0],model, infer_params)
# Display `d_start_seq` of element [1] of the first returned entry.
res[0][1].d_start_seq

28

In [5]:
def infer_model(sequences, model, min_likelihood=1e-20, nb_rounds=6, min_likelihood_error=1e-40):
    """Run ``nb_rounds`` rounds of feature inference, updating ``model`` each round.

    Every round calls ``ihor.infer_features`` on each element of
    ``sequences``, combines the results with ``ihor.FeaturesVDJ.average`` and
    hands the average to ``model.update``.

    Parameters
    ----------
    sequences : iterable
        Aligned sequences to infer from (any iterable; it is materialised
        once so it can be traversed in every round).
    model : object
        Model passed to ``ihor.infer_features``; its ``update`` method is
        called once per round.
    min_likelihood : float
        Forwarded to ``ihor.InferenceParameters``.
    nb_rounds : int
        Number of inference/update rounds.
    min_likelihood_error : float
        Forwarded to ``ihor.InferenceParameters``.

    Returns
    -------
    The same ``model`` object that was passed in, after the updates.
    """
    # Build the inference settings from the arguments rather than relying on
    # a notebook-level `infer_params` global that may not exist yet.
    infer_params = ihor.InferenceParameters(
        min_likelihood=min_likelihood,
        min_likelihood_error=min_likelihood_error,
    )
    # Materialise once: a generator would be exhausted after the first round.
    sequences = list(sequences)

    for _ in range(nb_rounds):
        features = [ihor.infer_features(s, model, infer_params) for s in sequences]
        model.update(ihor.FeaturesVDJ.average(features))
    return model

In [21]:
# Display the `d_genes` attribute of the first aligned sequence.
seq[0].d_genes

[]

In [5]:
# Display the model's `p_ins_vd` array.
model.p_ins_vd

array([0.75, 0.25])

In [8]:
# Display the sequence string of the first entry in the model's `seg_ds`.
model.seg_ds[0].seq.get_string()

'GGGACAGGGGGC'

In [19]:
# Display `errors` for the first entry in `v_genes` of the first aligned sequence.
seq[0].v_genes[0].errors

[0]

In [14]:
# Character count of a hard-coded nucleotide string literal.
len('GACACAGCTGTTTCCCAGACTCCAAAATACCTGGTCACACAGATGGGAAACGACAAGTCCATTAAATGTGAACAAAATCTGGGCCATGATACTATGTATTGGTATAAACAGGACTCTAAGAAATTTCTGAAGATAATGTTTAGCTACAATAACAAGGAGATCATTATAAATGAAACAGTTCCAAATCGATTCTCACCTAAATCTCCAGACAAAGCTAAATTAAATCTTCACATCAATTCCCTGGAGCTTGGTGACTCTGCTGTGTATTTCTGTGCCAGCAGCCAAGA')

287