In [1]:
import torch
import pickle
from pdeep.mhc.mhc_binding_retriever import MHCBindingRetriever
from pdeep.mhc.mhc_binding_model import ModelHlaEncoder, ModelSeqEncoder

# Version tag interpolated into the checkpoint file names below.
model_version = "v0613"

# Prefer the GPU, but fall back to the CPU so the cell still runs on machines
# without CUDA instead of failing on the first `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"

pept_encoder = ModelSeqEncoder().to(device)
hla_encoder = ModelHlaEncoder().to(device)

# map_location remaps the stored tensors onto `device` while loading.
hla_encoder.load_state_dict(torch.load(f"model/HLA_model_{model_version}.pt", map_location=device))
# Kept as the last expression so the key-matching result is shown as the cell output.
pept_encoder.load_state_dict(torch.load(f"model/pept_model_{model_version}.pt", map_location=device))


<All keys matched successfully>

In [2]:
# Load the pickled bundle whose file name is keyed by the model version.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open .embed files that come from a trusted source.
with open(f"embeds/HLA_model_{model_version}.pt.embed", "rb") as embed_file:
    data_dict = pickle.load(embed_file)

# Pull the three entries out of the loaded dict.
protein_df = data_dict["protein_df"]
embeds = data_dict["embeds"]
umap_reducer = data_dict["umap_reducer"]


In [3]:
# NOTE(review): despite the name, this is a single path string, not a list —
# confirm that MHCBindingRetriever accepts a bare path here.
fasta_list = "data/demo/test.fasta"

# Build the retriever from both encoders, the loaded protein table and
# embeddings, and the FASTA input.
retriever = MHCBindingRetriever(
    hla_encoder,
    pept_encoder,
    protein_df,
    embeds,
    fasta_list,
    digested_pept_lens=(8, 14),
)

In [4]:
# Allele codes queried in the next cell: two each with prefix A, B and C.
test_alleles_list = [
    "A0201",
    "A0301",
    "B3801",
    "B4405",
    "C0202",
    "C1203",
]

In [5]:
# Ask the retriever for binding metrics of self-protein peptides against the
# selected alleles, including the peptide sequence column.
# NOTE(review): fdr=1e10 is far above every best_allele_fdr value in the
# displayed output, so it presumably disables FDR filtering — confirm intent.
peptide_df = retriever.get_binding_metrics_for_self_proteins(
    test_alleles_list,
    dist_threshold=0.4,
    fdr=1e10,
    get_sequence=True,
)
# Bare last expression so the notebook renders the (truncated) frame.
peptide_df

100%|██████████| 3/3 [00:49<00:00, 16.50s/it]


Unnamed: 0,best_allele_id,best_allele_dist,best_allele_rank,best_allele_fdr,sequence
0,1,0.186641,1,0.007636,MVSPLPPPH
1,0,0.211866,0,0.004407,HCPGAALHV
2,4,0.276956,0,0.000802,GAALHVQPY
3,1,0.074507,0,0.000200,GAALHVQPYK
4,1,0.079320,0,0.001209,AALHVQPYK
...,...,...,...,...,...
181462,1,0.074775,0,0.000200,KKKLLGQFYK
181463,1,0.053455,0,0.000000,KKLLGQFYK
181464,1,0.043071,0,0.000000,KLLGQFYK
181465,0,0.065053,0,0.000000,KLLGQFYKC


In [6]:
# Persist the result as a tab-separated file, leaving out the row index.
peptide_df.to_csv(
    "data/demo/peptide_df.tsv",
    sep="\t",
    index=False,
)