In [1]:
import pandas as pd
from tempfile import NamedTemporaryFile
from epytope.Core.AntigenImmuneReceptor import AntigenImmuneReceptor
from epytope.Core.TCREpitope import TCREpitope
from epytope.Core.ImmuneReceptorChain import ImmuneReceptorChain
from epytope.TCRSpecificityPrediction import TCRSpecificityPredictorFactory, ML
import os
import scirpy as ir
from epytope.IO.FileReader import process_dataset_TCR

In [2]:
# --- Receptor 1: alpha and beta chain ---------------------------------------
TRA1 = ImmuneReceptorChain(
    chain_type="TRA",
    v_gene="TRAV26-1",
    d_gene="",
    j_gene="TRAJ43",
    cdr3="CIVRAPGRADMRF",
)
TRB1 = ImmuneReceptorChain(
    chain_type="TRB",
    v_gene="TRBV13",
    d_gene="",
    j_gene="TRBJ1-5",
    cdr3="CASSYLPGQGDHYSNQPQHF",
)

# --- Receptor 2: alpha and beta chain ---------------------------------------
TRA2 = ImmuneReceptorChain(
    chain_type="TRA",
    v_gene="TRAV20",
    d_gene="",
    j_gene="TRAJ28",
    cdr3="CAVPSGAGSYQLTF",
)
TRB2 = ImmuneReceptorChain(
    chain_type="TRB",
    v_gene="TRBV13",
    d_gene="",
    j_gene="TRBJ1-5",
    cdr3="CASSFEPGQGFYSNQPQHF",
)

# Two epitopes, both annotated with the same MHC string.
epitope1 = TCREpitope("FLKEKGGL", mhc="HLA-B*08")
epitope2 = TCREpitope("SQLLNAKYL", mhc="HLA-B*08")

# Pair each alpha/beta chain couple into a receptor object.
TCR1 = AntigenImmuneReceptor(receptor_id="1", chains=[TRA1, TRB1], cell_type="CD8")
TCR2 = AntigenImmuneReceptor(receptor_id="2", chains=[TRA2, TRB2], cell_type="CD8")

# The lists are kept in matching order so they can be used pairwise later on.
TCRs = [TCR1, TCR2]
epitopes = [epitope1, epitope2]

# A single-row table that already carries the receptor and its epitope;
# every value is a scalar, hence the explicit one-element index.
dataset = pd.DataFrame(
    {
        "Receptor_ID": 1,
        "TRA": "CAVSAASGGSYIPTF",
        "TRB": "CASSFSGNTGELFF",
        "TRAV": "TRAV3",
        "TRAJ": "TRAJ6",
        "TRBV": "TRBV12-3",
        "TRBJ": "TRBJ2-2",
        "T-Cell-Type": "CD8",
        "Peptide": "RAKFKQLL",
        "MHC": "HLA-B*08",
        "Species": "",
        "Antigen.species": "",
        "Tissue": "",
    },
    index=[0],
)


# Directory holding the TCR reference database exports. It defaults to the
# location used when this notebook was written; set EPYTOPE_TCR_DATA_DIR to
# point the notebook at another checkout without editing this cell.
TCR_DATA_DIR = os.environ.get(
    "EPYTOPE_TCR_DATA_DIR",
    "/home/mahmoud/Documents/Github/GoBi/TCR/epytope/Data/TCR",
)

# Paths of the individual database files inside that directory.
vdjdb = os.path.join(TCR_DATA_DIR, "vdjdb_full.txt")
McPAS = os.path.join(TCR_DATA_DIR, "McPAS-TCR.csv")
IEDB = os.path.join(TCR_DATA_DIR, "tcell_receptor_table_export_1660640162.csv")

In [3]:
# Print each registered predictor name followed by its comma-separated versions.
for method_name, versions in TCRSpecificityPredictorFactory.available_methods().items():
    supported = ",".join(versions)
    print(method_name, supported)

ergo-ii  


In [4]:
# Obtain the ERGO-II predictor from the factory; later cells reuse `mo`.
mo = TCRSpecificityPredictorFactory("ergo-ii")

# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Test binding specificity for each TCR to each epitope\n")

# all=True: every receptor is scored against every epitope.
mo.predict(
    peptides=epitopes,
    TCRs=TCRs,
    repository=ergo_repo,
    all=True,
    trained_on="vdjdb",
)

Test binding specificity for each TCR to each epitope



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
1,CIVRAPGRADMRF,CASSYLPGQGDHYSNQPQHF,FLKEKGGL,0.781
1,CIVRAPGRADMRF,CASSYLPGQGDHYSNQPQHF,SQLLNAKYL,0.7095
2,CAVPSGAGSYQLTF,CASSFEPGQGFYSNQPQHF,FLKEKGGL,0.6858
2,CAVPSGAGSYQLTF,CASSFEPGQGFYSNQPQHF,SQLLNAKYL,0.6635


In [5]:
# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Test binding specificity for TCRs to the corresponding epitopes in the same passed order\n\n")

# all=False: receptors and epitopes are paired position by position.
mo.predict(
    peptides=epitopes,
    TCRs=TCRs,
    repository=ergo_repo,
    all=False,
    trained_on="vdjdb",
)

Test binding specificity for TCRs to the corresponding epitopes in the same passed order




Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
1,CIVRAPGRADMRF,CASSYLPGQGDHYSNQPQHF,FLKEKGGL,0.781
2,CAVPSGAGSYQLTF,CASSFEPGQGFYSNQPQHF,SQLLNAKYL,0.6635


In [6]:
# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Testing on predefined dataset\n")

# Score the in-memory table directly instead of separate TCR/epitope lists.
mo.predict_from_dataset(
    repository=ergo_repo,
    df=dataset,
)

Testing on predefined dataset



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
1,CAVSAASGGSYIPTF,CASSFSGNTGELFF,RAKFKQLL,0.7402


In [7]:
# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Testing on vdjdb\n")

# Read the vdjdb export from disk; `source` names the file's format.
mo.predict_from_dataset(
    repository=ergo_repo,
    path=vdjdb,
    source="vdjdb",
)

Testing on vdjdb



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
,,CAAADEEIGNQPQHF,ATDALMTGY,0.8233
,,CAAGGQFYGYTF,KAFSPEVIPMF,0.6466
,,CACLLPYEQYF,GPGHKARVL,0.5909
,,CAEGGRDYGYTF,KAFSPEVIPMF,0.5914
,,CAFLGGSGANVLTF,RLQSLQTYV,0.5525
...,...,...,...,...
ex,,CASSSRTSGGTDTQYF,FRDYVDRFYKTLRAEQASQE,0.7710
lung10_12.clone,CAEGARDSNYQLIW,CASSAQANQPQHF,GILGFVFTL,0.0006
lung10_15.clone,CATDGGGGSQGNLIF,CASSFRSTDTQYF,GILGFVFTL,0.0537
lung10_17.clone,CAGAYGGSQGNLIF,CASSSRSSGEQYF,GILGFVFTL,0.0353


In [8]:
# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Testing on McPAS\n")

# Read the McPAS export from disk; `source` names the file's format.
mo.predict_from_dataset(
    repository=ergo_repo,
    path=McPAS,
    source="mcpas",
)

Testing on McPAS



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
0,,CASSDAGANTEVF,IKAVYNFATCG,0.7068
1,,CASSDAGAYAEQF,IKAVYNFATCG,0.7506
2,,CASSDAGGAAEVF,IKAVYNFATCG,0.6717
3,,CASSDAGHSPLYF,IKAVYNFATCG,0.2642
4,,CASSDAWGGAEQYF,IKAVYNFATCG,0.2566
...,...,...,...,...
39027,CASMDSNYQLIW,CSGKLAGDRNEQFF,FLCMKALLL,0.1701
39029,CATDAEGNNRLAF,CASSIFGGGLGEQFF,FLCMKALLL,0.0303
39030,CGAVGYQKVTF,CALNGEISYNEQFF,FLCMKALLL,0.0160
39031,CAVIWYNNNDMRF,CASSQGVNTGELFF,FLCMKALLL,0.0082


In [9]:
# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Testing on IEDB\n")

# Read the IEDB export from disk; `source` names the file's format.
mo.predict_from_dataset(
    repository=ergo_repo,
    path=IEDB,
    source="IEDB",
)

Testing on IEDB



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
57,IVVRSSNTGKLI,ASSQDRDTQY,VMAPRTLIL,0.5475
58,,ASSLGQAYEQY,EEYLKAWTF,0.0566
58,,ASSLGQAYEQY,EEYLQAFTY,0.0062
58,,ASSLGQAYEQY,FLRGRAYGL,0.0033
58,,ASSLGQAYEQY,FLRGRFYGL,0.0158
...,...,...,...,...
202530,AVDNFNKFY,ASSSQGGYGYT,KVDPIGHVY,0.1099
202531,AGSGSRLT,ASSFDRGYGYT,KVDPIGHVY,0.0016
202532,AFTELNSGGSNYKLT,ASSLSGGLLRTGELF,FVVPYMIYLL,0.3506
202533,ASSGGNTPLV,ASSFGGAYEQY,VQIISCQY,0.6501


In [10]:
# Per-cell annotations of scirpy's wu2020 dataset.
wu2020_obs = ir.datasets.wu2020().obs

# Receptor-only columns kept after converting the scirpy-formatted table.
receptor_columns = [
    "Receptor_ID", "TRA", "TRB", "TRAV", "TRAJ", "TRBV", "TRBJ",
    "T-Cell-Type", "Species", "Antigen.species", "Tissue",
]
receptors = process_dataset_TCR(df=wu2020_obs, source="scirpy")[receptor_columns]

# One row per epitope: its string form and its MHC annotation.
df2 = pd.DataFrame(
    {
        "Peptide": list(map(str, epitopes)),
        "MHC": [epitope.mhc for epitope in epitopes],
    }
)

# Cross join so that every receptor is combined with every epitope, then put
# the columns into the order used by the dataset passed to the predictor.
dataset_columns = [
    "Receptor_ID", "TRA", "TRB", "TRAV", "TRAJ", "TRBV", "TRBJ",
    "T-Cell-Type", "Peptide", "MHC", "Species", "Antigen.species", "Tissue",
]
df = receptors.merge(df2, how="cross")[dataset_columns]

# Location of the ERGO-II model repository handed to the predictor.
ergo_repo = "/home/mahmoud/Documents/epytope/epytope/epytope/TCRSpecificityPrediction/Models/ERGO-II"

print("Testing scirpy\n")
mo.predict_from_dataset(
    repository=ergo_repo,
    df=df,
)

Testing scirpy



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ERGO-II
Receptor_ID,TRA,TRB,Peptide,Unnamed: 4_level_1
0,CALSDQVDDKLIF,CASSGGYYNEQFF,FLKEKGGL,0.2079
0,CALSDQVDDKLIF,CASSGGYYNEQFF,SQLLNAKYL,0.0748
1,,CASSPVSVLASSYEQYF,FLKEKGGL,0.6005
1,,CASSPVSVLASSYEQYF,SQLLNAKYL,0.5994
3,CALDTGGGNKLTF,CASSESQGQEKLFF,FLKEKGGL,0.1881
...,...,...,...,...
141618,CAASPAGSARQLTF,CASSEYKRHTDTQYF,SQLLNAKYL,0.0019
141620,,CASSIGLRDIQYF,FLKEKGGL,0.6148
141620,,CASSIGLRDIQYF,SQLLNAKYL,0.6638
141621,CAVNMGDMRF,CASSPRGGGPNEQYF,FLKEKGGL,0.0174
