# Transfer learning of sample-specific HLA-I models

In [1]:
import os

# Protein FASTA file handed to the classifier below (as `fasta_files=[fasta]`).
# Set the PEPTDEEP_HLA_FASTA environment variable to point at your own file;
# when it is unset, the original author's local path is used as the default.
fasta = os.environ.get(
    'PEPTDEEP_HLA_FASTA',
    '/Users/zengwenfeng/Workspace/Data/fasta/irtfusion.fasta',
)

#### Load training HLA peptides

The training data can be a tsv/csv/txt file containing sample-specific HLA-I peptides; it must contain a `sequence` column. For demonstration, a small in-memory DataFrame is used here instead.

In [2]:
import pandas as pd

# Toy set of sample-specific HLA-I peptides. Only the 'sequence' column is
# written by hand; the peptide-length column 'nAA' is derived from it.
train_seq_df = pd.DataFrame(
    {
        'sequence': [
            'ACDEFGHIKLMNPQ',
            'ACDEFGHI',
            'ACDEFGHIK',
            'EFGHIKLMNPQ',
            'AHIKLMNPQ',
        ]
    }
).assign(nAA=lambda seq_df: seq_df['sequence'].str.len())
train_seq_df

Unnamed: 0,sequence,nAA
0,ACDEFGHIKLMNPQ,14
1,ACDEFGHI,8
2,ACDEFGHIK,9
3,EFGHIKLMNPQ,11
4,AHIKLMNPQ,9


#### Initialize the model and load the pretrained model

In [3]:
from peptdeep_hla.HLA_class_I import HLA_Class_I_Classifier

# The classifier is given a list of FASTA files; here it is the single
# file path stored in `fasta`.
model = HLA_Class_I_Classifier(fasta_files=[fasta])

# Last expression of the cell: displays the value returned by
# get_parameter_num() (the model's parameter count, going by the name).
model.get_parameter_num()

1669697

In [4]:
# `pretrained_HLA1` is exported by the package; the cell output shows it is a
# string path to a checkpoint file shipped inside the package's
# pretrained_models/ directory.
from peptdeep_hla.HLA_class_I import pretrained_HLA1
# Load the pretrained weights into the classifier created above, so the
# training step below starts from them rather than from a fresh model.
model.load(pretrained_HLA1)
# Display the checkpoint path so the reader can see which file was loaded.
pretrained_HLA1

'/Users/zengwenfeng/Workspace/PeptDeep-HLA/peptdeep_hla/pretrained_models/HLA1_IEDB.pt'

#### Fine-tune on the training peptides

Non-HLA peptides are automatically sampled from the fasta file and used as the negative training data.

In [5]:
# Fine-tune the loaded model on the sample-specific peptides for 4 epochs,
# with warmup_epoch=2. With verbose=True one "[Training]" line is printed
# per epoch, showing the learning rate and the loss.
# NOTE(review): no random seed is set anywhere in this notebook, so the
# reported loss values may differ between runs — confirm whether seeding is needed.
model.train(
    train_seq_df,
    epoch=4,
    warmup_epoch=2,
    verbose=True,
)

[Training] Epoch=1, lr=5e-05, loss=1.9179210364818573
[Training] Epoch=2, lr=0.0001, loss=1.48900505900383
[Training] Epoch=3, lr=5e-05, loss=0.7419157139956951
[Training] Epoch=4, lr=0.0, loss=0.4912389740347862


#### Predict HLA-I peptides from fasta

In [6]:
# Run the fine-tuned model over the proteins of the FASTA file(s) given at
# construction time. The returned frame has start_pos, end_pos, nAA,
# HLA_prob_pred and sequence columns (see the output below).
# prob_threshold=0.7 presumably keeps only peptides with HLA_prob_pred >= 0.7
# — consistent with the displayed rows, but confirm against the library docs.
hla_df = model.predict_from_proteins(prob_threshold=0.7)
# Display the predicted peptides (pandas truncates the long frame automatically).
hla_df

100%|██████████| 1/1 [00:00<00:00,  1.33it/s]


Unnamed: 0,start_pos,end_pos,nAA,HLA_prob_pred,sequence
0,53,61,8,0.760860,GLDAKTPV
1,54,62,8,0.832445,LDAKTPVI
2,61,69,8,0.707884,ISGGPYEY
3,66,74,8,0.818252,YEYRVEAT
4,67,75,8,0.890302,EYRVEATF
...,...,...,...,...,...
189,78,92,14,0.749335,ESNAKTPVITGAPY
190,79,93,14,0.866187,SNAKTPVITGAPYE
191,80,94,14,0.921481,NAKTPVITGAPYEY
192,81,95,14,0.775341,AKTPVITGAPYEYR
