---
# Allele-specific model

In [1]:
import sys 
if ".." not in sys.path:
    sys.path.append("..")

import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
from HPL.model_ft import meanTAPE

# Allele-specific case study: start from one peptide and ask
# get_mutated_peptides for mutants scored against the given HLA allele,
# using the four HPL-Cluster checkpoints loaded below.
given_HLA = "HLA-B*42:01"
init_peptide = "RPGGKKKYK"

# prepare model
use_cuda = True
# Preferred GPU ordinal. A fixed "cuda:5" fails with an invalid device
# ordinal on hosts that have CUDA but fewer than six GPUs, so the ordinal is
# validated against the number of visible devices before it is used.
preferred_gpu = 5
if use_cuda and torch.cuda.is_available():
    if preferred_gpu < torch.cuda.device_count():
        device = torch.device("cuda", preferred_gpu)
    else:
        print("cuda:{} is not available, falling back to cuda:0".format(preferred_gpu))
        device = torch.device("cuda", 0)
else:
    device = torch.device("cpu")

# NOTE(review): absolute, machine-specific checkpoint root; adjust when
# running elsewhere. The trailing "/" is required because the file names
# below are appended by plain string concatenation.
model_path = "/data/lujd/neoag_model/main_task/"
## gene
model1_filename = "HPL-Cluster/B_gene/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep10_221124.pkl"
## supertype
model2_filename = "HPL-Cluster/B_supertype_4201/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep9_230221.pkl"
## sequence
model3_filename = "HPL-Cluster/B_seq_more_4201/main_finetune_plm_tape_B26_LR6e-06_seq_clip_fold4_ep13_230223.pkl"
## semantic
model4_filename = "HPL-Cluster/B_semantic_equal_4201/main_finetune_plm_tape_B26_LR1e-05_seq_clip_fold4_ep18_230309.pkl"
model_names = [model1_filename, model2_filename, model3_filename, model4_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    checkpoint_file = model_path + model_name
    # torch.load unpickles the file, so only point this at trusted checkpoints.
    # map_location moves the stored tensors onto the selected device.
    state_dict = torch.load(checkpoint_file, map_location=device)
    model = meanTAPE(tape_config, "2mlp").to(device)
    # strict=True: the checkpoint keys must match the model's keys exactly.
    model.load_state_dict(state_dict, strict=True)
    # eval() switches the module to inference mode and returns the module.
    models.append(model.eval())
    print("load one model")
print("Model preparing done")

# All loaded models are handed over as one list; how their predictions are
# combined is decided inside get_mutated_peptides (not visible here).
# NOTE(review): the meaning of num_mutation / num_peptides / prob_limit /
# algorithm="2a" is defined in mutation_release - confirm there before tuning.
# NOTE(review): the pan-specific cell of this notebook passes the same
# filename="paper_case_file" with writein_file=True; verify the two runs do
# not overwrite each other's results.
output_pos, output_pep = get_mutated_peptides(
    given_HLA, init_peptide, tokenizer, models, device,
    num_mutation=9, num_peptides=5, prob_limit=0.5,
    writein_file=True, algorithm="2a", filename="paper_case_file",
)

Model preparing
load one model
load one model
load one model
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0005
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9939
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9968
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9971
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9981
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9986
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYL, mutated peptide: RPGGKKTYL | 7 K->T | binding probability: 0.9997
source peptide: RPGGKKKYI, mutated peptide: RPGGKKPYI | 7 K->P | binding probability: 0.9998
source peptide: RPGGKKKYL, mutated peptide: RPGGKKIYL | 7 K->I | binding probability: 0.9998
source pepti

---
# Pan-specific model

In [2]:
import sys 
if ".." not in sys.path:
    sys.path.append("..")

import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
from HPL.model_ft import meanTAPE

# Pan-specific case study: same peptide and HLA allele as the allele-specific
# run, but scored with the single HPL-Pan checkpoint loaded below.
given_HLA = "HLA-B*42:01"
init_peptide = "RPGGKKKYK"

# prepare model
use_cuda = True
# Preferred GPU ordinal. A fixed "cuda:5" fails with an invalid device
# ordinal on hosts that have CUDA but fewer than six GPUs, so the ordinal is
# validated against the number of visible devices before it is used.
preferred_gpu = 5
if use_cuda and torch.cuda.is_available():
    if preferred_gpu < torch.cuda.device_count():
        device = torch.device("cuda", preferred_gpu)
    else:
        print("cuda:{} is not available, falling back to cuda:0".format(preferred_gpu))
        device = torch.device("cuda", 0)
else:
    device = torch.device("cpu")

# NOTE(review): absolute, machine-specific checkpoint root; adjust when
# running elsewhere. The trailing "/" is required because the file name
# below is appended by plain string concatenation.
model_path = "/data/lujd/neoag_model/main_task/"
model1_filename = "HPL-Pan/cat_mean_2mlp/main_finetune_plm_tape_B32_LR3e-05_seq_clip_fold4_ep51_221104.pkl"
model_names = [model1_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    checkpoint_file = model_path + model_name
    # torch.load unpickles the file, so only point this at trusted checkpoints.
    # map_location moves the stored tensors onto the selected device.
    state_dict = torch.load(checkpoint_file, map_location=device)
    model = meanTAPE(tape_config, "2mlp").to(device)
    # strict=True: the checkpoint keys must match the model's keys exactly.
    model.load_state_dict(state_dict, strict=True)
    # eval() switches the module to inference mode and returns the module.
    models.append(model.eval())
    print("load one model")
print("Model preparing done")

# NOTE(review): the meaning of num_mutation / num_peptides / prob_limit /
# algorithm="2a" is defined in mutation_release - confirm there before tuning.
# NOTE(review): the allele-specific cell of this notebook passes the same
# filename="paper_case_file" with writein_file=True; verify the two runs do
# not overwrite each other's results.
output_pos, output_pep = get_mutated_peptides(
    given_HLA, init_peptide, tokenizer, models, device,
    num_mutation=9, num_peptides=5, prob_limit=0.5,
    writein_file=True, algorithm="2a", filename="paper_case_file",
)

Model preparing
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0008
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9803
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9935
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9954
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9965
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9978
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYI, mutated peptide: RPGGKKVYI | 7 K->V | binding probability: 0.9996
source peptide: RPGGKKKYV, mutated peptide: RPGGKKVYV | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYL, mutated peptide: RPGGKKVYL | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYV, mutated peptide: RPGGKKNYV | 7