### model inference on GPU (NVIDIA GeForce RTX 3090)

In [2]:
import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
import sys 
if ".." not in sys.path:
    sys.path.append("..")
from model_ft import meanTAPE

given_HLA = "HLA-B*42:01"
init_peptides = ["RPGGKKKYK","RLRPGGKKK","KYRLKHIVW","KYQLKHIVW",
                 "KEKGGLDGL","FRYNGLIHR","FSFPQITLW","ERYLKDQQL",
                 "SDYLELDTI","DRFFKTLRA","FVNYNFTLV","RTSKAPLER"]

# prepare model
use_cuda = True
device = torch.device("cuda:0" if (torch.cuda.is_available() and use_cuda) else "cpu")
model_path = "/data/lujd/neoag_model/main_task/"
model1_filename = "TAPE_ft/cat_mean_2mlp/main_finetune_plm_tape_B32_LR3e-05_seq_clip_fold4_ep51_221104.pkl"
model_names = [model1_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    model = meanTAPE(tape_config, "2mlp").to(device)
    model.load_state_dict(torch.load(model_path + model_name, map_location=device), strict = True)
    model = model.eval()
    models.append(model)
    print("load one model")
print("Model preparing done")

run_times_all_pep1 = []
for ind, init_peptide in enumerate(init_peptides):
    output_pos, output_pep, run_times_per_pep = get_mutated_peptides(given_HLA, init_peptide, tokenizer, models, device,
                                                num_mutation=9, num_peptides=5, prob_limit=0.5,
                                                writein_file=True, record_time=True, algorithm="2a", filename="test_time_file_gpu")
    run_times_all_pep1.append(run_times_per_pep)
    print(ind+1, len(run_times_per_pep), "\n")

Model preparing
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0008
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9803
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9935
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9954
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9965
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9978
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYI, mutated peptide: RPGGKKVYI | 7 K->V | binding probability: 0.9996
source peptide: RPGGKKKYV, mutated peptide: RPGGKKVYV | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYL, mutated peptide: RPGGKKVYL | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYV, mutated peptide: RPGGKKNYV | 7

In [3]:
run_times_all_pep1

[array([0.20142889, 1.00304198, 1.81162143, 2.47599936, 3.07879829,
        3.59607053, 4.0514009 , 4.43625474, 4.74108458]),
 array([0.19694042, 1.02270889, 1.74894238, 2.42321301, 3.02040386,
        3.55259633, 4.00125146, 4.37829351, 4.68915987]),
 array([0.19834256, 0.99971032, 1.73231888, 2.43266201, 3.02256918,
        3.538028  , 3.99600959, 4.38147211, 4.68690729]),
 array([0.20195079, 1.0016861 , 1.73499966, 2.38905931, 2.98396492,
        3.51523137, 3.96212173, 4.34229612, 4.64763689]),
 array([0.19248438, 0.99996161, 1.73839855, 2.41453648, 3.03129673,
        3.55605769, 4.00644469, 4.38296533, 4.69621229]),
 array([0.19403028, 0.99103594, 1.73120451, 2.38866282, 2.98803353,
        3.50750208, 3.96488214, 4.34109187, 4.64631319]),
 [],
 array([0.19587564, 1.00587606, 1.73446178, 2.39340854, 2.99421549,
        3.53712296, 3.9827106 , 4.36451054, 4.67224169]),
 [],
 array([0.19728756, 0.99597645, 1.72379088, 2.38772488, 2.97224474,
        3.49039721, 3.94653463, 4.319895

In [4]:
del run_times_all_pep1[6]
del run_times_all_pep1[7]

Round-1: 0.227
Round-2: 1.141
Round-3: 1.983
Round-4: 2.726
Round-5: 3.383
Round-6: 3.962
Round-7: 4.462
Round-8: 4.874
Round-9: 5.201

In [5]:
import numpy as np
run_times_all_pep1 = np.array(run_times_all_pep1)
run_times_ave_pep1 = np.average(run_times_all_pep1, axis=0)
for ind, run_time in enumerate(run_times_ave_pep1):
    print("Round-{}: {:.3f}".format(ind+1, run_time))       # 2233M

Round-1: 0.198
Round-2: 1.003
Round-3: 1.743
Round-4: 2.410
Round-5: 3.007
Round-6: 3.532
Round-7: 3.986
Round-8: 4.366
Round-9: 4.674


In [6]:
import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
import sys 
if ".." not in sys.path:
    sys.path.append("..")
from model_ft import meanTAPE

given_HLA = "HLA-B*42:01"
init_peptides = ["RPGGKKKYK","RLRPGGKKK","KYRLKHIVW","KYQLKHIVW",
                 "KEKGGLDGL","FRYNGLIHR","FSFPQITLW","ERYLKDQQL",
                 "SDYLELDTI","DRFFKTLRA","FVNYNFTLV","RTSKAPLER"]

# prepare model
use_cuda = True
device = torch.device("cuda:0" if (torch.cuda.is_available() and use_cuda) else "cpu")
model_path = "/data/lujd/neoag_model/main_task/"
## gene
model1_filename = "TAPE_ft4/B_gene/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep10_221124.pkl"
## supertype
model2_filename = "TAPE_ft4/B_supertype_4201/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep9_230221.pkl"
## sequence
model3_filename = "TAPE_ft4/B_seq_more_4201/main_finetune_plm_tape_B26_LR6e-06_seq_clip_fold4_ep13_230223.pkl"
## semantic
model4_filename = "TAPE_ft4/B_semantic_equal_4201/main_finetune_plm_tape_B26_LR1e-05_seq_clip_fold4_ep18_230309.pkl"
model_names = [model1_filename, model2_filename, model3_filename, model4_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    model = meanTAPE(tape_config, "2mlp").to(device)
    model.load_state_dict(torch.load(model_path + model_name, map_location=device), strict = True)
    model = model.eval()
    models.append(model)
    print("load one model")
print("Model preparing done")

run_times_all_pep4 = []
for ind, init_peptide in enumerate(init_peptides):
    output_pos, output_pep, run_times_per_pep = get_mutated_peptides(given_HLA, init_peptide, tokenizer, models, device,
                                                num_mutation=9, num_peptides=5, prob_limit=0.5,
                                                writein_file=True, record_time=True, algorithm="2a", filename="test_time_file_gpu")
    run_times_all_pep4.append(run_times_per_pep)
    print(ind+1, len(run_times_per_pep), "\n")

Model preparing
load one model
load one model
load one model
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0005
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9939
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9968
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9971
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9981
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9986
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYL, mutated peptide: RPGGKKTYL | 7 K->T | binding probability: 0.9997
source peptide: RPGGKKKYI, mutated peptide: RPGGKKPYI | 7 K->P | binding probability: 0.9998
source peptide: RPGGKKKYL, mutated peptide: RPGGKKIYL | 7 K->I | binding probability: 0.9998
source pepti

In [7]:
run_times_all_pep4

[array([ 0.81242943,  4.0970602 ,  7.17020655,  9.92374063, 12.37369514,
        14.44123673, 16.2461555 , 17.75196695, 18.98545027]),
 array([ 0.79330206,  4.07866335,  7.00271559,  9.63354373, 12.02256393,
        14.08752108, 15.873245  , 17.36532569, 18.59013057]),
 array([ 0.76825047,  4.02557206,  6.8990097 ,  9.50213504, 11.84709954,
        13.97777152, 15.75213766, 17.24568462, 18.46461463]),
 array([ 0.77687168,  3.94166112,  6.90265822,  9.56378317, 11.92847753,
        14.02515864, 15.8187921 , 17.33998275, 18.58035851]),
 array([ 0.79266834,  4.00780773,  7.00376487,  9.64776587, 12.00685954,
        14.07427835, 15.90266919, 17.4208746 , 18.64485025]),
 array([ 0.77583408,  3.95997715,  6.87123537,  9.52265763, 11.87220263,
        13.99731874, 15.81463552, 17.37090325, 18.61218786]),
 [],
 array([ 0.87223768,  4.70956492,  8.2075758 , 11.34313416, 14.10436177,
        16.49423671, 18.55473781, 20.23444319, 21.54183245]),
 [],
 array([ 0.97738075,  4.78175497,  8.25391722

In [8]:
del run_times_all_pep4[6]
del run_times_all_pep4[7]

In [9]:
import numpy as np
run_times_all_pep4 = np.array(run_times_all_pep4)
run_times_ave_pep4 = np.average(run_times_all_pep4, axis=0)
for ind, run_time in enumerate(run_times_ave_pep4):
    print("Round-{}: {:.3f} {:.2f}".format(ind+1, run_time, run_time))

Round-1: 0.850 0.85
Round-2: 4.326 4.33
Round-3: 7.488 7.49
Round-4: 10.327 10.33
Round-5: 12.855 12.85
Round-6: 15.073 15.07
Round-7: 16.967 16.97
Round-8: 18.547 18.55
Round-9: 19.809 19.81


### model inference on CPU

In [10]:
import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
import sys 
if ".." not in sys.path:
    sys.path.append("..")
from model_ft import meanTAPE

given_HLA = "HLA-B*42:01"
init_peptides = ["RPGGKKKYK","RLRPGGKKK","KYRLKHIVW","KYQLKHIVW",
                 "KEKGGLDGL","FRYNGLIHR","FSFPQITLW","ERYLKDQQL",
                 "SDYLELDTI","DRFFKTLRA","FVNYNFTLV","RTSKAPLER"]

# prepare model
use_cuda = False
device = torch.device("cuda:0" if (torch.cuda.is_available() and use_cuda) else "cpu")
model_path = "/data/lujd/neoag_model/main_task/"
model1_filename = "TAPE_ft/cat_mean_2mlp/main_finetune_plm_tape_B32_LR3e-05_seq_clip_fold4_ep51_221104.pkl"
model_names = [model1_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    model = meanTAPE(tape_config, "2mlp").to(device)
    model.load_state_dict(torch.load(model_path + model_name, map_location=device), strict = True)
    model = model.eval()
    models.append(model)
    print("load one model")
print("Model preparing done")

run_times_all_pep1_cpu = []
for ind, init_peptide in enumerate(init_peptides):
    output_pos, output_pep, run_times_per_pep = get_mutated_peptides(given_HLA, init_peptide, tokenizer, models, device,
                                                num_mutation=9, num_peptides=5, prob_limit=0.5,
                                                writein_file=True, record_time=True, algorithm="2a", filename="test_time_file_cpu")
    run_times_all_pep1_cpu.append(run_times_per_pep)
    print(ind+1, len(run_times_per_pep), "\n")

Model preparing
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0008
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9803
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9935
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9954
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9965
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9978
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYI, mutated peptide: RPGGKKVYI | 7 K->V | binding probability: 0.9996
source peptide: RPGGKKKYV, mutated peptide: RPGGKKVYV | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYL, mutated peptide: RPGGKKVYL | 7 K->V | binding probability: 0.9997
source peptide: RPGGKKKYV, mutated peptide: RPGGKKNYV | 7

In [11]:
run_times_all_pep1_cpu

[array([ 3.59277844, 16.92497993, 28.88256621, 39.70403266, 49.85267186,
        59.09795594, 68.34219551, 75.20602918, 81.38100696]),
 array([ 3.52070904, 16.6349709 , 27.83393097, 38.83088493, 48.50677919,
        57.41679716, 65.9058187 , 73.96573114, 80.56720901]),
 array([ 2.55275846, 16.86469603, 28.00692725, 38.94508982, 48.28660059,
        56.89809036, 64.90238571, 72.4306159 , 78.73810673]),
 array([ 3.43347979, 16.84638095, 28.64131474, 39.66940236, 49.67903662,
        57.82918191, 65.95715332, 74.34115291, 80.49348545]),
 array([ 3.21322346, 16.31402397, 26.84101796, 37.77554703, 48.76957631,
        58.75168109, 67.52723479, 75.63227677, 82.39269257]),
 array([ 2.70303917, 13.74963427, 24.2804234 , 34.7916677 , 45.01891184,
        53.51717949, 62.43445373, 69.15451407, 76.32931447]),
 [],
 array([ 2.39944172, 15.1062398 , 26.46959472, 36.88094521, 46.94209218,
        56.662184  , 65.51851773, 73.75371265, 79.70025945]),
 [],
 array([ 2.55699277, 17.08585691, 30.09940147

In [12]:
del run_times_all_pep1_cpu[6]
del run_times_all_pep1_cpu[7]

In [13]:
import numpy as np
run_times_all_pep1_cpu = np.array(run_times_all_pep1_cpu)
run_times_ave_pep1_cpu = np.average(run_times_all_pep1_cpu, axis=0)
for ind, run_time in enumerate(run_times_ave_pep1_cpu):
    print("Round-{}: {:.3f}".format(ind+1, run_time))

Round-1: 2.935
Round-2: 16.235
Round-3: 27.923
Round-4: 38.950
Round-5: 49.042
Round-6: 58.205
Round-7: 66.879
Round-8: 74.404
Round-9: 80.934


In [14]:
import torch
from mutation_release import get_mutated_peptides
from tape import TAPETokenizer, ProteinBertConfig
import sys 
if ".." not in sys.path:
    sys.path.append("..")
from model_ft import meanTAPE

given_HLA = "HLA-B*42:01"
init_peptides = ["RPGGKKKYK","RLRPGGKKK","KYRLKHIVW","KYQLKHIVW",
                 "KEKGGLDGL","FRYNGLIHR","FSFPQITLW","ERYLKDQQL",
                 "SDYLELDTI","DRFFKTLRA","FVNYNFTLV","RTSKAPLER"]

# prepare model
use_cuda = False
device = torch.device("cuda:0" if (torch.cuda.is_available() and use_cuda) else "cpu")
model_path = "/data/lujd/neoag_model/main_task/"
## gene
model1_filename = "TAPE_ft4/B_gene/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep10_221124.pkl"
## supertype
model2_filename = "TAPE_ft4/B_supertype_4201/main_finetune_plm_tape_B24_LR1e-05_seq_clip_fold4_ep9_230221.pkl"
## sequence
model3_filename = "TAPE_ft4/B_seq_more_4201/main_finetune_plm_tape_B26_LR6e-06_seq_clip_fold4_ep13_230223.pkl"
## semantic
model4_filename = "TAPE_ft4/B_semantic_equal_4201/main_finetune_plm_tape_B26_LR1e-05_seq_clip_fold4_ep18_230309.pkl"
model_names = [model1_filename, model2_filename, model3_filename, model4_filename]

print("Model preparing")
tokenizer = TAPETokenizer(vocab='iupac')
tape_config = ProteinBertConfig.from_pretrained('bert-base')
models = []
for model_name in model_names:
    model = meanTAPE(tape_config, "2mlp").to(device)
    model.load_state_dict(torch.load(model_path + model_name, map_location=device), strict = True)
    model = model.eval()
    models.append(model)
    print("load one model")
print("Model preparing done")

run_times_all_pep4_cpu = []
for ind, init_peptide in enumerate(init_peptides):
    output_pos, output_pep, run_times_per_pep = get_mutated_peptides(given_HLA, init_peptide, tokenizer, models, device,
                                                num_mutation=9, num_peptides=5, prob_limit=0.5,
                                                writein_file=True, record_time=True, algorithm="2a", filename="test_time_file_cpu")
    run_times_all_pep4_cpu.append(run_times_per_pep)
    print(ind+1, len(run_times_per_pep), "\n")

Model preparing
load one model
load one model
load one model
load one model
Model preparing done
given HLA: HLA-B*42:01, given peptide: RPGGKKKYK | binding porbability: 0.0005
Iteration-1, mutant_pool size: 19
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYF | 9 K->F | binding probability: 0.9939
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYV | 9 K->V | binding probability: 0.9968
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYM | 9 K->M | binding probability: 0.9971
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYI | 9 K->I | binding probability: 0.9981
source peptide: RPGGKKKYK, mutated peptide: RPGGKKKYL | 9 K->L | binding probability: 0.9986
Iteration-2, mutant_pool size: 95
source peptide: RPGGKKKYL, mutated peptide: RPGGKKTYL | 7 K->T | binding probability: 0.9997
source peptide: RPGGKKKYI, mutated peptide: RPGGKKPYI | 7 K->P | binding probability: 0.9998
source peptide: RPGGKKKYL, mutated peptide: RPGGKKIYL | 7 K->I | binding probability: 0.9998
source pepti

KeyboardInterrupt: 

In [15]:
del run_times_all_pep4_cpu[6]
del run_times_all_pep4_cpu[7]

IndexError: list assignment index out of range

In [17]:
run_times_all_pep4_cpu

array([[ 14.64189076,  72.16515541, 127.51203227, 175.02054071,
        222.67848277, 269.14470601, 312.77631164, 348.53119636,
        386.59478426],
       [ 11.91085625,  75.88129306, 130.73677397, 181.02611113,
        227.0818553 , 270.42692995, 310.63873959, 349.69878697,
        389.97785401],
       [ 12.31398511,  70.17629743, 127.1750803 , 178.11993814,
        226.1125834 , 272.37187481, 314.81752634, 356.36514235,
        393.06425714],
       [ 12.42112446,  84.35516787, 149.2895937 , 202.61623454,
        253.82470512, 301.55678177, 344.84365869, 393.10438418,
        438.31745744],
       [ 16.23779798,  91.85987449, 164.11976814, 230.85335064,
        293.79268861, 352.64112496, 407.63591981, 458.23652411,
        506.96829987]])

In [16]:
import numpy as np
run_times_all_pep4_cpu = np.array(run_times_all_pep4_cpu)
run_times_ave_pep4_cpu = np.average(run_times_all_pep4_cpu, axis=0)
for ind, run_time in enumerate(run_times_ave_pep4_cpu):
    print("Round-{}: {:.3f}".format(ind+1, run_time))

Round-1: 13.505
Round-2: 78.888
Round-3: 139.767
Round-4: 193.527
Round-5: 244.698
Round-6: 293.228
Round-7: 338.142
Round-8: 381.187
Round-9: 422.985
