# Dr. Sormanni Presentation Region

#### Build Model

In [1]:
from TransformerBeta import *

In [2]:
# Build the "wide" Transformer encoder/decoder and load its trained weights.
# All attention sizes and the hidden width are set to the same value (512).
query_size, key_size, value_size, num_hiddens = 512, 512, 512, 512
num_layers, dropout = 4, 0.1
# Training hyperparameters: nothing in this cell reads them; kept in case
# other cells of the notebook do.
lr, num_epochs, batch_size, label_smoothing = 0.0004, 500, 6000, 0.1
ffn_num_input, ffn_num_hiddens, num_heads = 512, 2048, 8

# Shape passed to the model as `norm_shape`; presumably the feature dimension
# that layer normalisation runs over, so it is derived from the hidden width
# instead of being repeated as a literal (evaluates to [512], as before).
norm_shape = [num_hiddens]
device = d2l.try_gpu()


# Encoder and decoder are constructed with the same argument list.
encoder_wide = TransformerEncoder(
    len(amino_dict), key_size, query_size, value_size, num_hiddens,
    norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
    num_layers, dropout)
decoder_wide = TransformerDecoder(
    len(amino_dict), key_size, query_size, value_size, num_hiddens,
    norm_shape, ffn_num_input, ffn_num_hiddens, num_heads,
    num_layers, dropout)
model_wide = EncoderDecoder(encoder_wide, decoder_wide)


# Report the model size: every parameter, then only those that require gradients.
model_wide_total_params = sum(p.numel() for p in model_wide.parameters())
model_wide_total_trainable_params = sum(
    p.numel() for p in model_wide.parameters() if p.requires_grad)

print(f'Wide model: total number of parameters: {model_wide_total_params}')
print(f'Wide model: total number of trainable parameters: {model_wide_total_trainable_params}')


# This notebook only runs inference, so switch to evaluation mode: otherwise
# dropout stays active unless the prediction helpers change the mode themselves.
# Done before the load so that the cell still ends on `load_state_dict` and
# displays its key-matching result.
model_wide.eval()

# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
# The tensors are mapped to CPU and then copied into the model's parameters.
model_wide.load_state_dict(torch.load("model_wide_22Jul16_1011AM", map_location='cpu'))

Wide model: total number of parameters: 29437976
Wide model: total number of trainable parameters: 29437976


<All keys matched successfully>

#### Target of interest

In [3]:
# PDB label = 'VVARTKYK' (training sample that generates different labels)
PDB_beta_strand = 'NRIELKGT'

# paper chosen peptide = 'QYSVLIDA', paper chosen peptide 2 = 'QYSVLIEF'
# (alpha-synuclein residues 61-68)
PNAS_2015_target = 'EQVTNVGG'

#### Target of selection

In [8]:
# Selections read by the prediction, sampling and evaluation cells below:
# the model to query, the target sequence, and the peptide length to predict.
model_use = model_wide
task_target = 'EQVTNVGG'
prediction_length = 8

#### Greedy single prediction

In [9]:
# Single greedy prediction of a complementary peptide for `task_target`,
# keeping the attention weights and printing the per-position probabilities.
# NOTE(review): the `+ 2` on the length presumably accounts for start/end
# tokens — confirm against predict_greedy_single.
dec_comple_peptide_pred, dec_prob, dec_attention_weight_seq = predict_greedy_single(
    model_use,
    task_target,
    amino_dict,
    prediction_length + 2,
    device,
    save_attention_weights=True,
    print_info=True,
)

Conditional probability at position 1 is 0.8633506298065186
Conditional probability at position 2 is 0.9360931515693665
Conditional probability at position 3 is 0.9199482798576355
Conditional probability at position 4 is 0.9479291439056396
Conditional probability at position 5 is 0.9067015647888184
Conditional probability at position 6 is 0.723145604133606
Conditional probability at position 7 is 0.6093640923500061
Conditional probability at position 8 is 0.32506802678108215
Conditional probability at position 9 is 0.9241384267807007
Input target sequence is EQVTNVGG, predicted complementary peptide is QYSVLIFL
Condition on input, predicted probability is 0.08459093488266128


#### Candidates sampling

In [10]:
# Sample candidate complementary peptides for `task_target` and print them.
num_candidates = 100
max_iter = 20

# Fix the random seed before sampling so the printed candidate list is the
# same after Restart Kernel -> Run All.
# NOTE(review): assumes sample_candidates draws from torch's global RNG —
# confirm; if it uses numpy or `random`, seed that generator here as well.
sampling_seed = 0
torch.manual_seed(sampling_seed)

peptide_candidates = sample_candidates(
    model_use, task_target, num_candidates, amino_dict,
    prediction_length + 2, device, max_iter=max_iter)
print(peptide_candidates)

number of total candidates sampled: 200
number of unique top candidates successfully sampled: 100
[['QYSVLIFL' '0.08459067344665527']
 ['QYSVLIFG' '0.05803714320063591']
 ['QYSVLSYY' '0.039539407938718796']
 ['QYSVLIAL' '0.03525225818157196']
 ['QYSVLIFF' '0.03364912420511246']
 ['QYSVLITL' '0.029616713523864746']
 ['QYSVLIFW' '0.01687812991440296']
 ['QYSVLIFA' '0.01644524373114109']
 ['QYSVLSYL' '0.015118086710572243']
 ['QYSVLISL' '0.011400229297578335']
 ['QYSVLIGY' '0.011086122132837772']
 ['QYSVLIFQ' '0.00900103710591793']
 ['QYSVLIHF' '0.005809905007481575']
 ['QYSVLSYV' '0.004839177709072828']
 ['QYSVLIFY' '0.004783081356436014']
 ['QYSVLVYF' '0.004712470341473818']
 ['QYSVLSYF' '0.004016211722046137']
 ['QYSVTITP' '0.003600770840421319']
 ['QYSVLIFH' '0.003430353244766593']
 ['QYSVLCNV' '0.003339572111144662']
 ['QYSVLIFN' '0.002169572515413165']
 ['QYSVLVYW' '0.0020870205480605364']
 ['QYSVLSYM' '0.0017096661031246185']
 ['QYSVLERL' '0.0011118578258901834']
 ['QYSVLSAG' '0.00

#### Peptide pair evaluation

In [11]:
# Evaluate a hand-picked peptide against the current `task_target`
# (the former `task_target = task_target` self-assignment was a no-op and is dropped).
# NOTE: this cell reuses the names written by the greedy-prediction cell, so
# dec_comple_peptide_pred, dec_prob and dec_attention_weight_seq no longer hold
# the greedy results once it has run.
dec_comple_peptide_pred = 'EQSVEQEL'

# NOTE(review): the `+ 2` on the length presumably accounts for start/end
# tokens — confirm against evaluate_single.
dec_prob, dec_attention_weight_seq = evaluate_single(
    model_use, task_target, dec_comple_peptide_pred, amino_dict,
    prediction_length + 2, device,
    save_attention_weights=True, print_info=True)

Conditional probability at position 1 is 0.012247689068317413
Conditional probability at position 2 is 0.00828417669981718
Conditional probability at position 3 is 0.10578251630067825
Conditional probability at position 4 is 0.5253966450691223
Conditional probability at position 5 is 0.029865358024835587
Conditional probability at position 6 is 0.030841505154967308
Conditional probability at position 7 is 0.8573779463768005
Conditional probability at position 8 is 0.01506110094487667
Conditional probability at position 9 is 0.9241788387298584
Input target sequence is EQVTNVGG, complementary peptide is EQSVEQEL
Evaluated probability is 6.19859173534822e-11
