# Dr. Sormanni Presentation Region

#### Build Model

In [1]:
from TransformerBeta import *

In [2]:
# ---- "Wide" Transformer: hyperparameters, construction, checkpoint restore ----

# Attention / embedding widths.
num_hiddens = 512
query_size = key_size = value_size = 512

# Depth and regularisation.
num_layers = 4
dropout = 0.1

# Training settings. Nothing in this cell reads them; they are kept because
# other cells or earlier training runs may refer to them.
lr = 0.0004
num_epochs = 500
batch_size = 6000
label_smoothing = 0.1

# Position-wise feed-forward sizes and number of attention heads.
ffn_num_input = 512
ffn_num_hiddens = 2048
num_heads = 8

# Shape passed to the encoder/decoder as `norm_shape`.
# NOTE(review): the previous comment mentioned 32, but the value in use is 512.
norm_shape = [512]
device = d2l.try_gpu()

# The encoder and the decoder are built from the same argument list.
encoder_wide = TransformerEncoder(
    len(amino_dict),
    key_size,
    query_size,
    value_size,
    num_hiddens,
    norm_shape,
    ffn_num_input,
    ffn_num_hiddens,
    num_heads,
    num_layers,
    dropout,
)
decoder_wide = TransformerDecoder(
    len(amino_dict),
    key_size,
    query_size,
    value_size,
    num_hiddens,
    norm_shape,
    ffn_num_input,
    ffn_num_hiddens,
    num_heads,
    num_layers,
    dropout,
)
model_wide = EncoderDecoder(encoder_wide, decoder_wide)

# Parameter counts: all parameters, then only those that receive gradients.
model_wide_total_params = sum(
    param.numel() for param in model_wide.parameters()
)
model_wide_total_trainable_params = sum(
    param.numel() for param in model_wide.parameters() if param.requires_grad
)

print(f'Wide model: total number of parameters: {model_wide_total_params}')
print(f'Wide model: total number of trainable parameters: {model_wide_total_trainable_params}')

# Restore the stored weights, mapping every tensor onto the CPU. This stays the
# last expression of the cell so the key-matching result is displayed.
model_wide.load_state_dict(
    torch.load("model_wide_22Jul16_1011AM", map_location="cpu")
)

Wide model: total number of parameters: 29437976
Wide model: total number of trainable parameters: 29437976


<All keys matched successfully>

#### Target of interest

In [11]:
# Beta-strand target; PDB label = 'VVARTKYK'.
PDB_beta_strand = "NRIELKGT"

# PNAS 2015 target.
#   paper chosen peptide   = 'QYSVLIDA'
#   paper chosen peptide 2 = 'QYSVLIEF' (alpha-synuclein residues 61-68)
PNAS_2015_target = "EQVTNVGG"

#### Target of selection

In [23]:
# Selections read by the prediction, sampling and evaluation cells.
prediction_length = 8      # peptide length to predict; the cells below pass prediction_length + 2
model_use = model_wide     # network used for every prediction
task_target = "NRIELKGT"   # target sequence fed to the model

#### Greedy single prediction

In [24]:
# Run the single-target greedy predictor on the selected target. It returns the
# predicted complementary peptide, its probability and the decoder attention
# weights (kept because save_attention_weights=True).
dec_comple_peptide_pred, dec_prob, dec_attention_weight_seq = predict_greedy_single(
    model_use,
    task_target,
    amino_dict,
    # NOTE(review): the extra 2 steps presumably cover start/end tokens - confirm.
    prediction_length + 2,
    device,
    save_attention_weights=True,
    print_info=True,
)

Conditional probability at position 1 is 0.9358905553817749
Conditional probability at position 2 is 0.8671083450317383
Conditional probability at position 3 is 0.7962782979011536
Conditional probability at position 4 is 0.922514021396637
Conditional probability at position 5 is 0.9255252480506897
Conditional probability at position 6 is 0.9258487224578857
Conditional probability at position 7 is 0.9241359233856201
Conditional probability at position 8 is 0.9532920718193054
Conditional probability at position 9 is 0.9239785075187683
Input target sequence is NRIELKGT, predicted complementary peptide is VEARTKYK
Condition on input, predicted probability is 0.415803644044098


#### Candidates sampling

In [25]:
# Sampling budget: number of candidates requested and the iteration cap
# handed to the sampler.
max_iter = 20
num_candidates = 50

# NOTE(review): no random seed is set in the visible cells, so the sampled
# candidates may differ between runs - confirm whether that is intended.
peptide_candidates = sample_candidates(
    model_use,
    task_target,
    num_candidates,
    amino_dict,
    prediction_length + 2,
    device,
    max_iter=max_iter,
)
print(peptide_candidates)

number of total candidates sampled: 132
number of unique top candidates successfully sampled: 50
[['VEARTKYK' '0.4158034920692444']
 ['VELDGDVK' '0.06144126132130623']
 ['VVARTKYK' '0.023861050605773926']
 ['VIARTKYK' '0.0027274086605757475']
 ['VNARTKYK' '0.0024703301023691893']
 ['VYARTKYK' '0.0018396908417344093']
 ['VKARTKYK' '0.0015880581922829151']
 ['PVARTKYK' '0.001371582387946546']
 ['VEARTKYN' '0.00130607804749161']
 ['LVARTKYK' '0.001157547696493566']
 ['VEARTKTK' '0.0010942153166979551']
 ['MEARTKYK' '0.001092130201868713']
 ['VEARTNIS' '0.0010632550111040473']
 ['VEARTKPV' '0.0010007908567786217']
 ['VEATTKYK' '0.000990288332104683']
 ['VEARTKYP' '0.0009733658516779542']
 ['VEARDKYK' '0.0009230708237737417']
 ['VEARTWLQ' '0.000921268539968878']
 ['VEARTKRT' '0.0008684906060807407']
 ['VELDGDVI' '0.0008510025800205767']
 ['AVARTKYK' '0.0006835784297436476']
 ['VEDTMHLD' '0.0006519404123537242']
 ['YHARTKYK' '0.0005593040841631591']
 ['VEARSSFT' '0.0005156139959581196']
 ['H

#### Peptide pair evaluation

In [None]:
# Score one hand-picked (target, complementary peptide) pair with the model.
# The target is the `task_target` chosen in the selection cell; the former
# `task_target = task_target` self-assignment was a no-op and has been removed.

# Peptide to evaluate against the target.
# NOTE(review): this differs from the greedy prediction 'VEARTKYK' at one
# position (R -> K) - presumably a deliberate variant; confirm.
# NOTE: this reuses the names written by the greedy-prediction cell, so the
# greedy results are overwritten once this cell runs.
dec_comple_peptide_pred = 'VEAKTKYK'

dec_prob, dec_attention_weight_seq = evaluate_single(
    model_use,
    task_target,
    dec_comple_peptide_pred,
    amino_dict,
    prediction_length + 2,
    device,
    save_attention_weights=True,
    print_info=True,
)