In [None]:
#---#| default_exp model.rt

In [None]:
# Development convenience: (re)load the autoreload extension and enable mode 2 so that
# edits to imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Retention Time Prediction

This notebook implements deep learning models to predict the retention time of a given (modified) peptide. The components of the models are defined in the `building_block.ipynb` notebook. The models are built on the interface from `model_interface.ipynb`, which provides standard interactions such as loading data and training the model.

## Imports

In [None]:
from peptdeep.model.rt import *

## Unit Tests

### Sanity checks on simple cases

In [None]:
#| hide
def initialize_model():
    """Build a seeded `AlphaRTModel` on the CPU and push one dummy batch through it.

    The torch seed is fixed to 1337 before the model is constructed. The result of
    the dummy forward pass is discarded.

    Returns:
        The newly constructed `AlphaRTModel` instance.
    """
    torch.manual_seed(1337)

    rt_model = AlphaRTModel()
    rt_model.set_device('cpu')
    rt_model.model.to(rt_model.device)

    # One dummy sample of length six: integer inputs 1..6 plus an all-zero feature
    # vector per position whose width is the number of entries in
    # model_const['mod_elements'].
    # NOTE(review): presumably amino-acid indices and modification features - confirm.
    n_mod_features = len(model_const['mod_elements'])
    dummy_indices = torch.LongTensor([[1, 2, 3, 4, 5, 6]])
    dummy_mod_features = torch.tensor([[[0.0] * n_mod_features] * 6])
    rt_model.model(dummy_indices, dummy_mod_features)

    return rt_model

# Notebook-level model instance; it is the one trained on `repeat_row_df` further down.
model = initialize_model()

def create_test_dataframe_with_identical_rows(nrows = 10):
    """Return a precursor DataFrame in which one fixed peptide row is repeated.

    Args:
        nrows: Number of identical rows to generate (default 10).

    Returns:
        A `pd.DataFrame` with the columns 'sequence', 'mods', 'mod_sites',
        'nAA' and 'rt_norm', each holding the same value in every row.
    """
    # Single-row template; every column is this value repeated `nrows` times.
    row_template = {
        'sequence': 'AGHCEWQMKYR',
        'mods': 'Acetyl@Protein_N-term;Carbamidomethyl@C;Oxidation@M',
        'mod_sites': '0;4;8',
        'nAA': 11,
        'rt_norm': 0.6,
    }
    repeated_columns = {
        column: [value] * nrows for column, value in row_template.items()
    }
    return pd.DataFrame(repeated_columns)

# Test frame of identical rows (default row count), shared with test_prediction().
repeat_row_df = create_test_dataframe_with_identical_rows()

# Short training run of five epochs with per-epoch progress output.
model.train(
    repeat_row_df,
    epoch=5,
    verbose_each_epoch=True,
)

# Report the value returned by get_parameter_num() for the trained model.
print(model.get_parameter_num())

def test_prediction():
    """Check that identical input rows receive (almost) identical RT predictions.

    A freshly initialized model predicts on the notebook-level `repeat_row_df`;
    every value in the resulting 'rt_pred' column is then compared against the
    first one with `np.testing.assert_almost_equal`.
    """
    fresh_model = initialize_model()
    predicted_df = fresh_model.predict(repeat_row_df)
    display(predicted_df)

    rt_values = list(predicted_df["rt_pred"])
    # Reference list: the first prediction repeated once per row.
    expected_rts = [rt_values[0]] * len(rt_values)
    np.testing.assert_almost_equal(rt_values, expected_rts)
test_prediction()

Epoch=1, nAA=11, batch=1, loss=0.5976: 100%|██████████| 1/1 [00:00<00:00, 16.27it/s]
Epoch=2, nAA=11, batch=1, loss=0.5916: 100%|██████████| 1/1 [00:00<00:00, 23.82it/s]
Epoch=3, nAA=11, batch=1, loss=0.5859: 100%|██████████| 1/1 [00:00<00:00, 22.64it/s]
Epoch=4, nAA=11, batch=1, loss=0.5779: 100%|██████████| 1/1 [00:00<00:00, 25.14it/s]
Epoch=5, nAA=11, batch=1, loss=0.5764: 100%|██████████| 1/1 [00:00<00:00,  9.01it/s]

Model_RT_LSTM_CNN(
  (dropout): Dropout(p=0.1, inplace=False)
  (rt_encoder): Encoder_26AA_Mod_CNN_LSTM_AttnSum(
    (mod_nn): Mod_Embedding_FixFirstK(
      (nn): Linear(in_features=103, out_features=2, bias=False)
    )
    (input_cnn): SeqCNN(
      (cnn_short): Conv1d(35, 35, kernel_size=(3,), stride=(1,), padding=(1,))
      (cnn_medium): Conv1d(35, 35, kernel_size=(5,), stride=(1,), padding=(2,))
      (cnn_long): Conv1d(35, 35, kernel_size=(7,), stride=(1,), padding=(3,))
    )
    (hidden_nn): SeqLSTM(
      (rnn): LSTM(140, 128, num_layers=2, batch_first=True, bidirectional=True)
    )
    (attn_sum): SeqAttentionSum(
      (attn): Sequential(
        (0): Linear(in_features=256, out_features=1, bias=False)
        (1): Softmax(dim=1)
      )
    )
  )
  (rt_decoder): Decoder_Linear(
    (nn): Sequential(
      (0): Linear(in_features=256, out_features=64, bias=True)
      (1): PReLU(num_parameters=1)
      (2): Linear(in_features=64, out_features=1, bias=True)
    )
  )
)
708




In [None]:
#|hide
from peptdeep.pretrained_models import ModelManager

In [None]:
#| hide
# Load the installed pretrained models on the CPU.
models = ModelManager(device='cpu')
models.load_installed_models()
# First seven rows of the iRT reference table, without the 'irt' column.
pep_df = IRT_PEPTIDE_DF.iloc[:7].drop(columns=['irt'])
# NOTE(review): this first `df_w_rt_prediction` is never read; the name is reassigned below.
df_w_rt_prediction = models.rt_model.predict(pep_df)
df_w_irt_prediction_added = models.rt_model.add_irt_column_to_precursor_df(pep_df)

# Both the predicted RT and the derived iRT must increase monotonically over these rows.
# NOTE(review): presumably relies on IRT_PEPTIDE_DF being ordered by reference iRT - confirm.
assert df_w_irt_prediction_added.rt_pred.is_monotonic_increasing
assert df_w_irt_prediction_added.irt_pred.is_monotonic_increasing
# Repeat on the full reference table. A later cell reads 'irt_pred' directly from
# IRT_PEPTIDE_DF, so these calls appear to add their columns in place - TODO confirm.
df_w_rt_prediction = models.rt_model.predict(IRT_PEPTIDE_DF)
# Last expression of the cell: its return value is rendered as the cell output.
models.rt_model.add_irt_column_to_precursor_df(IRT_PEPTIDE_DF)

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,irt_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804,-28.148849
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196,2.053492
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649,11.408902
3,YILAGVENSK,RT-pep d,19.79,,,10,0.400949,21.806524
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901,27.584271
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774,35.328937
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729,43.390475
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782,53.598396
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164,76.035216
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791,89.679588


In [None]:
#|hide
from peptdeep.utils import evaluate_linear_regression

In [None]:
#| hide
# Linear-regression summary between the reference 'irt' column and the predicted 'irt_pred'
# column. Requires 'irt_pred' to already be present on IRT_PEPTIDE_DF, i.e. the preceding
# prediction cell must have been run first.
evaluate_linear_regression(IRT_PEPTIDE_DF, 'irt', 'irt_pred')

  x = pd.concat(x[::order], 1)


Unnamed: 0,R_square,R,slope,intercept,test_num
0,0.99,0.994987,0.9901,0.3828,11.0
