# Using the pretrained models

All pretrained MS2/RT/CCS models are managed in `alphadeep.pretrained_models`, where the `ModelManager` class is the main entry point to all models.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before each
# cell is executed, so edits to the package are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

### Predicting RT values

In [2]:
from alphadeep.pretrained_models import ModelManager

# ModelManager is the single entry point to the pretrained models; the cells
# below reach them through `rt_model`, `ccs_model` and `ms2_model`.
model_mgr = ModelManager()
# NOTE(review): presumably loads the model weights shipped with the installed
# package -- confirm against the ModelManager implementation.
model_mgr.load_installed_models()

We use the iRT peptides as the test peptides.

In [3]:
from alphadeep.model.rt import irt_pep
# Work on a copy: the prediction calls below add columns to the dataframe they
# are given, and `irt_pep` is copied again later in this notebook.
pep_df = irt_pep.copy()

First, we test the RT prediction model (`model_mgr.rt_model`)

In [4]:
# The prediction is written into `pep_df` itself as a new `rt_pred` column;
# the return value of `predict` is not used here.
model_mgr.rt_model.predict(pep_df)
# Bare expression on the last line so the notebook renders the updated dataframe.
pep_df

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649
3,YILAGVENSK,RT-pep d,19.79,,,10,0.400949
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791


We predict the normalized retention time (`rt_pred`, normally ranging from 0 to 1) instead of real RT values. It can be converted to real RT values by multiplying it by the maximal RT of the LC gradient. We can also convert `rt_pred` into iRT values (`irt_pred`) based on the 11 iRT peptides.

In [5]:
# Converts the existing `rt_pred` values into iRT values and stores them in
# `pep_df` as a new `irt_pred` column.
model_mgr.rt_model.rt_to_irt_pred(pep_df)
# Display the dataframe; compare `irt_pred` against the reference `irt` column.
pep_df

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,irt_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804,-28.148849
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196,2.053492
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649,11.408902
3,YILAGVENSK,RT-pep d,19.79,,,10,0.400949,21.806524
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901,27.584271
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774,35.328937
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729,43.390475
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782,53.598396
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164,76.035216
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791,89.679588


### Predicting CCS values

After adding `charge` to `pep_df`, we can predict the CCS values for the given peptides (precursors) using `model_mgr.ccs_model`, and then convert them into mobility values. Note that these mobility values are Bruker timsTOF mobility values.

In [6]:
# Assign the same precursor charge (3) to every peptide; the scalar is applied to all rows.
pep_df['charge'] = 3
# Both calls modify `pep_df` in place. After this cell the dataframe has gained
# the `ccs_pred`, `precursor_mz` and `mobility_pred` columns.
# NOTE(review): which of the two calls adds `precursor_mz` is not visible here -- confirm.
model_mgr.ccs_model.predict(pep_df)
model_mgr.ccs_model.ccs_to_mobility_pred(pep_df)
pep_df

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,irt_pred,charge,ccs_pred,precursor_mz,mobility_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804,-28.148849,3,382.416138,325.173562,0.627622
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196,2.053492,3,453.66864,430.217496,0.747111
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649,11.408902,3,479.035492,456.221018,0.789363
3,YILAGVENSK,RT-pep d,19.79,,,10,0.400949,21.806524,3,444.596069,365.201118,0.73079
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901,27.584271,3,451.388611,446.894465,0.74365
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774,35.328937,3,470.053528,456.238232,0.774563
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729,43.390475,3,488.397858,466.561374,0.804969
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782,53.598396,3,465.785065,484.892901,0.767984
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164,76.035216,3,448.219818,415.571434,0.737861
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791,89.679588,3,469.222473,424.915201,0.772623


### Predicting MS2 fragment (b/y) ion intensities

`model_mgr.ms2_model` predicts the fragment ion intensities of the `pep_df`. We need `nce` and `instrument` for fragment prediction. We store the predicted fragment intensities in a new dataframe.

In [7]:
# The MS2 model needs `nce` and `instrument` for every precursor; the same
# values are used for all peptides here.
pep_df['nce'] = 0.3
pep_df['instrument'] = 'Lumos'
# Here the return value is kept: the predicted intensities come back as a
# separate dataframe with one column per fragment type (b_z1, b_z2, y_z1, ...).
fragment_intensity_df = model_mgr.ms2_model.predict(pep_df)
fragment_intensity_df

Unnamed: 0,b_z1,b_z2,y_z1,y_z2,b_modloss_z1,b_modloss_z2,y_modloss_z1,y_modloss_z2
0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
1,0.215521,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
2,0.108832,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0
3,0.187404,0.0,0.018003,0.000000,0.0,0.0,0.0,0.0
4,0.051630,0.0,0.177014,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
118,0.000000,0.0,0.005737,0.000000,0.0,0.0,0.0,0.0
119,0.000000,0.0,0.412741,0.078777,0.0,0.0,0.0,0.0
120,0.000000,0.0,0.023614,0.000000,0.0,0.0,0.0,0.0
121,0.000000,0.0,0.149743,0.000000,0.0,0.0,0.0,0.0


`ms2_model.predict` also appends `frag_start_idx` and `frag_end_idx` to `pep_df`; they point to the start and end (stop) positions of the corresponding peptide's fragments in `fragment_intensity_df`.

In [8]:
# Show only the peptide identity columns together with the fragment index range.
pep_df[['sequence', 'mods', 'mod_sites', 'frag_start_idx', 'frag_end_idx']]

Unnamed: 0,sequence,mods,mod_sites,frag_start_idx,frag_end_idx
0,LGGNEQVTR,,,0,8
1,GAGSSEPVTGLDAK,,,8,21
2,VEATFGVDESNAK,,,21,33
3,YILAGVENSK,,,33,42
4,TPVISGGPYEYR,,,42,53
5,TPVITGAPYEYR,,,53,64
6,DGLDAASYYAPVR,,,64,76
7,ADVTPADFSEWSK,,,76,88
8,GTFIIDPGGVIR,,,88,99
9,GTFIIDPAAVIR,,,99,110


We can calculate the fragment m/z values for `pep_df` with `create_fragment_mz_dataframe`. Because `pep_df` already contains `frag_start_idx` and `frag_end_idx` pointing to a fragment dataframe (i.e. `fragment_intensity_df`), we have to pass `fragment_intensity_df` as the `reference_fragment_df` argument to make sure that `fragment_mz_df` and `fragment_intensity_df` have the same order.

In [9]:
import alphabase.peptide.fragment as fragment

# Build the m/z dataframe using the layout of the already predicted intensity
# dataframe, so that rows in both dataframes refer to the same fragments.
fragment_mz_df = fragment.create_fragment_mz_dataframe(
    pep_df,
    ['b_z1', 'b_z2', 'y_z1', 'y_z2'],
    reference_fragment_df=fragment_intensity_df,
)
print(pep_df['sequence'].values[0])

# Rows [frag_start_idx, frag_end_idx) hold the fragments of the first peptide.
fragment_mz_df.iloc[
    slice(pep_df['frag_start_idx'].values[0], pep_df['frag_end_idx'].values[0])
]

LGGNEQVTR


Unnamed: 0,b_z1,b_z2,y_z1,y_z2
0,114.09134,57.549308,860.42207,430.714673
1,171.112804,86.06004,803.400606,402.203941
2,228.134268,114.570772,746.379143,373.69321
3,342.177195,171.592236,632.336215,316.671746
4,471.219788,236.113532,503.293622,252.150449
5,599.278366,300.142821,375.235045,188.121161
6,698.34678,349.677028,276.166631,138.586954
7,799.394458,400.200867,175.118952,88.063114


We can also call `create_fragment_mz_dataframe()` first and then predict the MS2 intensities.

In [10]:
# Drop the index columns left behind by the earlier prediction so that the
# m/z dataframe below is created without needing a reference dataframe.
del pep_df['frag_start_idx']
del pep_df['frag_end_idx']

# Reverse order of the previous workflow: create the m/z dataframe first ...
fragment_mz_df2 = fragment.create_fragment_mz_dataframe(
    pep_df, ['b_z1','b_z2','y_z1','y_z2'],
)
# ... then predict the intensities using the m/z dataframe as the reference.
fragment_intensity_df2 = model_mgr.ms2_model.predict(pep_df, reference_frag_df=fragment_mz_df2)

import numpy as np
# Sanity check: both workflows are expected to give the same intensities.
assert np.allclose(fragment_intensity_df.values, fragment_intensity_df2.values)

In [11]:
# `frag_start_idx` / `frag_end_idx` are present again after the previous cell.
pep_df

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,irt_pred,charge,ccs_pred,precursor_mz,mobility_pred,nce,instrument,frag_start_idx,frag_end_idx
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804,-28.148849,3,382.416138,325.173562,0.627622,0.3,Lumos,0,8
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196,2.053492,3,453.66864,430.217496,0.747111,0.3,Lumos,8,21
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649,11.408902,3,479.035492,456.221018,0.789363,0.3,Lumos,21,33
3,YILAGVENSK,RT-pep d,19.79,,,10,0.400949,21.806524,3,444.596069,365.201118,0.73079,0.3,Lumos,33,42
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901,27.584271,3,451.388611,446.894465,0.74365,0.3,Lumos,42,53
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774,35.328937,3,470.053528,456.238232,0.774563,0.3,Lumos,53,64
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729,43.390475,3,488.397858,466.561374,0.804969,0.3,Lumos,64,76
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782,53.598396,3,465.785065,484.892901,0.767984,0.3,Lumos,76,88
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164,76.035216,3,448.219818,415.571434,0.737861,0.3,Lumos,88,99
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791,89.679588,3,469.222473,424.915201,0.772623,0.3,Lumos,99,110


# Using `PredictSpecLib`

For a given precursor_df or peptide_df, we can also directly predict a spectral library using the `PredictSpecLib` class in `alphadeep.spec_lib.predict_lib`.

In [14]:
from alphadeep.spec_lib.predict_lib import PredictSpecLib

# Start again from a fresh copy of the iRT peptides and add the per-precursor
# inputs used by the models (charge, nce, instrument).
pep_df = irt_pep.copy()
pep_df['charge'] = 2
pep_df['nce'] = 0.3
pep_df['instrument'] = 'Lumos'
# The library is built on the already loaded models in `model_mgr`; the list
# names the fragment types (columns) to include.
lib = PredictSpecLib(model_mgr, ['b_z1','b_z2','y_z1','y_z2'])
lib.precursor_df = pep_df
# NOTE(review): presumably creates the fragment m/z and predicted intensity
# dataframes for `precursor_df` -- confirm against PredictSpecLib.
lib.load_fragment_df()
# Add the RT/iRT and CCS/mobility predictions to `lib.precursor_df`.
lib.predict_rt()
lib.predict_mobility()

100%|██████████| 5/5 [00:00<00:00, 57.78it/s]
100%|██████████| 5/5 [00:00<00:00, 142.97it/s]
100%|██████████| 5/5 [00:00<00:00, 171.73it/s]


In [15]:
# The precursor table now carries the fragment index columns and all predictions.
# Note that the rows are no longer in input order (in the output below they
# appear ordered by `nAA`).
lib.precursor_df

Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,charge,nce,instrument,frag_start_idx,frag_end_idx,irt_pred,precursor_mz,ccs_pred,mobility_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.072804,2,0.3,Lumos,0,8,-28.148849,487.256705,331.279816,0.815533
1,YILAGVENSK,RT-pep d,19.79,,,10,0.400949,2,0.3,Lumos,8,17,21.806524,547.298039,364.828003,0.8995
2,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.438901,2,0.3,Lumos,17,28,27.584271,669.838059,394.317596,0.974434
3,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.489774,2,0.3,Lumos,28,39,35.328937,683.853709,399.848633,0.988309
4,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.757164,2,0.3,Lumos,39,50,76.035216,622.853512,379.443451,0.936954
5,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.846791,2,0.3,Lumos,50,61,89.679588,636.869163,387.88678,0.958034
6,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.332649,2,0.3,Lumos,61,73,11.408902,683.827889,394.208893,0.974369
7,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.542729,2,0.3,Lumos,73,85,43.390475,699.338423,399.736542,0.988252
8,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.609782,2,0.3,Lumos,85,97,53.598396,726.835714,405.532562,1.002953
9,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.271196,2,0.3,Lumos,97,110,2.053492,644.822606,382.269806,0.944286


In [18]:
print(lib.precursor_df['sequence'].values[0])

# Fragment m/z rows belonging to the first precursor of the library.
lib.fragment_mz_df.iloc[
    slice(
        lib.precursor_df['frag_start_idx'].values[0],
        lib.precursor_df['frag_end_idx'].values[0],
    )
]

LGGNEQVTR


Unnamed: 0,b_z1,b_z2,y_z1,y_z2
0,114.09134,57.549308,860.42207,430.714673
1,171.112804,86.06004,803.400606,402.203941
2,228.134268,114.570772,746.379143,373.69321
3,342.177195,171.592236,632.336215,316.671746
4,471.219788,236.113532,503.293622,252.150449
5,599.278366,300.142821,375.235045,188.121161
6,698.34678,349.677028,276.166631,138.586954
7,799.394458,400.200867,175.118952,88.063114
