In [None]:
#---#| default_exp pretrained_models

# Pretrained Models

This module provides integrated functionality for the MS2/RT/CCS models.

`peptdeep.pretrained_models` handles the pretrained models, including downloading, installing, and loading the models.

---

## 1. Downloading and installing the models
For continuous model deployment, we uploaded several pretrained models (compressed as a ZIP file) onto a net disk. peptdeep will automatically download the ZIP file into `global_settings['PEPTDEEP_HOME']/pretrained_models/pretrained_models.zip` when importing `peptdeep.pretrained_models`. The models are downloaded only once; to update them to the latest version, call `download_models(overwrite=True)`.

In [None]:
from peptdeep.pretrained_models import *

In [None]:
#| hide
# Make sure the pretrained-model archive is present locally, then verify that
# `is_model_zip` accepts the file before any later cell tries to load from it.
download_models()
assert is_model_zip(MODEL_ZIP_FILE_PATH), (
    f"is_model_zip() rejected the downloaded archive: {MODEL_ZIP_FILE_PATH}"
)

## 2. Loading the models
peptdeep provides convenient APIs to load models from ZIP files.

`load_models` loads the generic models for unmodified peptides, and `load_phos_models` loads the phospho models. Note that the MS2 and CCS prediction models are the same for the generic and phospho model sets because they were trained on both generic and phospho peptides.

## 3. Using `ModelManager`

For users, the `ModelManager` class is all that is needed to manage models (loading, transfer learning, etc.). Depending on its arguments, `ModelManager.load_installed_models` will call `load_models` or `load_phos_models`. For external models, `ModelManager.load_external_models` will load them from a file path or a file stream. Here is an example:

```
from zipfile import ZipFile

admodel = ModelManager()
ext_zip = 'external_models.zip' # model compressed in ZIP
rt_model_path = '/path/to/rt.pth' # model as file path
with ZipFile(ext_zip) as model_zip:
    with model_zip.open('generic/ms2.pth','r') as ms2_file:
        admodel.load_external_models(ms2_model_file=ms2_file, rt_model_file=rt_model_path)
```

Transfer learning for the different models can also be done with `ModelManager` by passing it the training dataframes.

In [None]:
#| hide
# Import the stdlib names explicitly instead of relying on them leaking in
# through `from peptdeep.pretrained_models import *`.
import os
from zipfile import ZipFile

assert os.path.isfile(MODEL_ZIP_FILE_PATH), (
    f"pretrained model archive not found: {MODEL_ZIP_FILE_PATH}"
)

# Every model file the archive is expected to ship with.
_expected_members = (
    'generic/ms2.pth',
    'generic/rt.pth',
    'generic/ccs.pth',
    'digly/rt_digly.pth',
    'phospho/rt_phos.pth',
)
with ZipFile(MODEL_ZIP_FILE_PATH) as _zip:
    for _member in _expected_members:
        # ZipFile.open raises KeyError when the member is missing, which is
        # what makes this cell fail for an incomplete archive.
        with _zip.open(_member):
            pass

In [None]:
#| hide
from io import StringIO

In [None]:
#| hide

# Reference fragment-intensity table embedded as CSV text so the check below
# needs no external file. The first (unnamed) CSV column becomes the row index
# (0..15); the remaining eight columns are b/y ions at charge 1 and 2, with and
# without modloss.
# NOTE(review): presumably these are experimentally matched intensities for
# 'ANEKTESSSAQQVAVSR' (the peptide predicted further down) — confirm provenance.
matched_df = pd.read_csv(
    StringIO(',b_z1,b_z2,y_z1,y_z2,b_modloss_z1,b_modloss_z2,y_modloss_z1,y_modloss_z2\r\n'
        '0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n1,0.13171915994341352,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '2,0.09560456716002332,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '3,0.032392355556351476,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '4,0.06267661211925589,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '5,0.10733421416437268,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '6,0.07955175724673087,0.0,0.0,0.0,0.0,0.0,0.0,0.0\r\n'
        '7,0.08283861204882843,0.0,0.03294760940125559,0.0,0.0,0.0,0.0,0.0\r\n'
        '8,0.0914959582993716,0.0,0.09471333271745186,0.0,0.0,0.0,0.0,0.0\r\n'
        '9,0.10283525167783934,0.0,0.29624251030302834,0.0,0.0,0.0,0.0,0.0\r\n'
        '10,0.02220051360812495,0.0272619351931404,0.8077539764174795,0.0,0.0,0.0,0.0,0.0\r\n'
        '11,0.0,0.02411148245999131,0.851474013001872,0.0,0.0,0.0,0.0,0.0\r\n'
        '12,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0\r\n13,0.0,0.0,0.22244818653184315,0.0,0.0,0.0,0.0,0.0\r\n'
        '14,0.0,0.0,0.21824010319946407,0.0,0.0,0.0,0.0,0.0\r\n'
        '15,0.0,0.0,0.16690493688692923,0.0,0.0,0.0,0.0,0.0\r\n'),
    index_col=0
)

# Module-level manager used by `pred_one` below (it reads the global `model_mgr`).
# Created on CPU with mask_modloss=True; the phospho cells later in the notebook
# build their own manager with mask_modloss=False.
model_mgr = ModelManager(mask_modloss=True, device="cpu")
# NOTE(review): presumably silences progress/log output during prediction — confirm.
model_mgr.verbose=False
def pred_one(seq, mods, mod_sites, charge, nce=35, instrument="Lumos"):
    """Predict the fragment intensities of a single precursor.

    A one-row precursor dataframe is built from the arguments and passed to
    the global `model_mgr`. The manager is looked up at call time, so
    rebinding `model_mgr` in a later cell changes which models are used.

    Parameters
    ----------
    seq : str
        Peptide sequence.
    mods : str
        Modification names (e.g. 'Phospho@S'); '' for an unmodified peptide.
    mod_sites : str
        Modification sites matching `mods` (e.g. '9'); '' for none.
    charge : int
        Precursor charge state.
    nce : int, optional
        Value written to the 'nce' column. Defaults to 35, the value that
        used to be hard-coded here.
    instrument : str, optional
        Value written to the 'instrument' column. Defaults to "Lumos".

    Returns
    -------
    The 'fragment_intensity_df' entry of the dict returned by
    `model_mgr.predict_all`.
    """
    # The list-valued columns fix the frame length at one row; the scalar
    # values are broadcast to that single row.
    precursor_df = pd.DataFrame({
        "sequence": [seq],
        "mods": [mods],
        "mod_sites": [mod_sites],
        "charge": charge,
        "nce": nce,
        "instrument": instrument,
    })
    predict_dict = model_mgr.predict_all(
        precursor_df, predict_items=['mobility','rt','ms2'],
        multiprocessing=False
    )
    return predict_dict['fragment_intensity_df']

# Predict the unmodified, triply charged peptide that the reference table is compared against.
pred_df = pred_one(
    seq='ANEKTESSSAQQVAVSR',
    mods='',
    mod_sites='',
    charge=3,
)

def get_pcc(matched_df, pred_df):
    """Pearson correlation between two intensity tables, flattened.

    `matched_df` is first restricted (and reordered) to the columns present
    in `pred_df`. Each table is then centred on its own global mean and
    flattened; the cosine similarity of the two centred vectors is the
    Pearson correlation coefficient.

    Returns a 0-dimensional torch tensor.
    """
    reference_values = matched_df[pred_df.columns.values].values
    predicted_values = pred_df.values

    predicted_centered = torch.tensor(
        (predicted_values - predicted_values.mean()).reshape(-1)
    )
    reference_centered = torch.tensor(
        (reference_values - reference_values.mean()).reshape(-1)
    )
    return torch.nn.functional.cosine_similarity(
        predicted_centered, reference_centered, dim=0
    )
# Keep the measured correlation so a failure reports how far off the prediction was.
_pcc = get_pcc(matched_df, pred_df)
assert _pcc > 0.95, (
    f"predicted vs. reference fragment intensities: PCC={float(_pcc):.4f}, expected > 0.95"
)

In [None]:
from peptdeep.model.rt import IRT_PEPTIDE_DF

In [None]:
# Rebind the global `model_mgr` to a CPU manager with modloss masking disabled and
# load the installed 'phos' models; `pred_one` will use this manager from now on.
model_mgr = ModelManager(mask_modloss=False, device='cpu')
model_mgr.load_installed_models('phos')
# NOTE(review): the displayed table contains rt_pred / rt_norm_pred columns, so
# predict_rt appears to write into the imported IRT_PEPTIDE_DF in place — confirm.
model_mgr.predict_rt(IRT_PEPTIDE_DF)
# Last expression of the cell: its return value is what the notebook displays
# (the table below, which includes an irt_pred column).
model_mgr.rt_model.add_irt_column_to_precursor_df(IRT_PEPTIDE_DF)

2023-01-13 12:47:10> Predicting RT ...


100%|██████████| 5/5 [00:00<00:00, 253.64it/s]


Unnamed: 0,sequence,pep_name,irt,mods,mod_sites,nAA,rt_pred,rt_norm_pred,irt_pred
0,LGGNEQVTR,RT-pep a,-24.92,,,9,0.184235,0.184235,-26.123539
1,GAGSSEPVTGLDAK,RT-pep b,0.0,,,14,0.250092,0.250092,4.238097
2,VEATFGVDESNAK,RT-pep c,12.39,,,13,0.266133,0.266133,11.63313
3,YILAGVENSK,RT-pep d,19.79,,,10,0.290495,0.290495,22.864808
4,TPVISGGPYEYR,RT-pep e,28.71,,,12,0.303847,0.303847,29.020255
5,TPVITGAPYEYR,RT-pep f,33.38,,,12,0.316514,0.316514,34.860146
6,DGLDAASYYAPVR,RT-pep g,42.26,,,13,0.324423,0.324423,38.506304
7,ADVTPADFSEWSK,RT-pep h,54.62,,,13,0.345197,0.345197,48.083872
8,GTFIIDPGGVIR,RT-pep i,70.52,,,12,0.394248,0.394248,70.69747
9,GTFIIDPAAVIR,RT-pep k,87.23,,,12,0.434775,0.434775,89.381159


In [None]:
#| hide
def pred_one(seq, mods, mod_sites, charge, nce=30, instrument="Lumos"):
    """Predict the fragment intensities of a single precursor.

    A one-row precursor dataframe is built from the arguments and passed to
    the global `model_mgr`. The manager is looked up at call time, so
    rebinding `model_mgr` in a later cell changes which models are used.

    Parameters
    ----------
    seq : str
        Peptide sequence.
    mods : str
        Modification names (e.g. 'Phospho@S'); '' for an unmodified peptide.
    mod_sites : str
        Modification sites matching `mods` (e.g. '9'); '' for none.
    charge : int
        Precursor charge state.
    nce : int, optional
        Value written to the 'nce' column. Defaults to 30, the value that
        used to be hard-coded here.
    instrument : str, optional
        Value written to the 'instrument' column. Defaults to "Lumos".

    Returns
    -------
    The 'fragment_intensity_df' entry of the dict returned by
    `model_mgr.predict_all`.
    """
    # The list-valued columns fix the frame length at one row; the scalar
    # values are broadcast to that single row.
    precursor_df = pd.DataFrame({
        "sequence": [seq],
        "mods": [mods],
        "mod_sites": [mod_sites],
        "charge": charge,
        "nce": nce,
        "instrument": instrument,
    })
    predict_dict = model_mgr.predict_all(
        precursor_df, predict_items=['mobility','rt','ms2'],
        multiprocessing=False
    )
    return predict_dict['fragment_intensity_df']

model_mgr.verbose = False

# (sequence, mods, mod_sites, expect_modloss): the checks expect a y-ion
# modloss intensity above 0.5 for the phospho-S/T peptides and none at all
# (every value <= 0) for the phospho-Y peptide.
_modloss_cases = [
    ('ANEKTESSSAQQVAVSR', 'Phospho@S', '9', True),
    ('ANEKTESSTAQQVAVSR', 'Phospho@T', '9', True),
    ('ANEKTESSSAQQVAVSR', 'Phospho@S', '16', True),
    ('ANEKTESSYAQQVAVSR', 'Phospho@Y', '9', False),
]
for _seq, _mods, _sites, _expect_modloss in _modloss_cases:
    pred_df = pred_one(_seq, _mods, _sites, 2)
    _y_modloss = pred_df.y_modloss_z1.values
    if _expect_modloss:
        assert (_y_modloss > 0.5).any(), (
            f"{_seq} [{_mods} at {_sites}]: no y_modloss_z1 intensity above 0.5"
        )
    else:
        assert (_y_modloss <= 0).all(), (
            f"{_seq} [{_mods} at {_sites}]: unexpected positive y_modloss_z1 intensity"
        )

In [None]:
#| hide
# The iRT values are copied into the 'rt_norm' and 'ccs' columns. Note that this
# writes into the imported IRT_PEPTIDE_DF object itself, not a copy.
# NOTE(review): presumably these are the target columns the two training calls
# read, used here only as placeholder values for a quick run — confirm.
IRT_PEPTIDE_DF['rt_norm'] = IRT_PEPTIDE_DF['irt']
IRT_PEPTIDE_DF['ccs'] = IRT_PEPTIDE_DF['irt']
# NOTE(review): looks like a single training epoch, to keep the cell fast — confirm.
model_mgr.epoch_to_train_rt_ccs = 1
model_mgr.train_rt_model(IRT_PEPTIDE_DF)
model_mgr.train_ccs_model(IRT_PEPTIDE_DF)

2023-01-13 12:47:11> 11 PSMs for RT model training/transfer learning


In [None]:
from peptdeep.utils import get_device

# Repeat the phospho prediction and training checks on a non-CPU device.
# NOTE(review): presumably get_device reports 'cpu' when 'mps' is unavailable,
# in which case this whole cell is skipped — confirm.
device, device_type = get_device('mps')
if device_type != 'cpu':
    # Rebinding the global `model_mgr` makes `pred_one` use this manager.
    model_mgr = ModelManager(mask_modloss=False, device=device_type)
    model_mgr.load_installed_models('phos')
    model_mgr.predict_rt(IRT_PEPTIDE_DF)
    model_mgr.rt_model.add_irt_column_to_precursor_df(IRT_PEPTIDE_DF)

    model_mgr.verbose = False

    # (sequence, mods, mod_sites, expect_modloss): the checks expect a y-ion
    # modloss intensity above 0.5 for the phospho-S/T peptides and none at
    # all (every value <= 0) for the phospho-Y peptide.
    _modloss_cases = [
        ('ANEKTESSSAQQVAVSR', 'Phospho@S', '9', True),
        ('ANEKTESSTAQQVAVSR', 'Phospho@T', '9', True),
        ('ANEKTESSSAQQVAVSR', 'Phospho@S', '16', True),
        ('ANEKTESSYAQQVAVSR', 'Phospho@Y', '9', False),
    ]
    for _seq, _mods, _sites, _expect_modloss in _modloss_cases:
        pred_df = pred_one(_seq, _mods, _sites, 2)
        _y_modloss = pred_df.y_modloss_z1.values
        if _expect_modloss:
            assert (_y_modloss > 0.5).any(), (
                f"[{device_type}] {_seq} [{_mods} at {_sites}]: "
                "no y_modloss_z1 intensity above 0.5"
            )
        else:
            assert (_y_modloss <= 0).all(), (
                f"[{device_type}] {_seq} [{_mods} at {_sites}]: "
                "unexpected positive y_modloss_z1 intensity"
            )

    # Copy the iRT values into the 'rt_norm' and 'ccs' columns (this writes
    # into the imported frame itself), then run a single-epoch training pass.
    IRT_PEPTIDE_DF['rt_norm'] = IRT_PEPTIDE_DF['irt']
    IRT_PEPTIDE_DF['ccs'] = IRT_PEPTIDE_DF['irt']
    model_mgr.epoch_to_train_rt_ccs = 1
    model_mgr.train_rt_model(IRT_PEPTIDE_DF)
    model_mgr.train_ccs_model(IRT_PEPTIDE_DF)

2023-01-13 12:48:35> Predicting RT ...


100%|██████████| 5/5 [00:00<00:00, 11.12it/s]


2023-01-13 12:48:39> 11 PSMs for RT model training/transfer learning


  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
