In [None]:
#---#| default_exp pipeline_api

# pipeline API

This module provides high-level APIs for different pipelines such as transfer learning, library generation, and rescoring (percolator).

## Transfer learning pipeline

Transfer learning pipeline includes:
1. Loading PSM files of the search engine results.
2. Matching PSMs against the MS files.
3. Loading pre-trained models and refining RT/CCS(/MS2) models.

The refined models will be saved in the path pointed to by "PEPTDEEP_HOME" in `peptdeep.settings.global_settings`.

In [None]:
from peptdeep.pipeline_api import global_settings
import os
import warnings

#### Test library generation

In [None]:
from peptdeep.pipeline_api import generate_library

# Configure the 'library' section of the global settings so that the
# library is built from a single FASTA file with contaminants switched off.
fasta = os.path.expanduser('~/Workspace/Data/fasta/irtfusion.fasta')
lib_settings = global_settings['library']
lib_settings['infile_type'] = 'fasta'
lib_settings['infiles'] = [fasta]
lib_settings['fasta']['add_contaminants'] = False

# Run only when the FASTA file exists on this machine; otherwise report
# that the call is skipped.
if not os.path.isfile(fasta):
    print("`pipeline_api.generate_library()` will be not tested")
else:
    generate_library()

2023-01-20 09:28:27> Platform information:
2023-01-20 09:28:27> system        - Darwin
2023-01-20 09:28:27> release       - 20.6.0
2023-01-20 09:28:27> version       - 10.16
2023-01-20 09:28:27> machine       - x86_64
2023-01-20 09:28:27> processor     - i386
2023-01-20 09:28:27> cpu count     - 8
2023-01-20 09:28:27> ram           - 10.8/32.0 Gb (available/total)
2023-01-20 09:28:27> 
2023-01-20 09:28:28> Python information:
2023-01-20 09:28:28> alphabase        - 1.0.0
2023-01-20 09:28:28> biopython        - 1.78
2023-01-20 09:28:28> click            - 8.1.3
2023-01-20 09:28:28> lxml             - 4.6.2
2023-01-20 09:28:28> numba            - 0.54.0
2023-01-20 09:28:28> numpy            - 1.19.4
2023-01-20 09:28:28> pandas           - 1.3.5
2023-01-20 09:28:28> peptdeep         - 1.0.1
2023-01-20 09:28:28> psutil           - 5.8.0
2023-01-20 09:28:28> python           - 3.8.3
2023-01-20 09:28:28> scikit-learn     - 1.1.1
2023-01-20 09:28:28> streamlit        - 1.16.0
2023-01-20 09:28

100%|██████████| 13/13 [00:00<00:00, 46.66it/s]

2023-01-20 09:28:41> Predicting mobility ...



100%|██████████| 13/13 [00:00<00:00, 67.14it/s]


2023-01-20 09:28:41> Predicting MS2 ...


100%|██████████| 13/13 [00:00<00:00, 19.11it/s]

2023-01-20 09:28:41> End predicting RT/IM/MS2
2023-01-20 09:28:41> Predicting the spectral library with 156 precursors and 0.01M fragments used 0.3971 GB memory
2023-01-20 09:28:41> Saving HDF library to /Users/zengwenfeng/peptdeep/spec_libs/predict.speclib.hdf ...





2023-01-20 09:28:42> Library generated!!


#### Test transfer learning

In [None]:
from peptdeep.pipeline_api import transfer_learn

# The same HDF file is used both as the PSM input and as the MS data input.
alphapept_hdf = os.path.expanduser('~/Workspace/Data/Thermo_iRT/iRT.ms_data.hdf')
mgr_settings = global_settings['model_mgr']

# Overrides written into the 'transfer' section of the model manager
# settings: input files plus 5 epochs / 2 warm-up epochs for both the
# MS2 and the RT/CCS models.
transfer_overrides = {
    'psm_files': [alphapept_hdf],
    'psm_type': 'alphapept',
    'ms_file_type': 'alphapept_hdf',
    'ms_files': [alphapept_hdf],
    'epoch_ms2': 5,
    'warmup_epoch_ms2': 2,
    'epoch_rt_ccs': 5,
    'warmup_epoch_rt_ccs': 2,
}
for setting_name, setting_value in transfer_overrides.items():
    mgr_settings['transfer'][setting_name] = setting_value

# Run only when the HDF file exists on this machine; otherwise report
# that the call is skipped.
if not os.path.isfile(alphapept_hdf):
    print("`pipeline_api.transfer_learn()` will be not tested")
else:
    transfer_learn()

2023-01-20 09:28:42> Platform information:
2023-01-20 09:28:42> system        - Darwin
2023-01-20 09:28:42> release       - 20.6.0
2023-01-20 09:28:42> version       - 10.16
2023-01-20 09:28:42> machine       - x86_64
2023-01-20 09:28:42> processor     - i386
2023-01-20 09:28:42> cpu count     - 8
2023-01-20 09:28:42> ram           - 10.7/32.0 Gb (available/total)
2023-01-20 09:28:42> 
2023-01-20 09:28:42> Python information:
2023-01-20 09:28:42> alphabase        - 1.0.0
2023-01-20 09:28:42> biopython        - 1.78
2023-01-20 09:28:42> click            - 8.1.3
2023-01-20 09:28:42> lxml             - 4.6.2
2023-01-20 09:28:42> numba            - 0.54.0
2023-01-20 09:28:42> numpy            - 1.19.4
2023-01-20 09:28:42> pandas           - 1.3.5
2023-01-20 09:28:42> peptdeep         - 1.0.1
2023-01-20 09:28:42> psutil           - 5.8.0
2023-01-20 09:28:42> python           - 3.8.3
2023-01-20 09:28:42> scikit-learn     - 1.1.1
2023-01-20 09:28:42> streamlit        - 1.16.0
2023-01-20 09:28

100%|██████████| 1/1 [00:01<00:00,  1.08s/it]

2023-01-20 09:28:54> Training CCS model ...
2023-01-20 09:28:54> Finished training CCS model
2023-01-20 09:28:54> Training RT model ...
2023-01-20 09:28:54> 10 PSMs for RT model training/transfer learning
2023-01-20 09:28:54> Training with fixed sequence length: 0





[Training] Epoch=1, lr=5e-05, loss=0.23635914623737336
[Training] Epoch=2, lr=0.0001, loss=0.23305423259735109
[Training] Epoch=3, lr=7.500000000000001e-05, loss=0.22809805870056152
[Training] Epoch=4, lr=2.500000000000001e-05, loss=0.22056499421596526
[Training] Epoch=5, lr=0.0, loss=0.21937111914157867
2023-01-20 09:28:55> Finished training RT model
2023-01-20 09:28:55> Training MS2 model ...
2023-01-20 09:28:55> 14 PSMs for MS2 model training/transfer learning
2023-01-20 09:28:55> Training with fixed sequence length: 0
[Training] Epoch=1, lr=5e-05, loss=0.023086506687104703
[Training] Epoch=2, lr=0.0001, loss=0.023529775300994517
[Training] Epoch=3, lr=7.500000000000001e-05, loss=0.020745731191709638
[Training] Epoch=4, lr=2.500000000000001e-05, loss=0.019724564673379064
[Training] Epoch=5, lr=0.0, loss=0.01900430452078581
2023-01-20 09:28:57> Finished training MS2 model
2023-01-20 09:28:57> Models were saved in /Users/zengwenfeng/peptdeep/refined_models


#### Test rescore

In [None]:
from peptdeep.pipeline_api import rescore

# The same HDF file is used both as the PSM input and as the MS data input.
alphapept_hdf = os.path.expanduser('~/Workspace/Data/HeLa_500ng/20201207_tims03_Evo03_PS_SA_HeLa_200ng_EvoSep_prot_DDA_21min_8cm_S1-C10_1_22476.ms_data.hdf')
perc_settings = global_settings['percolator']

# Overrides written into the 'input_files' sub-section of the
# percolator settings.
input_file_overrides = {
    'psm_files': [alphapept_hdf],
    'psm_type': 'alphapept',
    'ms_file_type': 'alphapept_hdf',
    'ms_files': [alphapept_hdf],
}
for setting_name, setting_value in input_file_overrides.items():
    perc_settings['input_files'][setting_name] = setting_value

# Top-level percolator options: no model tuning, a single iteration,
# and multiprocessing switched off.
perc_settings['require_model_tuning'] = False
perc_settings['percolator_iter_num'] = 1
perc_settings['multiprocessing'] = False

# NOTE: the leading `False` short-circuits the condition, so `rescore()`
# is never called from this cell and the skip message is always printed.
# Remove the `False and` part to run the rescoring test when the file exists.
if False and os.path.isfile(alphapept_hdf):
    rescore()
else:
    print("`pipeline_api.rescore()` will be not tested")

`pipeline_api.rescore()` will be not tested
