In [None]:
#---#| default_exp pipeline_api

# pipeline API

This module provides high-level APIs for different pipelines such as transfer learning, library generation, and rescoring (percolator). 

## Transfer learning pipeline

The transfer learning pipeline includes:
1. Loading PSM files of the search engine results.
2. Matching PSMs against the MS files.
3. Loading pre-trained models and refining RT/CCS(/MS2) models.

The refined models will be saved in the path pointed to by `"PEPTDEEP_HOME"` in `peptdeep.settings.global_settings`.

In [None]:
import torch # noqa: 401, to prevent crash in Mac Arm

In [None]:
from peptdeep.pipeline_api import global_settings
import os
import warnings

2023-08-24 14:18:03> /Users/wenfengzeng/workspace/peptdeep/peptdeep/mass_spec/ms_reader.py#L13: Cannot import `RawFileReader`, check if PythonNet is installed. See https://github.com/MannLabs/alphapeptdeep#pip


#### Test library generation

In [None]:
from peptdeep.pipeline_api import generate_library

# Point the library settings at a FASTA file under the user's home directory.
# The settings are written unconditionally; only the generate_library() call
# below is guarded by the file check.
fasta = os.path.expanduser('~/Workspace/Data/fasta/irtfusion.fasta')
lib_settings = global_settings['library']
lib_settings['infile_type'] = 'fasta'
lib_settings['infiles'] = [fasta]
lib_settings['fasta']['add_contaminants'] = False

# The FASTA path is machine-specific, so skip the run when the file is absent.
if not os.path.isfile(fasta):
    print("`pipeline_api.generate_library()` will be not tested")
else:
    generate_library()

`pipeline_api.generate_library()` will be not tested


#### Test transfer learning

In [None]:
from io import StringIO
from peptdeep.pipeline_api import transfer_learn

tsv_str = """PrecursorCharge	ModifiedPeptide	StrippedPeptide	iRT	LabeledPeptide	PrecursorMz	FragmentLossType	FragmentNumber	FragmentType	FragmentCharge	FragmentMz	RelativeIntensity	IonMobility
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	b	1	326.1710473	14.37029	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	y	1	361.2081611	37.7585	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	b	1	397.2081611	9.488808	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	y	1	432.2452749	100	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	b	1	496.276575	5.498003	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	y	1	545.3293389	74.56643	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	6	y	2	321.6946896	51.50719	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	3	y	1	411.1639269	6.911595	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	3	y	1	313.1870287	17.38582	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	4	y	1	510.2323409	10.65426	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	4	y	1	412.2554427	37.41231	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	5	y	1	609.3007548	45.03617	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	5	y	1	511.3238566	100	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	3	y	1	349.1717756	9.20575	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	6	y	1	686.2756622	10.37339	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	H3PO4	6	y	1	588.298764	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	3	y	1	347.2288965	88.27327	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	3	b	1	256.1291795	64.97146	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	4	y	1	494.2973105	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	4	b	1	403.1975934	35.17805	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	5	y	1	661.2956694	19.89741	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	b	1	490.2296218	40.04738	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	y	1	563.3187712	77.43164	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	6	b	1	701.290584	24.43497	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	6	b	1	603.3136858	63.09999	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	3	b	1	238.1186147	62.60851	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	5	b	1	472.219057	22.99903	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	6	b	1	585.303121	66.30389	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	3	y	1	329.1931797	100	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	3	b	1	268.165565	5.755442	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	b	2	267.0740493	8.743931	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	y	1	496.1915387	27.69686	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	b	1	435.1639239	6.162673	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	4	b	1	337.1870258	10.84257	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	y	1	398.2146405	26.28527	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	5	y	1	497.2830544	28.41294	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	6	y	1	762.2583115	8.490795	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	6	y	1	664.2814133	32.87384	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	6	y	1	566.3045151	35.87218	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	3	y	1	331.1975964	49.20179	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	4	y	1	498.1959553	10.89141	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	4	y	1	400.2190571	27.99594	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	5	y	1	611.2800193	14.11057	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	5	y	1	513.3031211	70.5295	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	6	y	1	698.3120477	60.23455	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	6	y	1	600.3351495	100	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	1(+H2+O)1(+H3+O4+P)	6	y	1	582.3245847	5.233977	0.9
"""

# Configure the model manager's transfer-learning section, then run the pipeline.
mgr_settings = global_settings['model_mgr']
transfer_settings = mgr_settings['transfer']

# Input: the in-memory TSV above is passed as a file-like object, and both the
# PSM type and the MS file type are set to 'speclib_tsv'.
transfer_settings['psm_files'] = [StringIO(tsv_str)]
transfer_settings['psm_type'] = 'speclib_tsv'
transfer_settings['ms_file_type'] = 'speclib_tsv'

# Training schedule for the MS2 and RT/CCS models
# (small epoch counts -- presumably to keep this test quick; TODO confirm).
transfer_settings['epoch_ms2'] = 5
transfer_settings['warmup_epoch_ms2'] = 2
transfer_settings['epoch_rt_ccs'] = 5
transfer_settings['warmup_epoch_rt_ccs'] = 2

transfer_learn()

2023-08-24 14:18:03> Platform information:
2023-08-24 14:18:03> system        - Darwin
2023-08-24 14:18:03> release       - 21.6.0
2023-08-24 14:18:03> version       - 12.5.1
2023-08-24 14:18:03> machine       - arm64
2023-08-24 14:18:03> processor     - arm
2023-08-24 14:18:03> cpu count     - 10
2023-08-24 14:18:03> ram           - 28.8/64.0 Gb (available/total)
2023-08-24 14:18:03> 
2023-08-24 14:18:03> Python information:
2023-08-24 14:18:03> alphabase        - 1.0.2
2023-08-24 14:18:03> biopython        - 1.79
2023-08-24 14:18:03> click            - 8.1.3
2023-08-24 14:18:03> lxml             - 4.9.1
2023-08-24 14:18:03> numba            - 0.56.2
2023-08-24 14:18:03> numpy            - 1.23.3
2023-08-24 14:18:03> pandas           - 1.5.0
2023-08-24 14:18:03> peptdeep         - 1.0.2
2023-08-24 14:18:03> psutil           - 5.9.2
2023-08-24 14:18:03> python           - 3.9.12
2023-08-24 14:18:03> scikit-learn     - 1.1.2
2023-08-24 14:18:03> streamlit        - 1.13.0
2023-08-24 14:1

#### Test rescore

In [None]:
# from peptdeep.pipeline_api import rescore

# alphapept_hdf = os.path.expanduser('~/Workspace/Data/HeLa_500ng/20201207_tims03_Evo03_PS_SA_HeLa_200ng_EvoSep_prot_DDA_21min_8cm_S1-C10_1_22476.ms_data.hdf')
# perc_settings = global_settings['percolator']
# perc_settings['input_files']['psm_files'] = [alphapept_hdf]
# perc_settings['input_files']['psm_type'] = 'alphapept'
# perc_settings['input_files']['ms_file_type'] = 'alphapept_hdf'
# perc_settings['input_files']['ms_files'] = [alphapept_hdf]
# perc_settings['require_model_tuning'] = False
# perc_settings['percolator_iter_num'] = 1
# perc_settings['multiprocessing'] = False

# if False and os.path.isfile(alphapept_hdf):
#     rescore()
# else:
#     print("`pipeline_api.rescore()` will be not tested")