# Processing spectral libraries

### Load regular spectral libraries in TSV/CSV format

`alphabase.spectral_library.reader.LibraryReaderBase`

In [1]:
import torch # to prevent kernel crash (MacOS Arm?)  # noqa: F401

In [2]:
from alphabase.spectral_library.reader import LibraryReaderBase
from io import StringIO

speclib = LibraryReaderBase(
    charged_frag_types = [
        "b_z1",
        "b_z2",
        "y_z1",
        "y_z2",
        "b_modloss_z1",
        "b_modloss_z2",
        "y_modloss_z1",
        "y_modloss_z2",
    ]
)

tsv_str = """PrecursorCharge	ModifiedPeptide	StrippedPeptide	iRT	LabeledPeptide	PrecursorMz	FragmentLossType	FragmentNumber	FragmentType	FragmentCharge	FragmentMz	RelativeIntensity	IonMobility
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	b	1	326.1710473	14.37029	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	y	1	361.2081611	37.7585	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	b	1	397.2081611	9.488808	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	y	1	432.2452749	100	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	b	1	496.276575	5.498003	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	y	1	545.3293389	74.56643	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	6	y	2	321.6946896	51.50719	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	3	y	1	411.1639269	6.911595	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	3	y	1	313.1870287	17.38582	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	4	y	1	510.2323409	10.65426	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	4	y	1	412.2554427	37.41231	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	5	y	1	609.3007548	45.03617	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	5	y	1	511.3238566	100	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	3	y	1	349.1717756	9.20575	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	6	y	1	686.2756622	10.37339	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	H3PO4	6	y	1	588.298764	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	3	y	1	347.2288965	88.27327	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	3	b	1	256.1291795	64.97146	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	4	y	1	494.2973105	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	4	b	1	403.1975934	35.17805	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	5	y	1	661.2956694	19.89741	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	b	1	490.2296218	40.04738	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	y	1	563.3187712	77.43164	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	6	b	1	701.290584	24.43497	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	6	b	1	603.3136858	63.09999	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	3	b	1	238.1186147	62.60851	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	5	b	1	472.219057	22.99903	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	6	b	1	585.303121	66.30389	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	3	y	1	329.1931797	100	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	3	b	1	268.165565	5.755442	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	b	2	267.0740493	8.743931	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	y	1	496.1915387	27.69686	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	b	1	435.1639239	6.162673	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	4	b	1	337.1870258	10.84257	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	y	1	398.2146405	26.28527	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	5	y	1	497.2830544	28.41294	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	6	y	1	762.2583115	8.490795	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	6	y	1	664.2814133	32.87384	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	6	y	1	566.3045151	35.87218	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	3	y	1	331.1975964	49.20179	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	4	y	1	498.1959553	10.89141	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	4	y	1	400.2190571	27.99594	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	5	y	1	611.2800193	14.11057	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	5	y	1	513.3031211	70.5295	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	6	y	1	698.3120477	60.23455	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	6	y	1	600.3351495	100	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	1(+H2+O)1(+H3+O4+P)	6	y	1	582.3245847	5.233977	0.9
"""

psm_df = speclib.import_file(StringIO(tsv_str))
for col in ['sequence','charge','rt','rt_norm','mods','mod_sites','nAA','frag_start_idx','frag_stop_idx']:
    assert col in psm_df.columns
speclib.psm_df

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
100%|██████████| 6/6 [00:00<00:00, 1964.85it/s]


Unnamed: 0,mobility,mod_sites,rt,mods,sequence,nAA,charge,precursor_mz,frag_start_idx,frag_stop_idx,rt_norm,ccs
0,0.9,,-15.0871,,DPLAVDK,7,2,379.208161,0,6,0.199375,367.0431
1,0.9,2;4,-23.93085,Phospho@S;Phospho@S,VSVSPGR,7,2,431.167001,6,12,0.05805,366.254833
2,0.9,3,-27.5635,Phospho@S,MGSLDSK,7,2,409.161712,12,18,0.0,366.564438
3,0.9,3,35.01411,Phospho@S,SVSFSLK,7,1,847.396112,18,24,1.0,183.178171
4,0.9,4,-6.428198,Phospho@S,YSLSPSK,7,2,431.191326,24,30,0.337745,366.254509
5,0.9,5,-22.84974,Phospho@S,AVVVSPK,7,2,390.206779,30,36,0.075327,366.858877


In [3]:
speclib.fragment_intensity_df

Unnamed: 0,b_z1,b_z2,y_z1,y_z2,b_modloss_z1,b_modloss_z2,y_modloss_z1,y_modloss_z2
0,0.0,0.0,0.0,0.515072,0.0,0.0,0.0,0.0
1,0.0,0.0,0.745664,0.0,0.0,0.0,0.0,0.0
2,0.143703,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.094888,0.0,0.377585,0.0,0.0,0.0,0.0,0.0
4,0.05498,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.084908,0.0,0.0,0.0,0.328738,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.284129,0.0
8,0.0,0.0,0.276969,0.0,0.057554,0.0,0.262853,0.0
9,0.0,0.087439,1.0,0.0,0.061627,0.0,0.0,0.0


### Get flatten spectral library

In [4]:
from alphabase.spectral_library.flat import SpecLibFlat

flatlib = SpecLibFlat(
    charged_frag_types="b_z1,b_z2,y_z1,y_z2".split(","),
    min_fragment_intensity=0.01,
    keep_top_k_fragments=12,
    custom_fragment_df_columns=["type","number","position","charge","loss_type"]
)
flatlib.parse_base_library(speclib)
flatlib.fragment_df

Unnamed: 0,mz,intensity,type,loss_type,charge,number,position
0,321.694702,0.515072,121,0,2,6,0
1,545.329346,0.745664,121,0,1,5,1
2,326.171051,0.143703,98,0,1,3,2
3,432.24527,1.0,121,0,1,4,2
4,397.20816,0.094888,98,0,1,4,3
5,361.20816,0.377585,121,0,1,3,3
6,496.276581,0.05498,98,0,1,5,4
7,762.258301,0.084908,121,0,1,6,0
8,664.281433,0.328738,121,98,1,6,0
9,497.283051,0.284129,121,98,1,5,1


### Predict spectral libraries from fasta files

(Plan to copy from https://github.com/MannLabs/alphapeptdeep/blob/main/nbs_tests/protein/fasta.ipynb)

In [5]:
from peptdeep.pretrained_models import ModelManager
model_mgr = ModelManager(device="get_available")

  torch.load(stream, map_location=self.device), strict=False


In [6]:
from peptdeep.protein.fasta import PredictSpecLibFasta
fastalib = PredictSpecLibFasta(
    model_manager=model_mgr,
    charged_frag_types="b_z1,b_z2,y_z1,y_z2".split(","),
    protease="trypsin",
    max_missed_cleavages=1,
    peptide_length_min=7,
    peptide_length_max=20,
    precursor_charge_min=2,
    precursor_charge_max=3,
    precursor_mz_min=400,
    precursor_mz_max=800,
    var_mods=["Oxidation@M"],
    min_var_mod_num=0,
    max_var_mod_num=1,
    fix_mods=["Carbamidomethyl@C"],
    labeling_channels=None,
    special_mods=[],
    min_special_mod_num=0,
    max_special_mod_num=1,
    include_contaminants=True,
    I_to_L=False,
    generate_precursor_isotope=True,
    rt_to_irt=False,
)
fastalib.import_and_process_fasta(fasta_files=["data/sample_proteins.fasta"])
fastalib.protein_df

Unnamed: 0,protein_id,full_name,gene_name,gene_org,description,sequence
0,A0A087X1C5,sp|A0A087X1C5|CP2D7_HUMAN,CYP2D7,CP2D7_HUMAN,sp|A0A087X1C5|CP2D7_HUMAN Putative cytochrome ...,MGLEALVPLAMIVAIFLLLVDLMHRHQRWAARYPPGPLPLPGLGNL...
1,A0A0B4J2F0,sp|A0A0B4J2F0|PIOS1_HUMAN,PIGBOS1,PIOS1_HUMAN,sp|A0A0B4J2F0|PIOS1_HUMAN Protein PIGBOS1 OS=H...,MFRRLTFAQLLFATVLGIAGGVYIFQPVFEQYAKDQKELKEKMQLV...
2,A0A0B4J2F2,sp|A0A0B4J2F2|SIK1B_HUMAN,SIK1B,SIK1B_HUMAN,sp|A0A0B4J2F2|SIK1B_HUMAN Putative serine/thre...,MVIMSEFSADPAGQGQGQQKPLRVGFYDIERTLGKGNFAVVKLARH...
3,A0A0C5B5G6,sp|A0A0C5B5G6|MOTSC_HUMAN,MT-RNR1,MOTSC_HUMAN,sp|A0A0C5B5G6|MOTSC_HUMAN Mitochondrial-derive...,MRWQEMGYIFYPRKLR
4,A0A0K2S4Q6,sp|A0A0K2S4Q6|CD3CH_HUMAN,CD300H,CD3CH_HUMAN,sp|A0A0K2S4Q6|CD3CH_HUMAN Protein CD300H OS=Ho...,MTQRAGAAMLPSALLLLCVPGCLTVSGPSTVMGAVGESLSVQCRYE...
...,...,...,...,...,...,...
250,CON_ENSEMBL:ENSBTAP00000034412,CON_ENSEMBL:ENSBTAP00000034412,,CON_ENSEMBL:ENSBTAP00000034412,CON_ENSEMBL:ENSBTAP00000034412 (Bos taurus) si...,MKPVSGRKSPVLYLLGILTVLLCPDGLQGCSPPPEVKHGHFVYIQR...
251,CON_ENSEMBL:ENSBTAP00000013050,CON_ENSEMBL:ENSBTAP00000013050,,CON_ENSEMBL:ENSBTAP00000013050,CON_ENSEMBL:ENSBTAP00000013050 (Bos taurus) hy...,MWAVLSLPLACLLAQAWLVPGSTLASHSPEAQAGLETLLTPMAQNK...
252,CON_ENSEMBL:ENSBTAP00000016285,CON_ENSEMBL:ENSBTAP00000016285,,CON_ENSEMBL:ENSBTAP00000016285,CON_ENSEMBL:ENSBTAP00000016285 (Bos taurus) si...,MSFGNWKPTVVVQGILWILYGLLLQPEPGTATLPLLMDSVIQALAE...
253,CON_ENSEMBL:ENSBTAP00000024146,CON_ENSEMBL:ENSBTAP00000024146,,CON_ENSEMBL:ENSBTAP00000024146,CON_ENSEMBL:ENSBTAP00000024146 (Bos taurus) si...,MGKNKLLYPSLTLLLLLLLPTDASVSGKPQYMVLVPSLLHTETPEK...


In [7]:
fastalib.predict_all(
    predict_items=["rt, mobility","ms2"], 
    min_required_precursor_num_for_mp=2000
)

2024-08-12 14:55:20> Calculating precursor isotope distributions ...


100%|██████████| 24/24 [00:05<00:00,  4.76it/s]

2024-08-12 14:55:25> Predicting RT/IM/MS2 for 16845 precursors ...
2024-08-12 14:55:25> Predicting MS2 ...



100%|██████████| 14/14 [00:02<00:00,  6.92it/s]

2024-08-12 14:55:28> End predicting RT/IM/MS2





In [8]:
fastalib.precursor_df

Unnamed: 0,sequence,protein_idxes,miss_cleavage,is_prot_nterm,is_prot_cterm,mods,mod_sites,nAA,charge,precursor_mz,...,i_1,i_2,i_3,i_4,i_5,mono_isotope_idx,nce,instrument,frag_start_idx,frag_stop_idx
0,HQRWAAR,0,1,False,False,,,7,2,462.749185,...,0.300536,0.084151,0.016962,0.002720,0.000000,0,30.0,Lumos,0,6
1,EVTLEDR,120,0,False,False,,,7,2,431.219257,...,0.271432,0.074903,0.015489,0.002634,0.000000,0,30.0,Lumos,6,12
2,KFISHIK,120,1,False,False,,,7,2,436.771263,...,0.300746,0.084248,0.016983,0.002723,0.000000,0,30.0,Lumos,12,18
3,QLKPLEK,121,1,False,False,,,7,2,428.268553,...,0.288564,0.078985,0.015880,0.002574,0.000000,0,30.0,Lumos,18,24
4,NSAVRHR,120,1,False,False,,,7,2,420.233368,...,0.273484,0.068732,0.012748,0.001910,0.000000,0,30.0,Lumos,24,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16840,HYYELEALTTDFQDWAGASR,234,0,False,False,,,20,3,791.697590,...,0.335343,0.232131,0.113675,0.043888,0.014153,0,30.0,Lumos,195286,195305
16841,NPDGSITGPWCYTTSPTLRR,117,1,False,False,Carbamidomethyl@C,11,20,3,760.367298,...,0.328418,0.225484,0.112746,0.045111,0.015180,0,30.0,Lumos,195305,195324
16842,SSEADWVTDQLNQINYADHK,135,0,False,False,,,20,3,778.692616,...,0.338395,0.224401,0.106166,0.039837,0.012542,0,30.0,Lumos,195324,195343
16843,EIETYCLLIDGEDGSCSKSK,209,1,False,False,Carbamidomethyl@C;Carbamidomethyl@C,6;16,20,3,768.683853,...,0.313297,0.225158,0.120428,0.052318,0.019306,0,30.0,Lumos,195343,195362


In [9]:
fastalib.fragment_intensity_df

Unnamed: 0,b_z1,b_z2,y_z1,y_z2
0,0.028929,0.000000,0.020062,0.294034
1,0.269401,0.000000,0.063472,0.079531
2,0.125745,0.000000,0.286240,0.000000
3,0.001778,0.000000,0.054674,0.000000
4,0.096889,0.050289,0.037408,0.000000
...,...,...,...,...
195376,0.000000,0.000000,0.078300,0.000000
195377,0.000000,0.000000,0.037144,0.000000
195378,0.000000,0.000000,0.000000,0.000000
195379,0.000000,0.000000,0.000000,0.000000


### Merging different spectral libraries

Sometimes we need to merge spectral libraries from different sources:

1. We have two spectral libraries acquired by two instruments (e.g, timsTOF and Orbitrap), and we would like to build a combined library for diaPASEF analysis.

2. We have a sample spectific DDA-generated spectral library but we would like to include some external spectral libraries for better analysis.

3. We have a spectral library for the samples, but we would like to include some peptides or proteins of interest for quantification.

It is easy to merge spectral libraries by using peptdeep for these purposes.

#### First, we fine-tune the peptdeep model based on the primary spectral library.

In [10]:
prime_speclib = LibraryReaderBase()
prime_speclib.import_file("data/timstof_speclib.tsv")
prime_speclib.precursor_df

100%|██████████| 1680/1680 [00:00<00:00, 6331.78it/s]


Unnamed: 0,mod_sites,genes,proteins,rt,mods,mobility,sequence,nAA,charge,precursor_mz,frag_start_idx,frag_stop_idx,rt_norm,ccs
0,,,,-8.134939,,0.821354,DLGEEHFK,8,2,487.73254,0,7,0.197671,333.639802
1,,,,34.306236,,0.805000,AEFVEVTK,8,2,461.74765,7,14,0.445221,327.253271
2,,,,90.309280,,1.137500,DAIPENLPPLTADFAEDK,18,2,978.48346,14,31,0.771875,458.815870
3,,,,109.080700,,1.056250,DAFLGSFLYEYSR,13,2,784.37500,31,43,0.881365,426.786276
4,,,,113.218810,,1.143958,AEFVEVTKLVTDLTK,15,2,846.97455,43,57,0.905501,461.925887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1675,9,SPARCL1,SPRL1_HUMAN,23.823553,Carbamidomethyl@C,1.022386,ASLVPMEHCITR,12,2,707.35248,23075,23086,0.384078,413.497584
1676,9;13,AHCY,SAHH_HUMAN,34.144798,Oxidation@M;Oxidation@M,1.043333,ALDIAENEMPGLMR,14,2,796.37634,23086,23099,0.444279,421.511475
1677,9;13,CSF1,CSF1_HUMAN,90.554207,Oxidation@M;Oxidation@M,1.095312,AFLLVQDIMEDTMR,14,2,857.41296,23099,23112,0.773304,442.239123
1678,9;13,CSF1,CSF1_HUMAN,90.639214,Oxidation@M;Oxidation@M,0.807727,AFLLVQDIMEDTMR,14,3,571.94440,23112,23125,0.773799,489.184940


In [11]:
train_df = prime_speclib.precursor_df.sample(frac=0.8)
test_df = prime_speclib.precursor_df.drop(index=train_df.index)

Fine-tune RT/CCS models:

In [12]:
model_mgr.train_verbose = True
model_mgr.epoch_to_train_rt_ccs = 10
model_mgr.warmup_epoch_to_train_rt_ccs = 3
model_mgr.train_rt_model(train_df)
model_mgr.rt_model.test(test_df)

2024-08-12 14:55:28> 1225 PSMs for RT model training/transfer learning
2024-08-12 14:55:28> Training with fixed sequence length: 0
[Training] Epoch=1, lr=6.666666666666667e-05, loss=0.04601777202504523
[Training] Epoch=2, lr=0.0001, loss=0.04113308773102129
[Training] Epoch=3, lr=0.0001, loss=0.035520798033651185
[Training] Epoch=4, lr=9.504844339512095e-05, loss=0.030462097979205495
[Training] Epoch=5, lr=8.117449009293668e-05, loss=0.026844032327918446
[Training] Epoch=6, lr=6.112604669781572e-05, loss=0.02462515207555364
[Training] Epoch=7, lr=3.887395330218429e-05, loss=0.02217793801580282
[Training] Epoch=8, lr=1.8825509907063327e-05, loss=0.020465805784196538
[Training] Epoch=9, lr=4.951556604879048e-06, loss=0.019403176204137066
[Training] Epoch=10, lr=1.0000000000000002e-14, loss=0.018994575708775836


Unnamed: 0,R_square,R,slope,intercept,test_num
0,0.979972,0.989936,0.988141,0.006182,336


In [13]:
model_mgr.train_ccs_model(train_df)
model_mgr.ccs_model.test(test_df)

2024-08-12 14:55:44> 1344 PSMs for CCS model training/transfer learning
2024-08-12 14:55:44> Training with fixed sequence length: 0
[Training] Epoch=1, lr=6.666666666666667e-05, loss=23.995932677212885
[Training] Epoch=2, lr=0.0001, loss=20.350843892377966
[Training] Epoch=3, lr=0.0001, loss=21.66925231148215
[Training] Epoch=4, lr=9.504844339512095e-05, loss=19.2557854792651
[Training] Epoch=5, lr=8.117449009293668e-05, loss=18.233890743816602
[Training] Epoch=6, lr=6.112604669781572e-05, loss=19.92142859627219
[Training] Epoch=7, lr=3.887395330218429e-05, loss=18.394885511959302
[Training] Epoch=8, lr=1.8825509907063327e-05, loss=17.186839748831357
[Training] Epoch=9, lr=4.951556604879048e-06, loss=17.437322658651016
[Training] Epoch=10, lr=1.0000000000000002e-14, loss=16.11759058166953


Unnamed: 0,R_square,R,slope,intercept,test_num
0,0.989729,0.994851,1.022848,-10.790875,336


Fine-tuning the MS2 model based on spectral libraries is possible in peptdeep but it is not recommended as different search engines my ignore a lot of fragments when constructing the library. Thus it is better to use DDA results with the RAW data or to use alphadia's end-to-end transfer learning to get the tuned MS2 models.

#### Then, we re-predict precursors from other libraries based on tuned model to align RT (and mobility) with the primary library.

In [14]:
other_speclib = LibraryReaderBase()
other_speclib.import_file("data/sample_openswath.tsv")
pred_other_lib = PredictSpecLibFasta(
    model_manager=model_mgr,
    charged_frag_types="b_z1,b_z2,y_z1,y_z2".split(","),
)
pred_other_lib._precursor_df = other_speclib.precursor_df
pred_other_lib._fragment_intensity_df = other_speclib.fragment_intensity_df
pred_other_lib._fragment_mz_df = other_speclib.fragment_mz_df
pred_other_lib.predict_all(predict_items=["rt","mobility"])
pred_other_lib.precursor_df

100%|██████████| 18/18 [00:00<00:00, 5339.66it/s]

2024-08-12 14:56:05> Predicting RT/IM/MS2 for 18 precursors ...
2024-08-12 14:56:05> Predicting RT ...



100%|██████████| 6/6 [00:00<00:00,  7.81it/s]

2024-08-12 14:56:05> Predicting mobility ...



100%|██████████| 6/6 [00:00<00:00, 13.15it/s]

2024-08-12 14:56:06> End predicting RT/IM/MS2





Unnamed: 0,mod_sites,proteins,rt,mods,sequence,nAA,charge,precursor_mz,frag_start_idx,frag_stop_idx,rt_norm,rt_pred,rt_norm_pred,ccs_pred,mobility_pred
0,,1/iRT|iRT_Biognosys|iRT_Biognosys,-28.1,,LGGNEQVTR,9,2,487.256705,0,8,0.0,0.075625,0.075625,332.653778,0.818916
1,,1/sp|A1A5C7|S22AN_HUMAN,114.1,,FLMPGLAAL,9,2,466.767332,147,155,0.984083,0.92429,0.92429,315.040192,0.77508
2,,1/sp|A1A4S6|RHG10_HUMAN,9.7,,ILVKHLTNV,9,2,518.829309,139,147,0.261592,0.286732,0.286732,357.252686,0.880221
3,,1/sp|A0AVT1|UBA6_HUMAN,116.4,,TLLDFINAV,9,2,503.2844,131,139,1.0,0.9168,0.9168,339.884216,0.837088
4,,1/sp|A0AVT1|UBA6_HUMAN,81.2,,KLQNLNIFL,9,2,551.834591,123,131,0.756401,0.66227,0.66227,355.174347,0.875788
5,,1/sp|A2RU49|HYKK_HUMAN,100.0,,ALVESVFGL,9,2,467.765843,165,173,0.886505,0.888786,0.888786,326.156036,0.802453
6,3.0,1/sp|A6NNC1|P12LL_HUMAN,96.1,Oxidation@M,PLMLPPPLEL,10,2,568.325211,173,182,0.859516,0.922169,0.922169,356.546082,0.879487
7,,1/iRT|iRT_Biognosys|iRT_Biognosys,22.4,,YILAGVENSK,10,2,547.298039,33,42,0.349481,0.380247,0.380247,362.310547,0.893293
8,,1/sp|A2A3N6|PIPSL_HUMAN,71.5,,VMQDPEFLQSV,11,2,646.813196,155,165,0.689273,0.699226,0.699226,381.634491,0.942748
9,,1/iRT|iRT_Biognosys|iRT_Biognosys,33.6,,TPVITGAPYEYR,12,2,683.85371,53,64,0.42699,0.454034,0.454034,395.296204,0.977057
