# Processing spectral libraries

### Load regular spectral libraries in TSV format

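Use `alphabase.spectral_library.reader.LibraryReaderBase` to read a fragment-level TSV library. The example below parses an in-memory TSV string and then inspects the resulting precursor table (`psm_df`) and the fragment m/z and intensity tables.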

In [1]:
from alphabase.spectral_library.reader import LibraryReaderBase
from io import StringIO

# Build the requested fragment columns programmatically: b and y ions at
# charge 1 and 2, first the plain series and then the "_modloss" series.
# The generated order is b_z1, b_z2, y_z1, y_z2, b_modloss_z1, b_modloss_z2,
# y_modloss_z1, y_modloss_z2.
reader = LibraryReaderBase(
    charged_frag_types=[
        f"{ion}{loss}_z{charge}"
        for loss in ("", "_modloss")
        for ion in ("b", "y")
        for charge in (1, 2)
    ]
)

# Inline example library: tab-separated text with a header row followed by one
# row per fragment ion (the precursor-level columns are repeated on every row).
# It is wrapped in StringIO further down so it can be read like a file.
tsv_str = """PrecursorCharge	ModifiedPeptide	StrippedPeptide	iRT	LabeledPeptide	PrecursorMz	FragmentLossType	FragmentNumber	FragmentType	FragmentCharge	FragmentMz	RelativeIntensity	IonMobility
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	b	1	326.1710473	14.37029	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	3	y	1	361.2081611	37.7585	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	b	1	397.2081611	9.488808	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	4	y	1	432.2452749	100	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	b	1	496.276575	5.498003	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	5	y	1	545.3293389	74.56643	0.9
2	_DPLAVDK_	DPLAVDK	-15.0871	_DPLAVDK_	379.2081611	noloss	6	y	2	321.6946896	51.50719	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	3	y	1	411.1639269	6.911595	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	3	y	1	313.1870287	17.38582	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	4	y	1	510.2323409	10.65426	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	4	y	1	412.2554427	37.41231	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	noloss	5	y	1	609.3007548	45.03617	0.9
2	_AVVVS[Phospho (STY)]PK_	AVVVSPK	-22.84974	_AVVVS[Phospho (STY)]PK_	390.2067795	H3PO4	5	y	1	511.3238566	100	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	3	y	1	349.1717756	9.20575	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	noloss	6	y	1	686.2756622	10.37339	0.9
2	_MGS[Phospho (STY)]LDSK_	MGSLDSK	-27.5635	_MGS[Phospho (STY)]LDSK_	409.1617118	H3PO4	6	y	1	588.298764	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	3	y	1	347.2288965	88.27327	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	3	b	1	256.1291795	64.97146	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	4	y	1	494.2973105	100	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	4	b	1	403.1975934	35.17805	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	5	y	1	661.2956694	19.89741	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	b	1	490.2296218	40.04738	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	5	y	1	563.3187712	77.43164	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	noloss	6	b	1	701.290584	24.43497	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	H3PO4	6	b	1	603.3136858	63.09999	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	3	b	1	238.1186147	62.60851	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	5	b	1	472.219057	22.99903	0.9
1	_SVS[Phospho (STY)]FSLK_	SVSFSLK	35.01411	_SVS[Phospho (STY)]FSLK_	847.3961117	1(+H2+O)1(+H3+O4+P)	6	b	1	585.303121	66.30389	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	3	y	1	329.1931797	100	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	3	b	1	268.165565	5.755442	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	b	2	267.0740493	8.743931	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	4	y	1	496.1915387	27.69686	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	b	1	435.1639239	6.162673	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	4	b	1	337.1870258	10.84257	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	4	y	1	398.2146405	26.28527	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	5	y	1	497.2830544	28.41294	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	noloss	6	y	1	762.2583115	8.490795	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	H3PO4	6	y	1	664.2814133	32.87384	0.9
2	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	VSVSPGR	-23.93085	_VS[Phospho (STY)]VS[Phospho (STY)]PGR_	431.1670009	2(+H3+O4+P)	6	y	1	566.3045151	35.87218	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	3	y	1	331.1975964	49.20179	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	4	y	1	498.1959553	10.89141	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	4	y	1	400.2190571	27.99594	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	5	y	1	611.2800193	14.11057	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	5	y	1	513.3031211	70.5295	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	noloss	6	y	1	698.3120477	60.23455	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	H3PO4	6	y	1	600.3351495	100	0.9
2	_YSLS[Phospho (STY)]PSK_	YSLSPSK	-6.428198	_YSLS[Phospho (STY)]PSK_	431.1913264	1(+H2+O)1(+H3+O4+P)	6	y	1	582.3245847	5.233977	0.9
"""

# Parse the in-memory TSV; StringIO lets the reader consume the string like a file.
psm_df = reader.import_file(StringIO(tsv_str))

# Sanity check: the precursor table must expose the columns this notebook
# relies on. Collect every missing column first so that a failure names all
# of them instead of stopping silently at the first one.
expected_columns = [
    'sequence', 'charge', 'rt', 'rt_norm', 'mods', 'mod_sites',
    'nAA', 'frag_start_idx', 'frag_stop_idx',
]
missing_columns = [col for col in expected_columns if col not in psm_df.columns]
assert not missing_columns, f"psm_df is missing expected columns: {missing_columns}"

# Last expression of the cell: display the precursor table held by the reader.
reader.psm_df

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
100%|██████████| 6/6 [00:00<00:00, 2283.86it/s]


Unnamed: 0,mods,nAA,mod_sites,sequence,mobility,charge,rt,precursor_mz,frag_start_idx,frag_stop_idx,rt_norm,ccs
0,,7,,DPLAVDK,0.9,2,-15.0871,379.208161,0,6,0.199375,367.0431
1,Phospho@S,7,3,MGSLDSK,0.9,2,-27.5635,409.161712,6,12,0.0,366.564438
2,Phospho@S,7,3,SVSFSLK,0.9,1,35.01411,847.396112,12,18,1.0,183.178171
3,Phospho@S,7,4,YSLSPSK,0.9,2,-6.428198,431.191326,18,24,0.337745,366.254509
4,Phospho@S,7,5,AVVVSPK,0.9,2,-22.84974,390.206779,24,30,0.075327,366.858877
5,Phospho@S;Phospho@S,7,2;4,VSVSPGR,0.9,2,-23.93085,431.167001,30,36,0.05805,366.254833


In [2]:
# Display the fragment m/z table; its columns are the charged fragment types
# requested when the reader was created (b/y, charge 1-2, with and without modloss).
# NOTE(review): rows appear to be addressed per precursor through
# frag_start_idx / frag_stop_idx in psm_df — confirm against the alphabase docs.
reader.fragment_mz_df

Unnamed: 0,b_z1,b_z2,y_z1,y_z2,b_modloss_z1,b_modloss_z2,y_modloss_z1,y_modloss_z2
0,116.034218,58.520748,642.38208,321.694702,0.0,0.0,0.0,0.0
1,213.08699,107.047127,545.329346,273.168304,0.0,0.0,0.0,0.0
2,326.171051,163.589157,432.24527,216.626282,0.0,0.0,0.0,0.0
3,397.20816,199.107712,361.20816,181.107712,0.0,0.0,0.0,0.0
4,496.276581,248.641922,262.13974,131.573517,0.0,0.0,0.0,0.0
5,611.303528,306.155396,147.112808,74.060043,0.0,0.0,0.0,0.0
6,132.04776,66.527519,686.275635,343.641479,0.0,0.0,588.298767,294.653015
7,189.069229,95.038254,629.254211,315.130737,0.0,0.0,531.277283,266.142303
8,356.067596,178.53743,462.255829,231.631561,258.090698,129.548981,0.0,0.0
9,469.151642,235.079468,349.171783,175.089523,371.174744,186.091019,0.0,0.0


In [3]:
# Display the fragment intensity table, laid out with the same columns as the
# fragment m/z table. In the output shown, intensities are on a 0-1 scale
# (a RelativeIntensity of 100 in the TSV appears as 1.0) and fragments that
# are absent from the TSV are 0.0.
reader.fragment_intensity_df

Unnamed: 0,b_z1,b_z2,y_z1,y_z2,b_modloss_z1,b_modloss_z2,y_modloss_z1,y_modloss_z2
0,0.0,0.0,0.0,0.515072,0.0,0.0,0.0,0.0
1,0.0,0.0,0.745664,0.0,0.0,0.0,0.0,0.0
2,0.143703,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.094888,0.0,0.377585,0.0,0.0,0.0,0.0,0.0
4,0.05498,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.103734,0.0,0.0,0.0,1.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.092058,0.0,0.0,0.0,0.0,0.0


### Predict spectral libraries from fasta files

(TODO: this section is not written yet. The plan is to adapt the example from https://github.com/MannLabs/alphapeptdeep/blob/main/nbs_tests/protein/fasta.ipynb)

### Merge different spectral libraries

(TODO: this section is not written yet. The idea is to use transfer learning to fit an RT model on one library, and then use that model to predict RT values for the other libraries.)