In [None]:
#---#| default_exp spec_lib.predict_lib

# Predict Spectral Library

Base functionality for predicting spectral libraries.

For most users, `peptdeep.pretrained_models.ModelManager`, `peptdeep.spec_lib.predict_lib.PredictSpecLib`, and `peptdeep.protein.fasta.PredictSpecLibFasta` cover the majority of use cases.

Both `peptdeep.spec_lib.predict_lib.PredictSpecLib` and `peptdeep.protein.fasta.PredictSpecLibFasta` take a `peptdeep.pretrained_models.ModelManager` instance as their first positional argument.



In [None]:
import torch # noqa: 401, to prevent crash in Mac Arm

In [None]:
from peptdeep.spec_lib.predict_lib import *

In [None]:
from peptdeep.protein.fasta import PredictSpecLibFasta
from alphabase.peptide.fragment import get_charged_frag_types

In [None]:
# Model manager pinned to the CPU; installed models are loaded before
# verbose output is switched off.
# NOTE(review): mask_modloss=False presumably keeps the modloss fragment
# predictions requested below -- confirm against ModelManager.
model_mgr = ModelManager(mask_modloss=False, device="cpu")
model_mgr.load_installed_models()
model_mgr.verbose = False

# Two toy proteins keyed by their ids; prot2 is a stretch that also occurs
# inside prot1.
prot1 = "MACDESTYKAKFGHIKLMNPQRST"
prot2 = "FGHIKLMNPQR"
protein_dict = {
    protein_id: {"protein_id": protein_id, "sequence": sequence}
    for protein_id, sequence in (("xx", prot1), ("yy", prot2))
}

# FASTA-style library with b/y ions and their modloss variants.
# NOTE(review): the second argument (2) is presumably the maximum fragment
# charge -- confirm against alphabase's get_charged_frag_types.
_lib = PredictSpecLibFasta(
    model_mgr,
    decoy="pseudo_reverse",
    I_to_L=False,
    charged_frag_types=get_charged_frag_types(
        ["b", "y", "b_modloss", "y_modloss"], 2
    ),
)

# Process the proteins, then display the resulting precursor table as the
# cell output.
_lib.import_and_process_protein_dict(protein_dict)
_lib.precursor_df

Unnamed: 0,sequence,protein_idxes,miss_cleavage,is_prot_nterm,is_prot_cterm,mods,mod_sites,nAA,decoy,charge
0,AKFGHIK,0,1,False,False,,,7,0,2
1,AKFGHIK,0,1,False,False,,,7,0,3
2,AKFGHIK,0,1,False,False,,,7,0,4
3,IHGFKAK,0,1,False,False,,,7,1,2
4,IHGFKAK,0,1,False,False,,,7,1,3
...,...,...,...,...,...,...,...,...,...,...
169,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...,13;0;15,16,1,3
170,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...,13;0;15,16,1,4
171,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term,13;0,16,1,2
172,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term,13;0,16,1,3


In [None]:
# Turn off verbose output on the source library before the batched run.
_lib.verbose = False

# Flat library that is asked to carry an extra "type" column in its
# fragment table; it is filled from `_lib` using a batch size of 100.
flat_lib = PredictSpecLibFlat(custom_fragment_df_columns=["type"])
flat_lib.predict_and_parse_lib_in_batch(_lib, batch_size=100)

# Display the precursor table of the flat library as the cell output.
flat_lib.precursor_df

Unnamed: 0,sequence,protein_idxes,miss_cleavage,is_prot_nterm,is_prot_cterm,mods,mod_sites,nAA,decoy,charge,precursor_mz,rt_pred,rt_norm_pred,ccs_pred,mobility_pred,frag_stop_idx,nce,instrument,frag_start_idx
0,AKFGHIK,0,1,False,False,,,7,0,2,400.742505,0.029719,0.029719,315.498627,0.774353,11,30.0,Lumos,0
1,AKFGHIK,0,1,False,False,,,7,0,3,267.497429,0.029719,0.029719,389.692902,0.637649,23,30.0,Lumos,11
2,AKFGHIK,0,1,False,False,,,7,0,4,200.874891,0.029719,0.029719,456.406067,0.560120,32,30.0,Lumos,23
3,IHGFKAK,0,1,False,False,,,7,1,2,400.742505,0.018621,0.018621,313.312408,0.768987,44,30.0,Lumos,32
4,IHGFKAK,0,1,False,False,,,7,1,3,267.497429,0.018621,0.018621,376.615753,0.616251,53,30.0,Lumos,44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...,13;0;15,16,1,3,648.637850,0.213083,0.213083,543.556580,0.898357,2623,30.0,Lumos,2593
170,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term;Oxidat...,13;0;15,16,1,4,486.730206,0.213083,0.213083,671.651733,0.832552,2648,30.0,Lumos,2623
171,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term,13;0,16,1,2,964.455679,0.270350,0.270350,469.423584,1.163679,2673,30.0,Lumos,2648
172,IHGFKAKYTSEDCAMK,0,2,True,False,Carbamidomethyl@C;Acetyl@Protein_N-term,13;0,16,1,3,643.306211,0.270350,0.270350,543.796631,0.898701,2702,30.0,Lumos,2673


In [None]:
# Display the fragment table of the flat library (mz, intensity and the
# custom "type" column).
# NOTE(review): the "type" values 98 and 121 in the rendered output match the
# ASCII codes of 'b' and 'y' -- confirm that this is the intended encoding.
flat_lib.fragment_df

Unnamed: 0,mz,intensity,type
0,729.440621,0.049367,121
1,365.223949,0.133035,121
2,200.139353,0.194481,98
3,601.345658,1.000000,121
4,347.207767,0.045913,98
...,...,...,...
2720,509.221052,0.737302,121
2721,255.114164,0.082723,121
2722,349.190403,0.549802,121
2723,278.153289,0.576329,121
