In [1]:
import os
import pandas as pd
import psutil
import numpy as np
from peptdeep.pretrained_models import ModelManager
from peptdeep.protein.fasta import PredictSpecLibFasta
from alphabase.peptide.fragment import get_charged_frag_types
from alphabase.peptide.precursor import hash_precursor_df
from alphabase.spectral_library.translate import translate_to_tsv

In [2]:
# Acquisition settings; a later cell copies these onto the model manager.
instrument, nce = 'QE', 27

# Modification settings: the only variable modification is 'Oxidation@M',
# and no fixed modifications are configured.
var_mods = ['Oxidation@M']
fix_mods = []

# No labeling channels configured; an empty mapping also keeps the "_mDIA"
# tag out of the output file name built later.
labeling_channels = {}

In [3]:
# Limits of the precursor search space used when building the library.
max_var_mods = 1
min_pep_mz, max_pep_mz = 300, 1650
precursor_charge_min, precursor_charge_max = 1, 3

# Charged fragment types derived from b and y ions; the second argument (2)
# is presumably the maximum fragment charge -- TODO confirm against alphabase.
frag_types = get_charged_frag_types(['b', 'y'], 2)

# Model manager shared by the cells below; the bare expression displays it.
model_mgr = ModelManager()
model_mgr

<peptdeep.pretrained_models.ModelManager at 0x7e3412917760>

In [4]:
# Read the demo peptide table and append each sequence's length as `nAA`
# in a single expression, then display the resulting frame.
seq_df = pd.read_table("data/demo/peptide_df.tsv").assign(
    nAA=lambda peptides: peptides.sequence.str.len()
)
seq_df

Unnamed: 0,best_allele_id,best_allele_dist,best_allele_rank,best_allele_fdr,sequence,nAA
0,1,0.186641,1,0.007636,MVSPLPPPH,9
1,0,0.211866,0,0.004407,HCPGAALHV,9
2,4,0.276956,0,0.000802,GAALHVQPY,9
3,1,0.074507,0,0.000200,GAALHVQPYK,10
4,1,0.079320,0,0.001209,AALHVQPYK,9
...,...,...,...,...,...,...
181462,1,0.074775,0,0.000200,KKKLLGQFYK,10
181463,1,0.053455,0,0.000000,KKLLGQFYK,9
181464,1,0.043071,0,0.000000,KLLGQFYK,8
181465,0,0.065053,0,0.000000,KLLGQFYKC,9


In [5]:
# Directory that receives the exported library files.
out_lib_dir = "data/demo"

# File-name stem encoding the search-space settings; the "_mDIA" tag is only
# present when labeling channels are configured.
title = "".join([
    "HLA",
    "_mDIA" if labeling_channels else "",
    f"_ch={precursor_charge_min}-{precursor_charge_max}",
    f"_mz={min_pep_mz}-{max_pep_mz}",
    f"_mod={max_var_mods}",
])

# Full path of the library file, suffixed with instrument and NCE.
hdf_path = os.path.join(out_lib_dir, f'{title}_{instrument}{nce}.hdf')
hdf_path

'data/demo/HLA_ch=1-3_mz=300-1650_mod=1_QE27.hdf'

In [6]:
# Load the installed models and apply the acquisition settings chosen above.
model_mgr.load_installed_models()
model_mgr.nce = nce
model_mgr.instrument = instrument

# Library object configured with the modification, labeling, charge and m/z
# settings from the configuration cells; decoy generation is disabled.
fasta_lib = PredictSpecLibFasta(
    model_mgr,
    charged_frag_types=frag_types,
    var_mods=var_mods,
    fix_mods=fix_mods,
    max_var_mod_num=max_var_mods,
    labeling_channels=labeling_channels,
    precursor_charge_min=precursor_charge_min,
    precursor_charge_max=precursor_charge_max,
    precursor_mz_min=min_pep_mz,
    precursor_mz_max=max_pep_mz,
    decoy=None,
)

# Hand the library its own copy of the peptide table. The library may refine
# (e.g. sort / re-index) the assigned frame in place -- presumably via the
# precursor_df setter; verify against alphabase. Sharing `seq_df` directly
# would then silently change the frame displayed in an earlier cell and make
# re-running this cell start from already-modified data.
fasta_lib.precursor_df = seq_df.copy()

# Expand the peptide list with modifications, labeling and charge states.
fasta_lib.add_modifications()
fasta_lib.add_peptide_labeling()
fasta_lib.add_charge()

In [7]:
# Hash the precursor table (presumably adds hash columns in place, since the
# return value is discarded -- confirm against alphabase).
hash_precursor_df(fasta_lib.precursor_df)

# Compute precursor m/z values on the library.
fasta_lib.calc_precursor_mz()

# Attach the acquisition settings as columns before running the predictions.
fasta_lib.precursor_df['instrument'] = model_mgr.instrument
fasta_lib.precursor_df['nce'] = model_mgr.nce
# Predict RT, mobility and MS2 for every precursor using the configured
# fragment types.
res = fasta_lib.model_manager.predict_all(
    fasta_lib.precursor_df,
    predict_items=['rt','mobility','ms2'],
    frag_types=frag_types
)

# Store the prediction results on the library; `res` is unpacked as keyword
# arguments, so it is expected to be a mapping.
fasta_lib.set_precursor_and_fragment(
    **res
)


2024-08-13 15:49:26> Predicting RT ...


100%|██████████| 7/7 [00:05<00:00,  1.27it/s]

2024-08-13 15:49:32> Predicting mobility ...



100%|██████████| 7/7 [00:05<00:00,  1.34it/s]


2024-08-13 15:49:40> Predicting MS2 ...


100%|██████████| 7/7 [00:08<00:00,  1.25s/it]


In [8]:
# Report library size and the resident memory of this kernel process.
# The "fragments" figure is the total number of cells in fragment_mz_df
# (product of its shape), expressed in millions.
process = psutil.Process(os.getpid())
print(
    f'{len(fasta_lib.precursor_df)*1e-6:.2f}M precursors with '
    f'{np.prod(fasta_lib.fragment_mz_df.values.shape, dtype=float)*(1e-6):.2f}M fragments used '
    f'{process.memory_info().rss/1024**3:.4f} GB memory'
)

# Derive the TSV path by swapping the extension rather than slicing off a
# fixed number of characters, so the name stays correct if the library
# extension ever changes length (e.g. ".hdf5").
tsv_path = os.path.splitext(hdf_path)[0] + ".tsv"

# Export the library to TSV, passing a minimum fragment intensity of 0.001
# and keep_k_highest_fragments=12.
translate_to_tsv(fasta_lib, tsv_path,
    min_frag_intensity=0.001,
    keep_k_highest_fragments=12
)

0.66M precursors with 22.99M fragments used 1.6904 GB memory


100%|██████████| 7/7 [01:54<00:00, 16.42s/it]


Translation finished, it will take several minutes to export the rest precursors to the tsv file...
