In [1]:
import pandas as pd
import numpy as np

from alphabase.spectral_library.flat import SpecLibFlat
from alphabase.spectral_library.base import SpecLibBase
from alphadia.extraction.utils import reannotate_fragments
from alphabase.psm_reader import psm_reader_provider



In [7]:
# Load PSMs from a MaxQuant evidence.txt through alphabase's PSM reader.
# NOTE(review): keep_decoy=True / fdr=1 presumably keep decoys and disable FDR
# filtering -- confirm against the reader documentation.
reader = psm_reader_provider.get_reader('maxquant', rt_unit='minute', keep_decoy=True, fdr=1)
# NOTE(review): absolute path on a mounted network volume; this cell only runs
# on a machine with that mount. Consider a configurable data directory.
reader.import_file('/Volumes/pool-mann-pub/User/GeorgW/fromPeople/Patricia/modified_SCP2_dia-PASEF_vs_synchro-PASEF/txt/evidence.txt')

# NOTE(review): these imports sit mid-notebook; consider moving them to the
# import cell at the top so dependencies are visible in one place.
from peptdeep.pretrained_models import ModelManager
from alphabase.peptide.fragment import get_charged_frag_types

# Fragment columns to predict: b/y ions plus their modloss variants.
# The second argument (2) is presumably the maximum fragment charge -- TODO confirm.
frag_types = get_charged_frag_types(
    ['b','y','b_modloss','y_modloss'], 
    2
)

model_mgr = ModelManager()

# Prediction settings applied to the model manager before predicting.
model_mgr.nce = 30
model_mgr.instrument = 'timsTOF'

# Predict MS2 spectra for the imported PSMs. Later cells read the keys
# 'precursor_df', 'fragment_mz_df' and 'fragment_intensity_df' from `res`.
res = model_mgr.predict_all(
    reader.psm_df,
    predict_items=['ms2'],
    frag_types = frag_types,
)

2023-02-08 09:27:51> Using multiprocessing with 8 processes ...
2023-02-08 09:27:51> Predicting ms2 ...


100%|██████████| 39/39 [00:07<00:00,  5.00it/s]


In [8]:
# Wrap the target prediction result in a spectral-library container by
# attaching the three predicted tables directly to its private attributes.
target_lib = SpecLibBase()
target_lib._fragment_mz_df = res['fragment_mz_df']
target_lib._fragment_intensity_df = res['fragment_intensity_df']
target_lib._precursor_df = res['precursor_df']

In [9]:
# Give every target precursor its own elution group: elution_group_idx is
# simply the row position 0..n-1 in the target precursor table.
target_lib._precursor_df['elution_group_idx'] = np.arange(len(target_lib._precursor_df))

In [10]:
# Seed the decoy library with independent copies of the target tables so the
# target library is not modified by the decoy generation below.
decoy_lib = SpecLibBase()
decoy_lib._fragment_intensity_df = target_lib._fragment_intensity_df.copy()
decoy_lib._fragment_mz_df = target_lib._fragment_mz_df.copy()
decoy_lib._precursor_df = target_lib.precursor_df.copy()

# Generate decoys with the 'diann' scheme and recompute precursor m/z.
# append_decoy_sequence() is expected to add rows flagged in the 'decoy'
# column -- the filter below relies on that column existing.
decoy_lib.decoy = 'diann'
decoy_lib.append_decoy_sequence()
decoy_lib.calc_precursor_mz()

# Keep only the rows flagged as decoys (decoy == 1).
decoy_lib.precursor_df = decoy_lib.precursor_df.loc[
    lambda frame: frame['decoy'] == 1
]

In [11]:
# Predict MS2 spectra for the decoy precursors with the same model settings
# and fragment types used for the targets.
# NOTE(review): this rebinds `res`, which previously held the target
# prediction; cells that read `res` depend on which predict cell ran last.
res = model_mgr.predict_all(
    decoy_lib.precursor_df,
    predict_items=['ms2'],
    frag_types = frag_types,
)

2023-02-08 09:28:24> Using multiprocessing with 8 processes ...
2023-02-08 09:28:25> Predicting ms2 ...


100%|██████████| 39/39 [00:07<00:00,  4.88it/s]


In [12]:
# Replace the decoy tables with the freshly predicted ones.
decoy_lib._fragment_intensity_df = res['fragment_intensity_df']
decoy_lib._fragment_mz_df = res['fragment_mz_df']
decoy_lib._precursor_df = res['precursor_df']

# Shift both fragment pointers by the number of target fragment rows, so they
# stay valid once the decoy fragments are stacked below the target fragments.
decoy_lib._precursor_df[['frag_start_idx', 'frag_stop_idx']] += len(target_lib._fragment_mz_df)

In [13]:
# Combine targets and decoys into a single library (targets first, decoys
# second), keeping only the columns both tables share (join='inner').
#
# ignore_index=True gives every combined table a fresh 0..n-1 row index.
# Without it the decoy rows keep their own labels, so the stacked fragment
# tables contain duplicated labels that no longer match the global row
# positions stored in frag_start_idx / frag_stop_idx.
output_lib = SpecLibBase()

output_lib._precursor_df = pd.concat(
    [target_lib._precursor_df, decoy_lib._precursor_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_mz_df = pd.concat(
    [target_lib._fragment_mz_df, decoy_lib._fragment_mz_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_intensity_df = pd.concat(
    [target_lib._fragment_intensity_df, decoy_lib._fragment_intensity_df],
    join='inner',
    ignore_index=True,
)

In [14]:
# Order the combined precursors by elution group and discard the old row
# labels, then number the rows 0..n-1 in the new 'precursor_idx' column.
output_lib._precursor_df = (
    output_lib._precursor_df
    .sort_values(by=['elution_group_idx'])
    .reset_index(drop=True)
)
output_lib.precursor_df['precursor_idx'] = np.arange(output_lib.precursor_df.shape[0])

In [15]:
from tqdm import tqdm
from alphabase.peptide.precursor import get_mod_seq_formula
from alphabase.constants.isotope import IsotopeDistribution


def calc_isotope_dist(precursor_df, max_isotope = 6, min_intensity = 0.001):
    """Add precursor isotope intensities as columns ``i_0`` .. ``i_{max_isotope-1}``.

    For every row the isotope distribution of the formula built from the
    'sequence' and 'mods' columns is calculated, peaks below ``min_intensity``
    are zeroed, the distribution is normalized to sum to 1, and its first
    ``max_isotope`` values are stored.

    The frame passed in is modified in place; nothing is returned.

    Parameters
    ----------
    precursor_df : pandas.DataFrame
        Table with 'sequence' and 'mods' columns. Any row index is accepted.
    max_isotope : int
        Number of isotope columns to write.
    min_intensity : float
        Peaks with an intensity below this threshold are set to zero before
        normalization.

    Notes
    -----
    Normalization uses the full thresholded distribution, so the stored
    ``max_isotope`` values can sum to less than 1 when the distribution has
    further peaks. Distributions shorter than ``max_isotope`` are zero-padded,
    and a distribution with no peak at or above the threshold yields a row of
    zeros.
    """
    isotope_dist = IsotopeDistribution()

    col_names = ['i_{}'.format(i) for i in range(max_isotope)]

    # Collect the results positionally and write each column once at the end.
    # This avoids a label-based write per row, which is slow and only correct
    # when the index happens to be 0..n-1.
    isotope_matrix = np.zeros((len(precursor_df), max_isotope), dtype=float)

    sequences = precursor_df['sequence'].values
    mods = precursor_df['mods'].values

    for i in tqdm(range(len(precursor_df))):
        dist, _ = isotope_dist.calc_formula_distribution(
            get_mod_seq_formula(sequences[i], mods[i])
        )
        # np.where builds a new array, so the calculator's result is not mutated.
        dist = np.asarray(dist, dtype=float)
        dist = np.where(dist < min_intensity, 0., dist)

        total = dist.sum()
        if total > 0:
            n_peaks = min(max_isotope, len(dist))
            isotope_matrix[i, :n_peaks] = dist[:n_peaks] / total

    # Plain column assignment from arrays is positional, so it is independent
    # of the frame's index labels.
    for j, col in enumerate(col_names):
        precursor_df[col] = isotope_matrix[:, j]