In [1]:
import pandas as pd
import numpy as np
import os

import logging
logging.getLogger().setLevel(logging.INFO)

from alphabase.spectral_library.flat import SpecLibFlat
from alphabase.spectral_library.base import SpecLibBase
from alphabase.spectral_library.reader import SWATHLibraryReader
from alphadia.extraction.utils import reannotate_fragments
from alphabase.psm_reader import psm_reader_provider

INFO:root:
INFO:root:Cupy is not available


In [2]:
# Input/output locations for this run. These are absolute paths on the
# author's machine and must be adapted before running elsewhere.

# Library TSV that the target precursors and fragments are imported from.
brunner_lib = (
    '/Users/georgwallmann/Documents/data/brunner2022/'
    'brunner_1ng_lib.tsv'
)

# NOTE(review): not referenced by any cell visible in this notebook section.
insilico_lib_location = (
    '/Users/georgwallmann/Documents/data/libraries/'
    '2022_111_brunner_2022_1ng_all_29_human_trypsin_diann_decoy.hdf'
)

# HDF file the merged target + decoy library is written to at the end.
output_location = (
    '/Users/georgwallmann/Library/CloudStorage/OneDrive-Personal/Studium/MPI/'
    'AG Mann/alphaDIA/2022_12_18_performance_testing/out_lib_eg.hdf'
)

In [3]:
# Build the target library by importing the DIA-NN library TSV.
target_lib = SWATHLibraryReader()
target_lib.import_file(brunner_lib)

# Everything imported here is a target, so flag it with decoy == 0.
target_lib.precursor_df['decoy'] = 0

# Keep only the b/y fragment columns for charge 1 and 2 in both fragment
# tables (m/z and intensity must stay column-aligned).
kept_frag_columns = ['b_z1', 'b_z2', 'y_z1', 'y_z2']
target_lib._fragment_mz_df = target_lib._fragment_mz_df[kept_frag_columns]
target_lib._fragment_intensity_df = target_lib._fragment_intensity_df[kept_frag_columns]

# Give every target precursor its own elution group, numbered by row position.
n_targets = len(target_lib._precursor_df)
target_lib._precursor_df['elution_group_idx'] = np.arange(n_targets)

Index(['FileName', 'PrecursorMz', 'ProductMz', 'Tr_recalibrated',
       'IonMobility', 'transition_name', 'LibraryIntensity',
       'transition_group_id', 'decoy', 'PeptideSequence', 'Proteotypic',
       'QValue', 'PGQValue', 'Ms1ProfileCorr', 'ProteinGroup', 'ProteinName',
       'Genes', 'FullUniModPeptideName', 'ModifiedPeptide', 'PrecursorCharge',
       'PeptideGroupLabel', 'UniprotID', 'NTerm', 'CTerm', 'FragmentType',
       'FragmentCharge', 'FragmentSeriesNumber', 'FragmentLossType',
       'ExcludeFromAssay'],
      dtype='object')


In [8]:
# Inspect the imported target precursor table (plain-text dump of the DataFrame).
print(target_lib.precursor_df)

                             sequence  charge          rt  mobility  \
279                           ADEWLMK       2   35.276371  0.743977   
410                           AEEILEK       2  -10.277693  0.766364   
555                           AEWQVYK       2   19.510443  0.784271   
1118                          ALEDLHK       2  -17.151136  0.763068   
1138                          ALELDHK       2  -17.018036  0.758438   
...                               ...     ...         ...       ...   
5758   ELGGAIDFGAAYVLEQASSHIGNSTQATVR       4  100.893500  1.009773   
21917  SQSSNDTFPTAMHIAAAIEVHEVLLPGLQK       4   95.498299  1.040227   
23872  TTGIVMDSGDGVTHTVPIYEGYALPHAILR       4   67.357315  1.073750   
24064  TVPPAVTGITFLSGGQSEEEASINLNAINK       4   89.908928  1.036458   
24427  VDHQTGPIVWGEPGTNGQHAFYQLIHQGTK       4   57.051338  1.057954   

                    proteins mods mod_sites  nAA  frag_start_idx  \
279               MYH9_HUMAN                   7            3450   
410        

In [9]:
# Seed the decoy library with copies of the target tables so that the target
# library itself is left untouched by the decoy generation below.
decoy_lib = SpecLibBase()
decoy_lib._precursor_df = target_lib.precursor_df.copy()
for _frame_attr in ('_fragment_mz_df', '_fragment_intensity_df'):
    setattr(decoy_lib, _frame_attr, getattr(target_lib, _frame_attr).copy())

# Select the 'diann' decoy scheme, append the decoy sequences and run
# calc_precursor_mz() on the extended precursor table.
decoy_lib.decoy = 'diann'
decoy_lib.append_decoy_sequence()
decoy_lib.calc_precursor_mz()

# Only the decoy rows (decoy == 1) are kept; the targets stay in target_lib.
is_decoy = decoy_lib.precursor_df['decoy'] == 1
decoy_lib.precursor_df = decoy_lib.precursor_df[is_decoy]

In [10]:
from peptdeep.pretrained_models import ModelManager
from alphabase.peptide.fragment import get_charged_frag_types

# Fragment columns to predict for the decoys.
# presumably yields the b/y columns for charge 1 and 2 that the target
# fragment tables were restricted to — verify against get_charged_frag_types.
frag_types = get_charged_frag_types(['b', 'y'], 2)

# Model manager used for the MS2 prediction, with the settings used here.
model_mgr = ModelManager()
model_mgr.nce = 30
model_mgr.instrument = 'timsTOF'

In [11]:
# Predict MS2 fragments for the decoy precursors only; the result is indexed
# by 'precursor_df', 'fragment_mz_df' and 'fragment_intensity_df' below.
res = model_mgr.predict_all(decoy_lib.precursor_df, predict_items=['ms2'], frag_types=frag_types)

2023-01-15 17:33:34> Using multiprocessing with 8 processes ...
2023-01-15 17:33:34> Predicting ms2 ...


100%|██████████| 24/24 [00:08<00:00,  2.80it/s]


In [12]:
# Replace the decoy tables with the predicted ones: each entry of `res` is
# stored on the library attribute of the same name with a leading underscore.
for _result_key in ('precursor_df', 'fragment_mz_df', 'fragment_intensity_df'):
    setattr(decoy_lib, '_' + _result_key, res[_result_key])

In [13]:
# Shift the decoy fragment pointers by the number of target fragment rows.
# NOTE: this is an in-place `+=`, so re-running the cell shifts the pointers
# again; run it exactly once after the prediction results were assigned.
n_target_frag_rows = len(target_lib._fragment_mz_df)
decoy_lib._precursor_df[['frag_start_idx', 'frag_stop_idx']] += n_target_frag_rows

In [14]:
# Merge the target and decoy tables into a single library.
output_lib = SpecLibBase()

# join='inner' keeps only the columns that exist in both the target and the
# decoy table.
# ignore_index=True gives every concatenated table a fresh 0..n-1 row index.
# The decoy frag_start_idx/frag_stop_idx values were offset by the number of
# target fragment rows, i.e. they are row positions in the stacked fragment
# tables; without ignore_index the decoy rows would keep labels that collide
# with the target rows.
output_lib._precursor_df = pd.concat(
    [target_lib._precursor_df, decoy_lib._precursor_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_mz_df = pd.concat(
    [target_lib._fragment_mz_df, decoy_lib._fragment_mz_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_intensity_df = pd.concat(
    [target_lib._fragment_intensity_df, decoy_lib._fragment_intensity_df],
    join='inner',
    ignore_index=True,
)

In [15]:
# Order the merged precursors by elution group and renumber the rows.
sorted_precursors = output_lib._precursor_df.sort_values(by=['elution_group_idx'])
output_lib._precursor_df = sorted_precursors.reset_index(drop=True)

# precursor_idx is simply the row position after sorting.
n_precursors = len(output_lib.precursor_df)
output_lib.precursor_df['precursor_idx'] = np.arange(n_precursors)

In [16]:
# Run the isotope calculation on the merged library, then remove the isotope
# columns listed below from the precursor table (in place).
output_lib.calc_precursor_isotope()

unused_isotope_columns = [
    'isotope_m1_intensity',
    'isotope_apex_intensity',
    'isotope_right_most_intensity',
    'isotope_right_most_offset',
    'isotope_m1_mz',
    'isotope_apex_mz',
    'isotope_right_most_mz',
]
output_lib._precursor_df.drop(columns=unused_isotope_columns, inplace=True)

In [17]:
# Sanity check: number of target (decoy == 0) vs. decoy (decoy == 1) precursors
# in the merged library.
output_lib._precursor_df['decoy'].value_counts()

0    27883
1    27798
Name: decoy, dtype: int64

In [18]:
# Write the merged target + decoy library to the HDF file at output_location.
output_lib.save_hdf(output_location)

In [15]:
def check_fragment_coverage(speclib):
    """Print two fragment-row totals of a spectral library side by side.

    The first number is the sum of ``nAA - 1`` over all precursors, the second
    is the sum of ``stop - start`` of the fragment pointers over all
    precursors. Presumably both should be equal when every precursor owns one
    fragment row per peptide bond — confirm against the library layout.

    Parameters
    ----------
    speclib : object
        Any object exposing a ``precursor_df`` DataFrame with the columns
        ``nAA``, ``frag_start_idx`` and ``frag_stop_idx`` (or, as a fallback,
        ``frag_end_idx``).

    Returns
    -------
    None
        The two totals are printed, separated by a space.

    Raises
    ------
    KeyError
        If a required column is missing from ``precursor_df``.
    """
    precursor_df = speclib.precursor_df

    # The rest of this notebook names the end pointer 'frag_stop_idx'; the
    # previously hard-coded 'frag_end_idx' is only used when that column is
    # absent, so tables with the other name keep working.
    if 'frag_stop_idx' in precursor_df.columns:
        stop_column = 'frag_stop_idx'
    else:
        stop_column = 'frag_end_idx'

    nAA = precursor_df['nAA'].values
    frag_stop = precursor_df[stop_column].values
    frag_start = precursor_df['frag_start_idx'].values

    aa_count = np.sum(nAA - 1)
    frag_count = np.sum(frag_stop - frag_start)
    print(aa_count, frag_count)