In [1]:
import pandas as pd
import numpy as np
import os

import logging
logging.getLogger().setLevel(logging.INFO)

from alphabase.spectral_library.flat import SpecLibFlat
from alphabase.spectral_library.base import SpecLibBase
from alphabase.spectral_library.reader import SWATHLibraryReader
from alphadia.extraction.utils import reannotate_fragments
from alphabase.psm_reader import psm_reader_provider

INFO:root:
INFO:root:Cupy is not available


In [2]:
# Input and output locations used by the rest of the notebook.
# Each path can be overridden with an environment variable so the notebook can
# run on another machine; when the variable is unset, the original hard-coded
# location is used unchanged.
brunner_lib = os.environ.get(
    'ALPHADIA_BRUNNER_LIB',
    '/Users/georgwallmann/Documents/data/brunner2022/brunner_1ng_lib.tsv',
)
insilico_lib_location = os.environ.get(
    'ALPHADIA_INSILICO_LIB',
    '/Users/georgwallmann/Documents/data/libraries/2022_111_brunner_2022_1ng_all_29_human_trypsin_diann_decoy.hdf',
)
output_location = os.environ.get(
    'ALPHADIA_OUTPUT_LIB',
    '/Users/georgwallmann/Library/CloudStorage/OneDrive-Personal/Studium/MPI/AG Mann/alphaDIA/2022_12_18_performance_testing/out_lib.hdf',
)

In [3]:
# Create the dense target library from the DIA-NN PSM file.
target_lib = SWATHLibraryReader()
target_lib.import_file(brunner_lib)

# Flag every row of the imported library with decoy = 0.
target_lib.precursor_df['decoy'] = 0

# Restrict both fragment frames to the same four columns, in this order.
kept_frag_columns = ['b_z1', 'b_z2', 'y_z1', 'y_z2']
target_lib._fragment_mz_df = target_lib._fragment_mz_df.loc[:, kept_frag_columns]
target_lib._fragment_intensity_df = target_lib._fragment_intensity_df.loc[:, kept_frag_columns]

In [4]:
# Import the same library file a second time as the basis for the decoys.
decoy_lib = SWATHLibraryReader()
decoy_lib.import_file(brunner_lib)

# Select the 'diann' decoy setting, append the decoy sequences and
# recompute the precursor m/z values.
decoy_lib.decoy = 'diann'
decoy_lib.append_decoy_sequence()
decoy_lib.calc_precursor_mz()

# Keep only the rows whose 'decoy' column equals 1.
is_decoy = decoy_lib.precursor_df['decoy'] == 1
decoy_lib.precursor_df = decoy_lib.precursor_df[is_decoy]

In [5]:
from peptdeep.pretrained_models import ModelManager
from alphabase.peptide.fragment import get_charged_frag_types

# Charged fragment types for the 'b' and 'y' series.
# NOTE(review): the second argument (2) is presumably the maximum fragment
# charge - confirm against the alphabase documentation.
frag_types = get_charged_frag_types(['b', 'y'], 2)

# Model manager used for the prediction, set to NCE 30 and the 'timsTOF' instrument.
model_mgr = ModelManager()
model_mgr.nce, model_mgr.instrument = 30, 'timsTOF'

In [6]:
# Run the 'ms2' prediction on the decoy precursors, restricted to `frag_types`.
prediction_kwargs = dict(predict_items=['ms2'], frag_types=frag_types)
res = model_mgr.predict_all(decoy_lib.precursor_df, **prediction_kwargs)

2023-01-02 00:14:36> Using multiprocessing ...
2023-01-02 00:14:36> Predicting ms2 ...


100%|██████████| 24/24 [00:09<00:00,  2.46it/s]


In [7]:
# Replace the decoy library's three frames with the frames returned by the prediction.
for frame_name in ('precursor_df', 'fragment_mz_df', 'fragment_intensity_df'):
    setattr(decoy_lib, f'_{frame_name}', res[frame_name])

In [8]:
# Number of rows in the predicted decoy fragment intensity frame.
print(decoy_lib._fragment_intensity_df.shape[0])

311297


In [9]:
# The decoy fragment rows are concatenated after the target fragment rows
# further down, so the decoy start/stop indices are offset by the number of
# target fragment rows.
#
# The shifted frame is derived from the prediction result `res['precursor_df']`
# instead of being incremented in place: an in-place `+=` adds the offset again
# every time this cell is re-run and also modifies `res['precursor_df']`, which
# is the same object. Building a new frame keeps the cell idempotent.
target_frag_count = len(target_lib._fragment_mz_df)
decoy_lib._precursor_df = res['precursor_df'].assign(
    frag_start_idx=lambda df: df['frag_start_idx'] + target_frag_count,
    frag_stop_idx=lambda df: df['frag_stop_idx'] + target_frag_count,
)

In [10]:
# Combined library: target rows first, decoy rows second, for all three frames.
output_lib = SpecLibBase()

# join='inner' keeps only the columns present in both inputs.
# ignore_index=True gives each combined frame a fresh 0..n-1 index. Without it
# the target and decoy frames keep their own row labels, which leaves duplicated
# labels in the fragment frames, while frag_start_idx / frag_stop_idx are
# offsets into the combined fragment frame (the decoy indices are shifted by
# the target fragment row count before this cell).
output_lib._precursor_df = pd.concat(
    [target_lib._precursor_df, decoy_lib._precursor_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_mz_df = pd.concat(
    [target_lib._fragment_mz_df, decoy_lib._fragment_mz_df],
    join='inner',
    ignore_index=True,
)
output_lib._fragment_intensity_df = pd.concat(
    [target_lib._fragment_intensity_df, decoy_lib._fragment_intensity_df],
    join='inner',
    ignore_index=True,
)

In [11]:
# Save the combined target + decoy library to the path stored in `output_location`.
output_lib.save_hdf(output_location)

In [12]:
# Show the precursor table of the combined library as the cell output.
output_lib.precursor_df

Unnamed: 0,sequence,charge,rt,mobility,mods,mod_sites,nAA,frag_start_idx,frag_stop_idx,rt_norm,precursor_mz,ccs,decoy,mod_seq_hash,mod_seq_charge_hash
0,AAAAAAALQAK,2,-15.465085,0.856250,,,11,0,10,0.198066,478.779816,347.905464,0,-2991749297035814181,-2991749297035814179
1,AAAAALSQQQSLQER,2,-3.104374,1.030682,,,15,10,24,0.253858,786.410443,416.445789,0,551941559302260027,551941559302260029
2,AAAAASAAGPGGLVAGK,2,2.835199,0.943333,,,17,24,40,0.280667,670.370058,381.729309,0,-3604848273074995317,-3604848273074995315
3,AAAAAWEEPSSGNGTAR,2,2.707207,1.039318,,,17,40,56,0.280090,823.381882,419.770380,0,8311254918165539724,8311254918165539726
4,AAAASAAEAGIATTGTEDSDDALLK,3,38.684788,1.000227,,,25,56,80,0.442481,774.043203,604.497867,0,-1416384479300153794,-1416384479300153791
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27793,EVGGAIDFGAAYVLEQASSHIGNSTQATLR,4,100.893500,1.009773,,,30,623017,623046,0.723271,766.384899,812.512721,1,111295133357387816,111295133357387820
27794,DLAIATGGAVFGEEGLTLNLEDVQPHDLLK,4,92.312187,1.011250,,,30,623046,623075,0.684538,784.660666,813.659782,1,4023991883338393540,4023991883338393544
27795,SNSSNDTFPTAMHIAAAIEVHEVLLPGLNK,4,95.498299,1.040227,,,30,623075,623104,0.698919,794.906576,836.850059,1,2669979993308629378,2669979993308629382
27796,VEHQTGPIVWGEPGTNGQHAFYQLIHQGSK,4,57.051338,1.057954,,,30,623104,623133,0.525382,829.662803,850.988199,1,-9168276693893309775,-9168276693893309771


In [15]:
def check_fragment_coverage(speclib):
    """Print the expected and the indexed number of fragment rows of a library.

    Two totals are printed on one line, separated by a space:

    1. the sum of ``nAA - 1`` over all precursors, and
    2. the sum of ``stop - start`` of the per-precursor fragment index ranges.

    Parameters
    ----------
    speclib : object
        Anything exposing a ``precursor_df`` DataFrame with the columns
        ``nAA``, ``frag_start_idx`` and ``frag_stop_idx``. If
        ``frag_stop_idx`` is absent, ``frag_end_idx`` is read instead.

    Returns
    -------
    None
        The two totals are only printed.

    Raises
    ------
    KeyError
        If a required column is missing from ``precursor_df``.
    """
    precursor_df = speclib.precursor_df

    # The precursor frames in this notebook name the end column 'frag_stop_idx';
    # the original code read 'frag_end_idx', which raises KeyError on them.
    # 'frag_end_idx' is kept as a fallback for frames that use that name.
    if 'frag_stop_idx' in precursor_df.columns:
        stop_column = 'frag_stop_idx'
    else:
        stop_column = 'frag_end_idx'

    nAA = precursor_df['nAA'].values
    frag_stop = precursor_df[stop_column].values
    frag_start = precursor_df['frag_start_idx'].values

    aa_count = np.sum(nAA - 1)
    frag_count = np.sum(frag_stop - frag_start)
    print(aa_count, frag_count)