# Example Notebook

Building a spectral library for DIA-NN based on AlphaPept search results with AlphaPeptDeep

## Installation & File Download

Follow the installation instructions for [AlphaPeptDeep](https://github.com/MannLabs/alphapeptdeep), i.e.:

`!pip install alphabase peptdeep`

In [1]:
import peptdeep

# Record the installed AlphaPeptDeep version so the run can be reproduced.
print(peptdeep.__version__)

1.0.2


In [2]:
#Download the files

import os
try:
    import wget
except ModuleNotFoundError:
    !pip install wget
    import wget
    
FILE_DICT = {}
FILE_DICT['thermo_IRT.ms_data.hdf'] = 'https://datashare.biochem.mpg.de/s/oUkKeB0SraHATrh/download'

tmp_folder = os.path.join(os.getcwd(),'/temp_test_run')

if not os.path.isdir(tmp_folder):
    os.mkdir(tmp_folder)
    
for file in FILE_DICT:
    target = os.path.join(tmp_folder, file)
    if not os.path.isfile(target):
        wget.download(FILE_DICT[file], target)

## Load DataFrame and import to AlphaPeptDeep

In [3]:
import os

# Location of the downloaded AlphaPept result file inside tmp_folder.
ap_ms_data_hdf = os.path.join(
    tmp_folder,
    'thermo_IRT.ms_data.hdf',
)

In [4]:
from alphabase.io.psm_reader.alphapept_reader import AlphaPeptReader

# Modification name pairs handed to the reader. The keys use the
# 'Name@Site' form that also shows up in the `mods` column of the
# resulting table; the values are presumably the matching AlphaPept
# tokens -- confirm against the alphabase reader documentation.
modification_mapping = dict([
    ('Carbamidomethyl@C', 'cC'),
    ('Oxidation@M', 'oxM'),
    ('Phospho@S', 'pS'),
    ('Phospho@T', 'pT'),
    ('Phospho@Y', 'pY'),
    ('Acetyl@Protein N-term', 'a'),
])

reader = AlphaPeptReader(modification_mapping=modification_mapping)

# Read the AlphaPept results; the PSM table is then available as `psm_df`.
reader.import_file(ap_ms_data_hdf)
reader.psm_df

Unnamed: 0,rt,scan_num,spec_idx,query_id,score,precursor_mz,charge,raw_name,fdr,decoy,sequence,mods,mod_sites,nAA,rt_norm
0,2.050556,937,936,1536,0.765814,508.262939,2,thermo_IRT,0.0,0,LGGNEQVTR,Acetyl@Protein N-term,0.0,9,0.352973
1,2.050556,927,926,1535,0.744933,508.262939,2,thermo_IRT,0.0,0,LGGNEQVTR,Acetyl@Protein N-term,0.0,9,0.352973
2,4.792137,3337,3336,2242,0.846333,683.857632,2,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.824895
3,4.792137,3399,3398,2241,0.836358,683.857632,2,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.824895
4,2.794513,1732,1731,2237,0.786324,456.238705,3,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.481034
5,2.508832,1409,1408,2239,0.744924,456.239318,3,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.431858
6,2.21901,1150,1149,2236,1.0,683.828846,2,thermo_IRT,0.0,0,VEATFGVDESNAK,,,13,0.381969
7,3.189371,2102,2101,2373,1.0,466.561822,3,thermo_IRT,0.0,0,DGLDAASYYAPVR,,,13,0.549003
8,5.804992,3827,3826,2235,0.756397,683.828439,2,thermo_IRT,0.0,0,VEATFGVDESNAK,,,13,0.999243
9,5.80939,3833,3832,2377,0.747592,699.339856,2,thermo_IRT,0.0,0,DGLDAASYYAPVR,,,13,1.0


## Extract fragment intensities from the spectrum file

In [5]:
from peptdeep.rescore.feature_extractor import match_one_raw

# Match the imported PSMs against the MS2 data stored in the same AlphaPept
# HDF file. The returned PSM table carries the additional columns
# `frag_start_idx` / `frag_stop_idx`, shown in the output below.
(
    psm_df,
    frag_mz_df,
    frag_intensity_df,
    frag_merr_df,
) = match_one_raw(
    reader.psm_df,
    ms2_file=ap_ms_data_hdf,
    ms2_file_type='alphapept',
    frag_types_to_match=['b_z1', 'b_z2', 'y_z1', 'y_z2'],
    ms2_tol=50.0,
    ms2_ppm=True,  # presumably marks ms2_tol as a ppm value -- confirm in peptdeep docs
    calibrate_frag_mass_error=False,
)
psm_df

Unnamed: 0,rt,scan_num,spec_idx,query_id,score,precursor_mz,charge,raw_name,fdr,decoy,sequence,mods,mod_sites,nAA,rt_norm,frag_start_idx,frag_stop_idx
0,2.050556,937,936,1536,0.765814,508.262939,2,thermo_IRT,0.0,0,LGGNEQVTR,Acetyl@Protein N-term,0.0,9,0.352973,0,8
1,2.050556,927,926,1535,0.744933,508.262939,2,thermo_IRT,0.0,0,LGGNEQVTR,Acetyl@Protein N-term,0.0,9,0.352973,8,16
2,4.792137,3337,3336,2242,0.846333,683.857632,2,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.824895,16,27
3,4.792137,3399,3398,2241,0.836358,683.857632,2,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.824895,27,38
4,2.794513,1732,1731,2237,0.786324,456.238705,3,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.481034,38,49
5,2.508832,1409,1408,2239,0.744924,456.239318,3,thermo_IRT,0.0,0,TPVITGAPYEYR,,,12,0.431858,49,60
6,2.21901,1150,1149,2236,1.0,683.828846,2,thermo_IRT,0.0,0,VEATFGVDESNAK,,,13,0.381969,60,72
7,3.189371,2102,2101,2373,1.0,466.561822,3,thermo_IRT,0.0,0,DGLDAASYYAPVR,,,13,0.549003,72,84
8,5.804992,3827,3826,2235,0.756397,683.828439,2,thermo_IRT,0.0,0,VEATFGVDESNAK,,,13,0.999243,84,96
9,5.80939,3833,3832,2377,0.747592,699.339856,2,thermo_IRT,0.0,0,DGLDAASYYAPVR,,,13,1.0,96,108


## Generate the library for DIA-NN

In [6]:
from peptdeep.protein.fasta import SpecLibFasta

# Create an empty library object and attach the tables produced by the
# fragment matching step directly to its internal attributes.
fasta_lib = SpecLibFasta()

fasta_lib._fragment_intensity_df = frag_intensity_df
fasta_lib._fragment_mz_df = frag_mz_df
fasta_lib._precursor_df = psm_df

In [7]:
from peptdeep.spec_lib.translate import translate_to_tsv

# The output path is derived from the input path by cutting off the trailing
# 'ms_data.hdf' and appending 'tsv', so the file lands next to the input.
translate_to_tsv(
    fasta_lib,
    tsv=ap_ms_data_hdf[:-len('ms_data.hdf')] + 'tsv',
    keep_k_highest_fragments=1000,  # keep all matched fragments
    min_frag_intensity=0.001,
)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.57s/it]


Translation finished, it will take several minutes to export the rest precursors to the tsv file...
