# Ion exclusion from MS2 spectra

In [1]:
import sys
sys.path.append("../..")

from ion_exclusion import *

from os.path import join, basename
import numpy as np

In [2]:
# Create the handler whose load_experiments_df() is used below to read the .mzML files.
# NOTE(review): OpenMS_File_Handler is not imported by name; presumably it arrives via
# the `from ion_exclusion import *` star import -- confirm.
oms_file_handler = OpenMS_File_Handler()

In [3]:
# Load every file ending in ".mzML" from the converted-data directory.
# The result is consumed below via .iterrows() with an "experiment" entry per row.
experiments = oms_file_handler.load_experiments_df(
    "/mnt/d/mine2sirius_pipe/data/converted/20241023_HILICZ_DDAit",
    file_ending=".mzML",
)

Loading names:


100%|██████████| 9/9 [00:00<00:00, 145187.45it/s]


Loading experiments:


  0%|          | 0/9 [00:00<?, ?it/s]

100%|██████████| 9/9 [00:26<00:00,  2.91s/it]


In [4]:
# Map each loaded file (by base name) to the distinct precursor m/z values of its
# spectra with MS level >= 2.
ms2_in_files = {}
for experiment in experiments["experiment"]:
    unique_precursor_mzs = {
        precursor.getMZ()
        for spectrum in experiment.getSpectra()
        if spectrum.getMSLevel() >= 2
        for precursor in spectrum.getPrecursors()
    }
    file_key = basename(experiment.getLoadedFilePath())
    ms2_in_files[file_key] = list(unique_precursor_mzs)

In [5]:
# Read the feature quantification table and discard every column whose header
# contains "Unnamed" (pandas' placeholder name for header cells that were empty).
quant_df = pd.read_csv("/mnt/d/mine2sirius_pipe/data/processed/20241023_HILICZ_DDAit/20241023_HILICZ_DDAit_iimn_fbmn_quant.csv")
quant_df = quant_df.drop(columns=[col for col in quant_df.columns if "Unnamed" in col])

In [6]:
# Tolerances handed to np.isclose when comparing a feature m/z with a precursor m/z:
# a pair matches when |feature - precursor| <= MZ_ATOL + MZ_RTOL * |precursor|.
MZ_RTOL = 1e-5
MZ_ATOL = 5e-3

# For every file, flag each feature of the quant table with 1 if any MS2 precursor m/z
# of that file matches the feature m/z within tolerance, else 0.
# Only m/z is compared here; retention time is not part of the match.
feature_mzs = quant_df["row m/z"].to_numpy(dtype=float)

mz_in_ms2 = {}
for file_name, ms2_mzs in ms2_in_files.items():
    # Convert the precursor list once per file instead of once per feature.
    file_precursor_mzs = np.asarray(ms2_mzs, dtype=float)
    # A file without any precursor yields an empty comparison, i.e. 0 for every feature.
    mz_in_ms2[file_name] = [
        int(np.isclose(mz_val, file_precursor_mzs, rtol=MZ_RTOL, atol=MZ_ATOL).any())
        for mz_val in feature_mzs
    ]

row_info = pd.DataFrame( {"id": quant_df["row ID"], "m/z": quant_df["row m/z"], "rt": quant_df["row retention time"]} )
# row_info inherits quant_df's index and DataFrame.join aligns on the index, so the
# presence flags must carry the same index; otherwise a filtered or re-sorted
# quant_df would silently pair flags with the wrong features.
ms2_presence_df = pd.DataFrame( mz_in_ms2, index=quant_df.index )
ms2_presence_df = row_info.join(ms2_presence_df)


In [7]:
# Persist the per-file MS2 presence table. to_csv is called with its defaults, so the
# DataFrame index is written as the first (unnamed) column of the file.
ms2_presence_df.to_csv(
    path_or_buf="/mnt/d/mine2sirius_pipe/substep_batches/ion_exclusion/20241023_HILICZ_DDAit_ms2_presence.csv"
)
# Bare last expression: render the table as the cell output.
ms2_presence_df

Unnamed: 0,id,m/z,rt,AAmix1microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_02_HILICZ_DDA1_neg.mzML,E.coli_01_HILICZ_DDA1_neg.mzML,E.coli_02_HILICZ_DDA1_neg.mzML,E.coli_03_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_01_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_02_HILICZ_DDA1_neg.mzML,Quench_01_HILICZ_DDA1_neg.mzML,Quench_02_HILICZ_DDA1_neg.mzML
0,1264,56.995488,6.061204,0,0,1,1,1,0,1,0,0
1,958,59.014152,2.598620,0,0,1,1,1,1,1,0,0
2,1227,60.992964,5.781988,0,0,1,1,1,1,1,0,0
3,910,61.988448,2.383504,0,0,1,1,1,1,1,0,0
4,949,71.013664,2.604887,0,0,1,1,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
306,802,1305.363259,1.233496,0,0,1,1,1,0,0,0,0
307,788,1306.363947,1.185573,0,0,0,0,0,0,1,0,0
308,777,1326.357239,1.090833,1,1,1,1,0,1,1,1,1
309,514,1326.357641,0.605846,1,1,1,1,0,1,1,1,1


## Add annotations

In [8]:
# Read the annotation table and drop every row whose adduct string has a "+" in the
# part after the last "]" (presumably the charge suffix, i.e. positive-mode adducts
# are removed -- confirm against the annotation export format).
local_annotations = pd.read_csv("/mnt/d/mine2sirius_pipe/data/processed/20241023_HILICZ_DDAit/20241023_HILICZ_DDAit_annotations")
# Build a positional boolean mask rather than a list of index labels: it cannot
# duplicate rows on a non-unique index. Non-string adducts (e.g. NaN from an empty
# cell) are kept instead of raising AttributeError on .split().
keep_annotation = np.array(
    [
        not (isinstance(adduct, str) and "+" in adduct.split("]")[-1])
        for adduct in local_annotations["adduct"]
    ],
    dtype=bool,
)
local_annotations = local_annotations.loc[keep_annotation]

In [9]:
# Left-join the annotations onto the presence table by feature "id": every presence
# row is kept, and a feature with several annotation rows appears once per annotation.
# Non-key columns present in both frames receive pandas' default _x/_y suffixes.
annotated_ms2_presence = ms2_presence_df.merge(local_annotations, how="left", on="id")

In [None]:
# Persist the annotated presence table. to_csv is called with its defaults, so the
# DataFrame index is written as the first (unnamed) column of the file.
annotated_ms2_presence.to_csv(
    path_or_buf="/mnt/d/mine2sirius_pipe/substep_batches/ion_exclusion/20241023_HILICZ_DDAit_ms2_presence_annotated.csv"
)
# Bare last expression: render the table as the cell output.
annotated_ms2_presence

Unnamed: 0,id,m/z,rt_x,AAmix1microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_02_HILICZ_DDA1_neg.mzML,E.coli_01_HILICZ_DDA1_neg.mzML,E.coli_02_HILICZ_DDA1_neg.mzML,E.coli_03_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_01_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_02_HILICZ_DDA1_neg.mzML,...,score,precursor_mz,ion_mobility,ccs,rt_y,mol_formula,smiles,inchi,inchi_key,method
0,1264,56.995488,6.061204,0,0,1,1,1,0,1,...,,,,,,,,,,
1,958,59.014152,2.598620,0,0,1,1,1,1,1,...,0.672,59.013824,,,,C2H4O2,,,,simple_compound_db_annotation
2,1227,60.992964,5.781988,0,0,1,1,1,1,1,...,0.840,60.993124,,,,CH2O3,,,,simple_compound_db_annotation
3,910,61.988448,2.383504,0,0,1,1,1,1,1,...,0.876,61.988324,,,,HNO3,,,,simple_compound_db_annotation
4,949,71.013664,2.604887,0,0,1,1,1,1,1,...,0.840,71.013824,,,,C3H4O2,,,,simple_compound_db_annotation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
343,802,1305.363259,1.233496,0,0,1,1,1,0,0,...,,,,,,,,,,
344,788,1306.363947,1.185573,0,0,0,0,0,0,1,...,,,,,,,,,,
345,777,1326.357239,1.090833,1,1,1,1,0,1,1,...,,,,,,,,,,
346,514,1326.357641,0.605846,1,1,1,1,0,1,1,...,,,,,,,,,,
