# Ion exclusion from MS2 spectra

In [1]:
import sys
# Make the directory two levels above the notebook importable, so that the
# project-local `ion_exclusion` module below can be resolved.
sys.path.append("../..")

# NOTE(review): the star import is the only visible source of `pd` and
# `OpenMS_File_Handler` used in later cells — presumably both are re-exported
# by `ion_exclusion`; confirm, and prefer explicit imports.
from ion_exclusion import *

# NOTE(review): `join` is not used in any cell visible in this notebook.
from os.path import join, basename
import numpy as np

In [2]:
# Identifier of the dataset being processed; it is interpolated into every
# input and output path used by the cells below.
name = "20241023_Ex0016_HILICZ_DDAit"
# Project helper used in the next cell to load the converted .mzML files.
oms_file_handler = OpenMS_File_Handler()

In [3]:
# Load every ".mzML" file of the dataset's "converted" directory. The result is
# iterated with `.iterrows()` below and exposes an "experiment" column.
# In the recorded run this loaded 19 files and took about 1 min 40 s.
experiments = oms_file_handler.load_experiments_df( f"/mnt/d/mine2sirius_pipe/data/converted/{name}", file_ending=".mzML")

Loading names:


100%|██████████| 19/19 [00:00<00:00, 269228.97it/s]


Loading experiments:


100%|██████████| 19/19 [01:39<00:00,  5.25s/it]


In [4]:
# Map each loaded file (by base name) to the distinct precursor m/z values of
# its fragmentation spectra.
# NOTE(review): the level filter is ">= 2", so MS3+ spectra (if any exist) are
# included as well — confirm that this is intended for an "MS2" presence table.
ms2_in_files = {}
for experiment in experiments["experiment"]:
    unique_precursor_mzs = {
        precursor.getMZ()
        for spectrum in experiment.getSpectra()
        if spectrum.getMSLevel() >= 2
        for precursor in spectrum.getPrecursors()
    }
    # Stored as a list; only membership matters for the matching step.
    precursor_mzs = list(unique_precursor_mzs)
    ms2_in_files[basename(experiment.getLoadedFilePath())] = precursor_mzs

In [5]:
# Read the feature quantification table and keep only the columns whose header
# does not contain "Unnamed" (pandas assigns "Unnamed: N" to header-less columns).
quant_df = (
    pd.read_csv(f"/mnt/d/mine2sirius_pipe/data/processed/{name}/{name}_iimn_fbmn_quant.csv")
    .loc[:, lambda frame: ["Unnamed" not in column for column in frame.columns]]
)

In [6]:
# Tolerances forwarded to np.isclose, which accepts a match when
#   |feature_mz - precursor_mz| <= MZ_ATOL + MZ_RTOL * |precursor_mz|
MZ_RTOL = 1e-5
MZ_ATOL = 5e-3

# One m/z per feature row of the quantification table.
feature_mzs = quant_df["row m/z"].to_numpy(dtype=float)

# For every file: a 0/1 flag per feature telling whether any precursor m/z of
# that file matches the feature m/z within the tolerances above.
# NOTE(review): matching uses m/z only; retention time is not considered.
mz_in_ms2 = {}
for file_name, ms2_mzs in ms2_in_files.items():
    precursor_mzs = np.asarray(ms2_mzs, dtype=float)
    # Broadcast features (rows) against precursors (columns) in one call instead
    # of one np.isclose call per feature. A file without precursors yields an
    # (n_features, 0) matrix, i.e. all flags are 0.
    matches = np.isclose(feature_mzs[:, None], precursor_mzs[None, :], rtol=MZ_RTOL, atol=MZ_ATOL)
    mz_in_ms2[file_name] = matches.any(axis=1).astype(int).tolist()

row_info = pd.DataFrame( {"id": quant_df["row ID"], "m/z": quant_df["row m/z"], "rt": quant_df["row retention time"]} )
# Reuse quant_df's index so the index-based join below stays row-aligned even if
# quant_df is ever filtered or re-indexed before this cell.
ms2_presence_df = pd.DataFrame( mz_in_ms2, index=quant_df.index )
ms2_presence_df = row_info.join(ms2_presence_df)


In [7]:
ms2_presence_df

Unnamed: 0,id,m/z,rt,AAmix10microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_02_HILICZ_DDA1_neg.mzML,E.coli_01_HILICZ_DDA1it_neg.mzML,E.coli_01_HILICZ_DDA1_neg.mzML,E.coli_02_HILICZ_DDA1it_neg.mzML,E.coli_02_HILICZ_DDA1_neg.mzML,...,E.coli_04_HILICZ_DDA1it_neg.mzML,E.coli_05_HILICZ_DDA1it_neg.mzML,E.coli_06_HILICZ_DDA1it_neg.mzML,E.coli_07_HILICZ_DDA1it_neg.mzML,E.coli_08_HILICZ_DDA1it_neg.mzML,E.coli_AAmix10microM_01_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_01_HILICZ_DDA1_neg.mzML,E.coli_AAmix1microM_02_HILICZ_DDA1_neg.mzML,Quench_01_HILICZ_DDA1_neg.mzML,Quench_02_HILICZ_DDA1_neg.mzML
0,2661,56.995542,6.070944,0,0,0,1,1,0,1,...,0,0,0,0,0,1,0,1,0,0
1,2176,59.014164,2.613818,0,0,0,1,1,0,1,...,0,0,1,0,0,1,1,1,0,0
2,2613,60.992957,5.789265,0,0,0,1,1,0,1,...,0,0,0,0,0,1,1,1,0,0
3,2095,61.988528,2.413207,1,0,0,1,1,1,1,...,0,0,0,0,0,1,1,1,0,0
4,2180,71.014069,2.618727,0,0,0,1,1,0,1,...,1,0,0,0,0,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
617,1715,1418.020774,0.782284,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
618,1696,1419.026698,0.775181,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
619,1682,1430.021943,0.773765,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
620,1694,1432.033975,0.779181,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


## Add annotations

In [8]:
def _is_positive_adduct(adduct) -> bool:
    """Return True when the text after the last "]" of `adduct` contains "+".

    Non-string values (e.g. NaN for a missing adduct) are reported as not
    positive, so such rows are kept instead of raising an AttributeError.
    """
    return isinstance(adduct, str) and "+" in adduct.split("]")[-1]


local_annotations = pd.read_csv(f"/mnt/d/mine2sirius_pipe/data/processed/{name}/{name}_annotations")
# Drop annotations whose adduct carries a "+" after the closing bracket.
# A boolean mask is used rather than a list of index labels, so each row is
# selected exactly once regardless of the index.
is_positive = local_annotations["adduct"].map(_is_positive_adduct).astype(bool)
local_annotations = local_annotations.loc[~is_positive]

In [9]:
# Left-join the annotations onto the presence table via the shared "id" column;
# features without an annotation keep NaN in the annotation columns.
# Both inputs have an "rt" column, which shows up as "rt_x" / "rt_y" in the result.
# NOTE(review): in the recorded outputs the last row index grows from 620 to 666
# after this merge, so some ids match more than one annotation row.
annotated_ms2_presence = ms2_presence_df.merge(local_annotations, on="id", how="left")

In [12]:
# Persist the annotated presence table as a tab-separated file.
# NOTE(review): `index` is left at its default, so the row index is written as
# a leading header-less column — confirm downstream readers expect that.
annotated_ms2_presence.to_csv( f"/mnt/d/mine2sirius_pipe/substep_batches/ion_exclusion/{name}_annotated.tsv", sep="\t" )
# Last expression of the cell: display the table.
annotated_ms2_presence

Unnamed: 0,id,m/z,rt_x,AAmix10microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_01_HILICZ_DDA1_neg.mzML,AAmix1microM_02_HILICZ_DDA1_neg.mzML,E.coli_01_HILICZ_DDA1it_neg.mzML,E.coli_01_HILICZ_DDA1_neg.mzML,E.coli_02_HILICZ_DDA1it_neg.mzML,E.coli_02_HILICZ_DDA1_neg.mzML,...,score,precursor_mz,ion_mobility,ccs,rt_y,mol_formula,smiles,inchi,inchi_key,method
0,2661,56.995542,6.070944,0,0,0,1,1,0,1,...,,,,,,,,,,
1,2176,59.014164,2.613818,0,0,0,1,1,0,1,...,0.660,59.013824,,,,C2H4O2,,,,simple_compound_db_annotation
2,2613,60.992957,5.789265,0,0,0,1,1,0,1,...,0.833,60.993124,,,,CH2O3,,,,simple_compound_db_annotation
3,2095,61.988528,2.413207,1,0,0,1,1,1,1,...,0.796,61.988324,,,,HNO3,,,,simple_compound_db_annotation
4,2180,71.014069,2.618727,0,0,0,1,1,0,1,...,0.755,71.013824,,,,C3H4O2,,,,simple_compound_db_annotation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,1715,1418.020774,0.782284,0,0,0,0,0,0,0,...,,,,,,,,,,
664,1696,1419.026698,0.775181,0,0,0,0,0,0,0,...,,,,,,,,,,
665,1682,1430.021943,0.773765,0,0,0,0,0,0,0,...,0.835,1430.023124,,,,C39H74N1O8P1,,,,simple_compound_db_annotation
666,1694,1432.033975,0.779181,0,0,0,0,0,0,0,...,,,,,,,,,,
