In [1]:
# Imports
import os
import datetime
import json
from typing import overload, Any, List, Dict, Tuple, Set, Sequence, Union, Optional
import numpy as np
import pandas as pd
import pyopenms as oms
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm
import shutil
import requests
from sklearn.impute import KNNImputer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline

# Ignore seaborn warning for future deprecation of module part
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# import methods from FIA python script
from FIA import *

# Set the OpenMS log level to DEBUG for this session.
oms.LogConfigHandler().setLogLevel("DEBUG")



## Read in

In [2]:
# Locations of the mzML input files and of the work folder. Both are written
# relative to the current working directory and resolved to normalized
# absolute paths.
data_dir, run_dir = (
    os.path.normpath(os.path.join(os.getcwd(), relative_path))
    for relative_path in (
        "../../data/example data/exampleA_ecolistrains",
        "../../runs/fia_explorer",
    )
)

# clean_dir(run_dir)

## Centroiding
Data reduction with peak picking

In [3]:
# The batch centroiding call is left commented out; centroid_dir is instead
# pointed directly at the "centroids" subfolder of run_dir (presumably the
# output of an earlier centroid_batch run -- confirm the folder exists).
# centroid_dir = centroid_batch(data_dir, run_dir, file_ending=".mzXML")
centroid_dir = os.path.join(run_dir, "centroids")

## Merging
We lose temporal information. This may be undesirable, because the isotope spectra become less specific.

In [4]:
# merge_dir = merge_batch(centroid_dir, run_dir, block_size=None, mz_binning_width=5.0, mz_binning_width_unit="ppm", average_gaussian_cutoff=0.01, file_ending=".mzML")
# merge_dir = os.path.join(run_dir, "merged")

## Mass trace detection

In [9]:
def mass_trace_detection(
    experiment: Union[oms.MSExperiment, str],
    mass_error_ppm: float = 10.0,
    noise_threshold_int: float = 1000.0,
    reestimate_mt_sd: str = "true",
    quant_method: str = "median",
    trace_termination_criterion: str = "outlier",
    trace_termination_outliers: int = 3,
    min_trace_length: float = 5.0,
    max_trace_length: float = -1.0,
) -> list:
    """
    Run OpenMS mass trace detection on a single experiment.

    The ``experiment`` argument is first passed through ``load_experiment``
    (per the type hint it is either an ``oms.MSExperiment`` or a string,
    presumably a file path -- confirm against ``load_experiment``).

    Every keyword argument other than ``experiment`` is written to the
    ``oms.MassTraceDetection`` parameter of the same name, on top of the
    algorithm's defaults.

    :param experiment: experiment object or string accepted by ``load_experiment``
    :param mass_error_ppm: value for the "mass_error_ppm" parameter
    :param noise_threshold_int: value for the "noise_threshold_int" parameter
    :param reestimate_mt_sd: "true"/"false" string; dynamic re-estimation of m/z variance
    :param quant_method: quantification method for mass traces
                         ("median" is recommended for direct injection)
    :param trace_termination_criterion: value for the "trace_termination_criterion" parameter
    :param trace_termination_outliers: value for the "trace_termination_outliers" parameter
    :param min_trace_length: value for the "min_trace_length" parameter
    :param max_trace_length: value for the "max_trace_length" parameter
    :return: list that ``MassTraceDetection.run`` filled with the detected mass traces
    """
    experiment = load_experiment(experiment)

    detector = oms.MassTraceDetection()
    settings = detector.getDefaults()

    # Applied in insertion order, on top of the algorithm defaults.
    overrides = {
        "mass_error_ppm": mass_error_ppm,
        "noise_threshold_int": noise_threshold_int,
        "reestimate_mt_sd": reestimate_mt_sd,
        "quant_method": quant_method,
        "trace_termination_criterion": trace_termination_criterion,
        "trace_termination_outliers": trace_termination_outliers,
        "min_trace_length": min_trace_length,
        "max_trace_length": max_trace_length,
    }
    for parameter_name, parameter_value in overrides.items():
        settings.setValue(parameter_name, parameter_value)
    detector.setParameters(settings)

    # run() appends its results to the list passed in. The trailing 0 is
    # presumably "no limit on the number of traces" -- confirm in pyOpenMS docs.
    detected_traces = []
    detector.run(experiment, detected_traces, 0)
    return detected_traces

def mass_trace_detection_batch(experiments: Optional[List[Union[oms.MSExperiment, str]]] = None, in_dir:str=".", file_ending:str=".mzML",
                               mass_error_ppm: float = 10.0, noise_threshold_int: float = 1000.0, reestimate_mt_sd:str="true",
                               quant_method:str="median", trace_termination_criterion:str="outlier", trace_termination_outliers:int=3,
                               min_trace_length:float=5.0, max_trace_length:float=-1.0) -> list:
    """
    Run mass trace detection on a batch of experiments.

    If ``experiments`` is ``None`` or empty, every entry of ``in_dir`` whose
    name ends with ``file_ending`` is processed instead. All remaining keyword
    arguments are forwarded unchanged to ``mass_trace_detection``.

    :param experiments: experiments (objects or strings accepted by
                        ``mass_trace_detection``); ``None``/empty means
                        "collect the files from ``in_dir``"
    :param in_dir: directory searched for input files when no experiments are given
    :param file_ending: file name suffix used to select files in ``in_dir``
    :param mass_error_ppm: forwarded to ``mass_trace_detection``
    :param noise_threshold_int: forwarded to ``mass_trace_detection``
    :param reestimate_mt_sd: forwarded to ``mass_trace_detection``
    :param quant_method: forwarded to ``mass_trace_detection``
    :param trace_termination_criterion: forwarded to ``mass_trace_detection``
    :param trace_termination_outliers: forwarded to ``mass_trace_detection``
    :param min_trace_length: forwarded to ``mass_trace_detection``
    :param max_trace_length: forwarded to ``mass_trace_detection``
    :return: list with one ``mass_trace_detection`` result per experiment,
             in the order the experiments were processed
    :raises OSError: from ``os.listdir`` if ``in_dir`` cannot be listed
    """
    if not experiments:
        # NOTE: os.listdir returns entries in arbitrary order, so the order of
        # the results follows the directory listing, not the file names.
        experiments = [os.path.join(in_dir, file_name)
                       for file_name in os.listdir(in_dir)
                       if file_name.endswith(file_ending)]

    mass_traces_all = []
    for experiment in tqdm(experiments):
        mass_traces_all.append(
            mass_trace_detection(experiment=experiment, mass_error_ppm=mass_error_ppm, noise_threshold_int=noise_threshold_int,
                                 reestimate_mt_sd=reestimate_mt_sd, quant_method=quant_method,
                                 trace_termination_criterion=trace_termination_criterion,
                                 trace_termination_outliers=trace_termination_outliers,
                                 min_trace_length=min_trace_length, max_trace_length=max_trace_length)
        )

    return mass_traces_all

In [10]:
# Detect mass traces in every ".mzML" file of the centroid directory.
# The empty `experiments` list makes the batch function collect the files
# from `in_dir` itself.
mass_traces_all = mass_trace_detection_batch(
    experiments=[],
    in_dir=centroid_dir,
    file_ending=".mzML",
    mass_error_ppm=10.0,
    noise_threshold_int=1000.0,
    reestimate_mt_sd="true",
    quant_method="median",
    trace_termination_criterion="outlier",
    trace_termination_outliers=3,
    min_trace_length=5.0,
    max_trace_length=-1.0,
)

100%|██████████| 6/6 [00:03<00:00,  1.50it/s]
