In [74]:
# Imports
import os
import datetime
import json
from typing import overload, Any, List, Dict, Tuple, Set, Sequence, Union
from numpy import *
import pandas as pd
import pyopenms as oms
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from tqdm import tqdm
import shutil
import requests
from sklearn.impute import KNNImputer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline

# Silence FutureWarning messages (added because of a seaborn deprecation warning).
# Note: the filter is not seaborn-specific; it ignores every FutureWarning raised in this kernel.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Star-import everything the project's FIA module exports
from FIA import *

# Set the OpenMS log level to "DEBUG"
oms.LogConfigHandler().setLogLevel("DEBUG")



In [75]:
def sirius_integration(sirius_path: Union[str, oms.String], feature_xml_path: str, msfile_path:str, csifingerid_path:str, experiment_path:str, experiment:oms.MSExperiment=None,
                       email:Union[str, oms.String]=None, password:Union[str, oms.String]=None,
                       filter_by_num_masstraces:int=1, precursor_mz_tolerance:float=10.0, precursor_mz_tolerance_unit:str="ppm", precursor_rt_tolerance:float=5.0,
                       isotope_pattern_iterations:int=3, feature_only:Union[str, bool]="false", max_mz:int=-1, log_level:str="WARNING"):
    """
    Preprocess a feature map for SIRIUS, call SIRIUS and export a .ms file.

    Steps, in order: load the experiment, reset the output directories, configure a
    SiriusAdapterAlgorithm, log in to the SIRIUS account (only if credentials are given),
    map features to MS2 spectra, call SIRIUS and store "<feature file stem>.ms" in msfile_path.

    :param sirius_path: Path to the SIRIUS executable
    :param feature_xml_path: Path to the featureXML file that is preprocessed
    :param msfile_path: Directory that receives the exported .ms file (passed to clean_dir first)
    :param csifingerid_path: Output location handed to SIRIUS for CSI:FingerID results (passed to clean_dir first)
    :param experiment_path: Path handed to load_experiment together with experiment
    :param experiment: Optional, already loaded experiment handed to load_experiment
    :param email: SIRIUS account e-mail; the login is skipped if email or password is None
    :param password: SIRIUS account password; the login is skipped if email or password is None
    :param filter_by_num_masstraces: Value for "preprocessing:filter_by_num_masstraces"
    :param precursor_mz_tolerance: Value for "preprocessing:precursor_mz_tolerance"
    :param precursor_mz_tolerance_unit: Value for "preprocessing:precursor_mz_tolerance_unit"
    :param precursor_rt_tolerance: Value for "preprocessing:precursor_rt_tolerance"
    :param isotope_pattern_iterations: Value for "preprocessing:isotope_pattern_iterations", also passed to the .ms export
    :param feature_only: "true"/"false" (or a bool) for "preprocessing:feature_only", also passed to the .ms export
    :param max_mz: Value for "project:maxmz"
    :param log_level: Value for "project:loglevel"
    :return: The value returned by SiriusAdapterAlgorithm.callSiriusQProcess
    """
    # The parameter object is given the flag as "true"/"false" text, whereas
    # SiriusMSFile.store expects a real boolean, so normalise the flag once.
    if isinstance(feature_only, str):
        feature_only_flag = feature_only.strip().lower() == "true"
    else:
        feature_only_flag = bool(feature_only)
    feature_only_text = "true" if feature_only_flag else "false"

    # load_experiment / clean_dir are not defined in this notebook
    # (presumably they come from `from FIA import *` -- confirm).
    experiment = load_experiment(experiment_path, experiment)
    clean_dir(msfile_path)
    clean_dir(csifingerid_path)

    saa = oms.SiriusAdapterAlgorithm()
    param = saa.getDefaults()
    param.setValue("preprocessing:filter_by_num_masstraces", filter_by_num_masstraces)
    param.setValue("preprocessing:precursor_mz_tolerance", precursor_mz_tolerance)
    param.setValue("preprocessing:precursor_mz_tolerance_unit", precursor_mz_tolerance_unit)
    param.setValue("preprocessing:precursor_rt_tolerance", precursor_rt_tolerance)
    param.setValue("preprocessing:isotope_pattern_iterations", isotope_pattern_iterations)
    param.setValue("preprocessing:feature_only", feature_only_text)
    param.setValue("project:maxmz", max_mz)
    param.setValue("project:loglevel", log_level)
    saa.setParameters(param)

    saa.determineSiriusExecutable(sirius_path)
    # Both credentials default to None; only attempt the login when both were supplied,
    # because the pyopenms wrapper rejects None for its string arguments.
    if email is not None and password is not None:
        saa.logInSiriusAccount(sirius_path, email, password)

    feature_mapping = oms.FeatureMapping_FeatureToMs2Indices()
    fm_info = oms.FeatureMapping_FeatureMappingInfo()
    saa.preprocessingSirius(feature_xml_path, experiment, fm_info, feature_mapping)

    saa.logFeatureSpectraNumber(feature_xml_path, feature_mapping, experiment)

    # Temporary locations next to the feature file, built from clean_dir's return values.
    feature_dir = os.path.dirname(feature_xml_path)
    tmp_ms_file = clean_dir(feature_dir, "tmp_ms")
    tmp_out_dir = clean_dir(feature_dir, "tmp")
    v_cmpinfo = saa.callSiriusQProcess(tmp_ms_file, tmp_out_dir, sirius_path, csifingerid_path, False)

    # Derive "<stem>.ms" from the feature file name without assuming the length of its extension.
    ms_file_name = os.path.splitext(os.path.basename(feature_xml_path))[0] + ".ms"
    # NOTE(review): the result of callSiriusQProcess is reused here as the compound-info list
    # of SiriusMSFile.store, and the .ms file is written only after SIRIUS was called --
    # confirm that both the argument and the call order are intended.
    smsf = oms.SiriusMSFile()
    smsf.store(experiment, os.path.join(msfile_path, ms_file_name), feature_mapping, feature_only_flag, isotope_pattern_iterations, False, v_cmpinfo)

    return v_cmpinfo

In [76]:
# Run the SIRIUS integration for the example sample with account login and full logging.
# `pw` is not defined in this cell: it must already exist in the kernel when this cell runs.
c = sirius_integration(
    sirius_path=oms.String(r"C:\Program Files\OpenMS-3.1.0\share\OpenMS\THIRDPARTY\Sirius\sirius.bat"),
    feature_xml_path=r"..\runs\example\features_rtaligned\mg1655_P3-A8_neg.featureXML",
    msfile_path=r"..\runs\example\mss",
    csifingerid_path=r"..\runs\example\csifingerids",
    experiment_path=r"..\data\example data\exampleA_ecolistrains\mg1655_P3-A8_neg.mzXML",
    experiment=None,
    email=oms.String("josua.carl@uni-tuebingen.de"),
    password=pw,
    filter_by_num_masstraces=1,
    precursor_mz_tolerance=10.0,
    precursor_mz_tolerance_unit="ppm",
    precursor_rt_tolerance=5.0,
    isotope_pattern_iterations=3,
    feature_only="false",
    max_mz=-1,
    log_level="ALL",
)

In [28]:
# Never store credentials in the notebook: take the SIRIUS password from the
# SIRIUS_PASSWORD environment variable, or prompt for it if the variable is unset/empty.
# The password that was previously committed here should be considered leaked and be changed.
import getpass
pw = oms.String(os.environ.get("SIRIUS_PASSWORD") or getpass.getpass("SIRIUS account password: "))

In [2]:

# Run the SIRIUS integration for the example sample without account credentials.
# The keyword has to be `max_mz` (the name in the function signature); `maxmz` raises a TypeError.
sirius_integration(oms.String(r"C:\Program Files\OpenMS-3.1.0\share\OpenMS\THIRDPARTY\Sirius\sirius.bat"), r"..\runs\example\features_rtaligned\mg1655_P3-A8_neg.featureXML", r"..\runs\example\mss",
                   r"..\runs\example\csifingerids", r"..\data\example data\exampleA_ecolistrains\mg1655_P3-A8_neg.mzXML", experiment=None, email=None, password=None,
                   filter_by_num_masstraces=1, precursor_mz_tolerance=10.0, precursor_mz_tolerance_unit="ppm", precursor_rt_tolerance=5.0,
                   isotope_pattern_iterations=3, feature_only="false", max_mz=-1)

AssertionError: arg executable wrong type

In [2]:
# Inspect the parameters of a freshly constructed SiriusAdapterAlgorithm.
sa = oms.SiriusAdapterAlgorithm()
par = sa.getParameters()
# print_params is not defined in this notebook (presumably provided by `from FIA import *` -- confirm).
print_params(par)

Param: b'read_sirius_stdout' Value: false Description: Read and print the standard output and error of the Sirius executable, even if it succeeds.
Param: b'preprocessing:filter_by_num_masstraces' Value: 1 Description: Number of mass traces each feature has to have to be included. To use this parameter, setting the feature_only flag is necessary
Param: b'preprocessing:precursor_mz_tolerance' Value: 10.0 Description: Tolerance window for precursor selection (Feature selection in regard to the precursor)
Param: b'preprocessing:precursor_mz_tolerance_unit' Value: ppm Description: Unit of the precursor_mz_tolerance
Param: b'preprocessing:precursor_rt_tolerance' Value: 5.0 Description: Tolerance window (left and right) for precursor selection [seconds]
Param: b'preprocessing:isotope_pattern_iterations' Value: 3 Description: Number of iterations that should be performed to extract the C13 isotope pattern. If no peak is found (C13 distance) the function will abort. Be careful with noisy data -

## Loading
Read the example mzXML file into an experiment object.

In [78]:
# Example mzXML file; the path is relative to the notebook's working directory.
experiment_path = "../data/example data/exampleA_ecolistrains/mg1655_P3-A8_neg.mzXML"
# read_experiment is not defined in this notebook (presumably provided by `from FIA import *` -- confirm).
experiment = read_experiment(experiment_path)

In [None]:
from corems.transient.input.brukerSolarix import ReadBrukerSolarix
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas
from corems.mass_spectrum.output.export import HighResMassSpecExport
from matplotlib import pyplot
from corems.mass_spectra.input.rawFileReader import ThermoBaseClass

# Example Bruker Solarix dataset; the path is relative to the current working directory
file_path = 'tests/tests_data/ftms/ESI_NEG_SRFA.d'

# Instantiate the Bruker Solarix reader for that dataset
bruker_reader = ReadBrukerSolarix(file_path)

# Let the reader create the transient object
bruker_transient_obj = bruker_reader.get_transient()

# Duration of the transient
T = bruker_transient_obj.transient_time

# Create the mass spectrum object from the transient
mass_spectrum_obj = bruker_transient_obj.get_mass_spectrum(plot_result=False, auto_process=True)

# What the SearchMolecularFormulas run below does:
# - searches monoisotopic molecular formulas for all mass spectral peaks
# - calculates the fine isotopic structure based on the monoisotopic molecular formulas found and the current dynamic range
# - searches molecular formulas of the corresponding calculated isotopologues
# - settings are stored in SearchConfig.json and can be changed directly in the file or inside the framework class
SearchMolecularFormulas(mass_spectrum_obj, first_hit=False).run_worker_mass_spectrum()

# Walk through the mass spectral peak objects of the spectrum
for mspeak in mass_spectrum_obj.sort_by_abundance():

    # A peak without any associated molecular formula evaluates to False:
    # only report its m/z and peak height, then move on
    if not mspeak:
        print(mspeak.mz_exp,mspeak.abundance)
        continue

    # Molecular formula with the highest mass accuracy
    molecular_formula = mspeak.molecular_formula_lowest_error

    # Plot m/z against peak height
    pyplot.plot(mspeak.mz_exp, mspeak.abundance, 'o', c='g')

    # Visit every molecular formula associated with this peak
    for molecular_formula in mspeak:

        # Only isotopologues are reported
        if not molecular_formula.is_isotopologue:
            continue

        # Text representation of the molecular formula
        print (molecular_formula.string)

        # Number of 13C atoms
        print (molecular_formula['13C'])

# Save the data
# as a csv file
mass_spectrum_obj.to_csv("filename")
# as an hdf file
mass_spectrum_obj.to_hdf("filename")
# as a pandas DataFrame pickle
mass_spectrum_obj.to_pandas("filename")

# Extract the data as a pandas DataFrame
df = mass_spectrum_obj.to_dataframe()