In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the local
# helper modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import requests
import zipfile

from parse_reference_spectra import parse_gnps_json, output_loop
from spectra_to_sirius import master_loop
from clone_ds_to_beta import copy_beta
from results_from_metaspace_msms_process import logon_metaspace
from metaspace.sm_annotation_utils import SMInstance
from results_from_metaspace_msms_process import split_data_frame_list
from sirius_to_metaspace_db import primary_loop

Workflow steps on spotted standards for METASPACE MS/MS publication.

In [3]:
# 1. Authentic standard compounds of interest (COIs).
# The table is stored with a .csv extension but is parsed as tab-separated.
spotted_cmpds_df = pd.read_csv(
    'input/spotted_cmpds.csv',
    sep='\t',
)

In [None]:
### not now ###
# 2. Downloading database references: GNPS
# Academic users only: contact "ccms-web@cs.ucsd.edu" and request "gnps_exp_all.json"
# Copy to "intermdiate/gnps_exp_all.json"
# NOTE(review): the input directory is spelled 'intermdiate' (here and in step 8),
# while every other path in this notebook uses 'intermediate/' - confirm which
# spelling exists on disk before renaming anything.
gnps_json_path = 'intermdiate/gnps_exp_all.json'
gnps_pickle_path = 'intermediate/databases/gnps_df.pickle'

# Parse the GNPS JSON export and cache the parsed result as a pickle.
gnps_df = parse_gnps_json(gnps_json_path)
gnps_df.to_pickle(gnps_pickle_path)

In [None]:
### not now ###
# 2. Downloading database references: HMDB 
with open('intermediate/hmdb_experimental_msms_spectra.zip', 'wb') as f:
    f.write(requests.get('http://specdb.wishartlab.com/downloads/exports/spectra_xml/hmdb_experimental_msms_spectra.zip', allow_redirects=True).content)
with zipfile.ZipFile('intermediate/hmdb_experimental_msms_spectra.zip', 'r') as z:
    z.extractall('intermediate/hmdb_lcms/hmdb_experimental_msms_spectra/')
    
with open('intermediate/hmdb_predicted_msms_spectra.zip', 'wb') as f:
    f.write(requests.get('http://specdb.wishartlab.com/downloads/exports/spectra_xml/hmdb_predicted_msms_spectra.zip', allow_redirects=True).content)
with zipfile.ZipFile('intermediate/hmdb_predicted_msms_spectra.zip', 'r') as z:
    z.extractall('intermediate/hmdb_lcms/hmdb_predicted_msms_spectra/')

!python hmdb_parser.py --input intermediate/hmdb_lcms/ --output intermediate/databases/ --polarity both

In [None]:
### not now ###
# 2. Downloading database references: MONA
# Runs query_mona.py once per polarity (negative, then positive) with the same
# input pickle and the same output directory.
!python query_mona.py --input input/core_metabolome_v3.pickle --output intermediate/databases/ --polarity negative
!python query_mona.py --input input/core_metabolome_v3.pickle --output intermediate/databases/ --polarity positive

In [4]:
# 3. Collecting predicted MS/MS spectra for spotted compounds
# Locations of the predicted (theoretical) reference spectra, one per polarity.
ref_theo_pos, ref_theo_neg = (
    'input/reference_spectra/positive_theoretical',
    'input/reference_spectra/negative_theoretical',
)

In [5]:
# 4. Collecting reference experimental MS/MS spectra for spotted compounds
# Locations of the experimental reference spectra, one per polarity.
ref_expt_pos, ref_expt_neg = (
    'input/reference_spectra/positive_experimental',
    'input/reference_spectra/negative_experimental',
)

In [6]:
# 5. Collecting in-house experimental MS/MS spectra for spotted compounds
# Locations of the in-house (embl_maldi) spectra, one per polarity.
embl_expt_pos, embl_expt_neg = (
    'input/reference_spectra/embl_maldi/positive',
    'input/reference_spectra/embl_maldi/negative',
)

## Parse to [(mz,int),..]

In [None]:
# 6. Generating custom MS1 database using known spotted compounds.
# Load the full core-metabolome table, then keep an independent copy of only
# the rows whose id also appears in the spotted-compound table.
cm3_df = pd.read_pickle('input/core_metabolome_v3.pickle')

spotted_ids = list(spotted_cmpds_df.id)
is_spotted = cm3_df.id.isin(spotted_ids)
cm3_stds_df = cm3_df[is_spotted].copy(deep=True)

In [None]:
### not now ###
# 7. Running and interpreting the METASPACE results.
# Maps each input dataset id to its polarity.
# NOTE(review): 'id_1' / 'id_n' look like placeholders - replace with real dataset ids.
spotted_ds_id_in = {'id_1': 'positive', 'id_n': 'negative'}

# Second argument handed to copy_beta for each polarity
# (presumably the adduct set to search - confirm against copy_beta).
copy_beta_arg_by_polarity = {'positive': 'HNaKM', 'negative': 'HM'}

out_dict_list = []
for ds, polarity in spotted_ds_id_in.items():
    if polarity not in copy_beta_arg_by_polarity:
        # Fail loudly: an if/elif without else would either hit an unbound
        # result variable or silently re-append the previous dataset's result.
        raise ValueError(
            'Unknown polarity {!r} for dataset {!r}; expected one of {}'.format(
                polarity, ds, sorted(copy_beta_arg_by_polarity)))
    out_dict_list.append(copy_beta(ds, copy_beta_arg_by_polarity[polarity], 'cm3_stds'))

# One row per copy_beta result; persisted so later steps can reload it.
out_df = pd.DataFrame(out_dict_list)
out_df.to_pickle('cm3_spotted_msms_df.pickle')

# Check that jobs are complete on server before running next cell
## Update list of dataset ids after experiment ran

In [None]:
### not now ###
# 7. Running and interpreting the METASPACE results.
# Keep annotations whose fdr is at or below this cutoff.
ms1_fdr_max = 0.2

# Connect and log on once; the same session is reused for every dataset
# instead of being re-created on each loop iteration.
sm = SMInstance(host='https://beta.metaspace2020.eu')
sm = logon_metaspace(sm)

df_list = []
# Walk out_df row-wise so every output dataset is tagged with the input id from
# its own row (re-filtering the frame and taking element [0] would mis-pair
# rows if a ds_id_out value ever appeared twice).
for ms_ds_id, ds_id_in in zip(out_df.ds_id_out, out_df.ds_id_in):
    ds = sm.dataset(id=ms_ds_id)
    results_df = ds.results(database='cm3_stds').reset_index()
    results_df['ds_id_in'] = ds_id_in
    df_list.append(results_df)

# Stack all datasets, apply the FDR cutoff, keep the two columns needed
# downstream, then expand the 'moleculeIds' column with split_data_frame_list.
ms1_all_df = pd.concat(df_list)
ms1_passing_df = ms1_all_df[ms1_all_df.fdr <= ms1_fdr_max]
ms1_df = ms1_passing_df[['ds_id_in', 'moleculeIds']]
ms1_df = split_data_frame_list(ms1_df, 'moleculeIds')

## Plot MS1 figures here for standards to folder.
## Need ds_id first!

In [None]:
### not now ###
# 8. Generating custom MS2 database using known spotted compounds.
# NOTE(review): the GNPS path below uses the directory spelling 'intermdiate',
# unlike the 'intermediate/' used by every other path - confirm before renaming.
gnps_exp_all = 'intermdiate/gnps_exp_all.json'
hmdb_exp_negative = 'intermediate/databases/hmdb_exp_negative.pickle'
hmdb_exp_positive = 'intermediate/databases/hmdb_exp_positive.pickle'
hmdb_theo_negative = 'intermediate/databases/hmdb_theo_negative.pickle'
hmdb_theo_positive = 'intermediate/databases/hmdb_theo_positive.pickle'
mona_exp_negative = 'intermediate/databases/mona_exp_negative.pickle'
mona_exp_positive = 'intermediate/databases/mona_exp_positive.pickle'

# The seven database paths, in the positional order that both output_loop and
# master_loop receive them.
reference_db_paths = (
    gnps_exp_all,
    hmdb_exp_positive,
    hmdb_exp_negative,
    hmdb_theo_positive,
    hmdb_theo_negative,
    mona_exp_positive,
    mona_exp_negative,
)

# Generate reference database limited to cmpds with theo/exp spectra
output_loop(cm3_stds_df, 'intermediate/databases/', *reference_db_paths)

# Parses spectra from databases and runs Sirius.
# Call order: exp/positive, exp/negative, theo/positive, theo/negative.
sirius_runs = (
    ('exp', 'intermediate/databases/ref_expt_df.pickle'),
    ('theo', 'intermediate/databases/ref_theo_df.pickle'),
)
for spectra_kind, ref_pickle in sirius_runs:
    for run_polarity in ('positive', 'negative'):
        master_loop(ref_pickle,
                    'intermediate/sirius_out/',
                    *reference_db_paths,
                    run_polarity,
                    spectra_kind)

In [None]:
### not now ###
# 8. Generating custom MS2 database using known spotted compounds.
# Sirius outputs followed by the two reference pickles, in the positional order
# primary_loop receives them after the polarity argument.
sirius_and_ref_pickles = (
    'intermediate/sirius_out/exp_positive.pickle',
    'intermediate/sirius_out/theo_positive.pickle',
    'intermediate/sirius_out/exp_negative.pickle',
    'intermediate/sirius_out/theo_negative.pickle',
    'intermediate/databases/ref_expt_df.pickle',
    'intermediate/databases/ref_theo_df.pickle',
)

for ds, polarity in spotted_ds_id_in.items():
    # A fresh list of unique compound names is built for every call.
    compound_names = list(cm3_stds_df.name.unique())
    primary_loop(compound_names,
                 ds,
                 'I_spotted_standards',
                 polarity,
                 *sirius_and_ref_pickles)

In [None]:
### not now ###
# 9. Running and interpreting the METASPACE MSMS results.
# Every dataset is copied with the same arguments ('M', 'ds'); the polarity
# stored alongside each dataset id is not consulted in this step.
# Note: this rebinds out_dict_list and out_df from step 7.
out_dict_list = [copy_beta(ds_id, 'M', 'ds') for ds_id in spotted_ds_id_in]

out_df = pd.DataFrame(out_dict_list)
out_df.to_pickle('I_spotted_standards/cm3_spotted_msms_df.pickle')

In [None]:
### Stop

In [None]:
# 10. Generate pseudo-MS/MS spectra from ISF data.

In [None]:

# 11. Score predicted, experimental, and pseudo-MS/MS spectra together.
# NOTE(review): f1, f2, f1p and f2p are not defined anywhere in the visible
# notebook, so this cell raises NameError on a fresh kernel - TODO define them
# (f1/f2 are passed to read_ms_file; f1p/f2p go straight to score_alignment).
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top.
from cosine_spectra import score_alignment, read_ms_file
# Only element [0] of score_alignment's return value is used.
# The meaning of the 0.1 argument is not visible here (presumably a tolerance - confirm).
score_alignment(read_ms_file(f1),
                read_ms_file(f2),
                f1p,f2p,0.1
               )[0]
# 12. Plot examples.
# 13. Carry well-behaved molecules forward.

In [None]:
# All six reference-spectra locations: theoretical, experimental, then
# in-house, each as a positive/negative pair.
spectra_db = [
    ref_theo_pos,
    ref_theo_neg,
    ref_expt_pos,
    ref_expt_neg,
    embl_expt_pos,
    embl_expt_neg,
]