In [36]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [55]:
import glob
import os

import numpy as np
import pandas as pd

from results_from_metaspace_msms_process import reporting_loop
from results_processing_on_dl_results import annotate_cos_parent_fragment
from results_processing_on_dl_results import spectral_encoder
from results_processing_on_dl_results import ref_spectra_encoder

This notebook will a) download an existing METASPACE MS/MS report from the server, given a ds_id and db_id, b) generate pseudo-MS/MS spectra from the report, and c) compare them against reference MS/MS spectra from core_metabolome_v3.

In [38]:
## 1. Check path to reference spectra from core_metabolome_v3
# Both values are handed to glob.glob() inside find_ref, so each one should be
# a glob pattern that expands to the reference spectrum files of that polarity.
# NOTE(review): 'tbd1' / 'tbd2' look like placeholders — replace them with the
# real patterns before running step 4.
path_positive_ref_spectra = 'tbd1'
path_negative_ref_spectra = 'tbd2'

'tbd'

In [42]:
## 2. Download METASPACE MS/MS report from METASPACE

# Replace variables below for your dataset!
original_ds_id_on_prod = '2020-03-12_17h55m21s'
db_id_on_beta = '2020-05-13_17h50m21s'
ds_id_on_beta = '2020-05-14_16h32m01s'
path_to_reports = 'TEMP/reporting/'

# Download results
reporting_loop(original_ds_id_on_prod,
               db_id_on_beta,
               ds_id_on_beta,
               path_to_reports,
               parent_and_fragment_req=True,
               fdr_max=0.5,
               save_image=False)

# Locate the pickled report for this dataset. The candidates are sorted so the
# same file is picked on every run, and a missing report fails with a clear
# message instead of a bare IndexError.
report_pickles = sorted(glob.glob(path_to_reports + ds_id_on_beta + "/*.pickle"))
if not report_pickles:
    raise FileNotFoundError(
        "No .pickle report found under " + path_to_reports + ds_id_on_beta
        + "; check that reporting_loop completed for this dataset."
    )
df_path = report_pickles[0]

# Annotate results with cosine similarity.
# The annotated frame returned here is the one used downstream, so the report
# is not read separately with pd.read_pickle first (that result was discarded).
df = annotate_cos_parent_fragment(path_to_reports,
                                  [ds_id_on_beta],
                                  df_path)

2020-05-14_16h32m01s  Counter:  11
1   2020-05-14_16h32m01s


In [45]:
## 3. Generate pseudo-MS/MS spectra from ISF data.

ds_spectra_dict = {}

# Quantities that can be used as the pseudo-MS/MS y-axis.
psuedo_y_axis_list = ['binary', 'fdr', 'msm', 'cos', 'intensity']

# Change pseudo-MS/MS y-axis input here:
psuedo_y_axis = psuedo_y_axis_list[0]

# Keep only the rows flagged 'P' in par_or_frag, as an independent copy so the
# new column added below does not touch df.
is_parent = df.par_or_frag == 'P'
par_df = df[is_parent].copy(deep=True)

# One spectral_encoder call per parent row; the full df is passed along with
# the row's dataset id, its id_x and the chosen y-axis.
par_df['psuedo_msms'] = par_df.apply(
    lambda row: spectral_encoder(df, row.ds_id, row.id_x, psuedo_y_axis),
    axis=1,
)

In [53]:
# Inspect the parent ('P') rows together with their generated psuedo_msms column.
par_df

Unnamed: 0,adduct,cos,ds_id,fdr,formula,id_x,id_y,intensity,msm,mz,n_frag_y,name,par_formula,par_frag,par_or_frag,parent_y,psuedo_msms
0,[M]+,1.0,2020-05-14_16h32m01s,0.05,C24H51NO7P,HMDB0010382,1,31429.802734,0.98052,496.339727,4,LysoPC(16:0),C24H51NO7P,7p,P,1,"[(104.10695125566524, 1), (184.07328163766385,..."
5,[M]+,1.0,2020-05-14_16h32m01s,0.05,C52H101NO8P,HMDB0008578,1,6263.323242,0.955263,898.725893,5,PC(22:1(13Z)/22:1(13Z)),C52H101NO8P,10p,P,1,"[(104.10695125566524, 1), (184.07328163766385,..."


In [31]:
## 4. Find matches between pseudo-MS/MS and reference MS/MS spectra
# NOTE: find_ref is defined in a later cell of this notebook; that cell has to
# be executed before this one on a fresh kernel.

# Set the polarity
polarities = ['positive', 'negative']
polarity = polarities[1]

# Look up one reference spectrum per parent row, keyed on the row's id_x and
# formula, using the reference paths configured in step 1.
par_df['reference_msms'] = par_df.apply(
    lambda row: find_ref(
        row.id_x,
        row.formula,
        polarity,
        path_positive_ref_spectra,
        path_negative_ref_spectra,
    ),
    axis=1,
)

# Search spectral database keyed on id_x from par_df
# Check for adduct somehow? "K" or "Na" in formula
# Format spectra and join as column

In [54]:
def _adduct_label(text):
    """Classify ``text`` as 'Na', 'K' or 'H' by substring, checking 'Na' first."""
    if 'Na' in text:
        return 'Na'
    if 'K' in text:
        return 'K'
    return 'H'


def find_ref(name, formula, polarity, path_positive_ref_spectra,
             path_negative_ref_spectra):
    """Find the reference spectrum matching one pseudo-MS/MS spectrum.

    Only [M + H+]+, [M + Na+]+, [M + K+]+, and [M - H+]- are supported.

    Parameters
    ----------
    name : str
        Identifier that must occur in the path of a reference file
        (the notebook passes the row's ``id_x``).
    formula : str
        Formula of the annotation; a 'Na' or 'K' in it selects the sodium or
        potassium reference, anything else selects the 'H' reference.
    polarity : str
        'positive' or 'negative'; chooses which glob pattern is searched.
    path_positive_ref_spectra, path_negative_ref_spectra : str
        Glob patterns expanding to the reference files of each polarity.

    Returns
    -------
    The result of ``ref_spectra_encoder`` for the selected file, or ``None``
    when no file contains ``name`` or none exists for the required adduct.

    Raises
    ------
    ValueError
        If ``polarity`` is neither 'positive' nor 'negative'.
    """
    if polarity == 'positive':
        pattern = path_positive_ref_spectra
    elif polarity == 'negative':
        pattern = path_negative_ref_spectra
    else:
        raise ValueError(
            "polarity must be 'positive' or 'negative', got %r" % (polarity,)
        )

    # Sorted so that the choice between several candidate files is stable.
    matches = sorted(path for path in glob.glob(pattern) if name in path)
    if not matches:
        return None

    # Group candidates by adduct. Only the file name is inspected, so a 'K' or
    # 'Na' in a parent directory cannot mislabel every file.
    # NOTE(review): assumes the adduct is encoded in the file name — confirm
    # once the reference spectra have been renamed.
    match_dict = {}
    for path in matches:
        match_dict[_adduct_label(os.path.basename(path))] = path

    selected = match_dict.get(_adduct_label(formula))
    if selected is None:
        # There are files for this compound, but none for the adduct we need.
        return None
    return ref_spectra_encoder(selected)

In [None]:
## 5. Plot pseudo-MS/MS and reference MS/MS spectra
# One mirrored figure is drawn and saved per row of par_df.
# NOTE: plot_spectra is defined in the next cell of this notebook; that cell
# has to be executed before this one on a fresh kernel.
# NOTE(review): plot_spectra appends the file name directly to the output
# prefix, so "temp/spectra" produces files named temp/spectra<id>... — confirm
# whether "temp/spectra/" was intended.
par_df.apply(lambda x: plot_spectra(x.reference_msms,
                                    x.psuedo_msms,
                                    "temp/spectra",
                                    psuedo_y_axis,
                                    polarity,
                                    x.id_x,
                                    ds_id_on_beta),
             axis=1);  # trailing ';' hides the repr of the returned (fig, ax) pairs

In [56]:
def plot_spectra(ref_spectra, psuedo_spectra, out_path,
                 psuedo_y_axis, polarity, id_x, ds_id_on_beta):
    """Plot mirrored MS/MS spectra and save the figure.

    The reference peaks are drawn upwards and the pseudo-MS/MS peaks are
    negated so they are drawn downwards on the same m/z axis.

    Parameters
    ----------
    ref_spectra : sequence of (m/z, value) pairs, or None
        Reference spectrum; ``None`` (no reference found) is drawn as empty.
    psuedo_spectra : sequence of (m/z, value) pairs, or None
        Pseudo-MS/MS spectrum.
    out_path : str
        Prefix of the output file; the file name is appended to it directly,
        with no path separator added.
    psuedo_y_axis : str
        Label for the y-axis.
    polarity, id_x, ds_id_on_beta : str
        Used in the figure title and in the output file name.

    Returns
    -------
    (fig, ax)
        The figure and axes that were created.

    NOTE(review): this function needs ``plt`` (matplotlib.pyplot) in scope, but
    the import cell visible in this notebook does not import it — add
    ``import matplotlib.pyplot as plt`` there.
    """
    ref_peaks = [] if ref_spectra is None else list(ref_spectra)
    psuedo_peaks = [] if psuedo_spectra is None else list(psuedo_spectra)

    mzs = [peak[0] for peak in ref_peaks] + [peak[0] for peak in psuedo_peaks]
    # Negate the pseudo-MS/MS values to mirror them below the x-axis.
    ys = [peak[1] for peak in ref_peaks] + [peak[1] * -1 for peak in psuedo_peaks]

    fig, ax = plt.subplots()
    if mzs:
        # Skip the stem plot entirely when there is nothing to draw.
        ax.stem(mzs, ys, markerfmt=' ')
    ax.set_title(id_x + " " + polarity + " ds_id_beta:" + ds_id_on_beta)
    ax.set_xlabel('m/z (Da)')
    ax.set_ylabel(psuedo_y_axis)
    fig.savefig(out_path + id_x + polarity + ds_id_on_beta)
    return fig, ax

To do:
1. Wait for consensus spectra to be generated.
2. Rename them!
3. Move to reference folder.
4. Test workflow.