In [None]:
%matplotlib notebook
import dill
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import pickle
import glob

## User Input

In [None]:
## Ion mode whose pickled output is analysed in the rest of the notebook.
## Accepted values: "Positive" or "Negative" (default: "Negative").
dataset = "Negative"

## Import and Preprocess Data

In [None]:
# Import pickled dictionary of all data
# One pickled isotope data file per supported ion mode.
# NOTE(review): these are absolute paths on a single machine; consider a configurable data directory.
input_files = {
    "Positive": "/home/roli/DOE_Project/fullcyc/metabolomics/raw/positive_mode_isotope_data.pkl",
    "Negative": "/home/roli/DOE_Project/fullcyc/metabolomics/raw/negative_mode_isotope_data.pkl",
}

# Fail loudly on an unknown mode: a bare `else` would silently load the
# negative-mode file for any misspelling while outputs keep the misspelled label.
if dataset not in input_files:
    raise ValueError('dataset must be "Positive" or "Negative", got %r' % (dataset,))
inputfile = input_files[dataset]

# SECURITY: unpickling (dill.load) can execute arbitrary code; only load files from a trusted source.
with open(inputfile, 'rb') as fid:
    data = dill.load(fid)

In [None]:
# Return dictionary to pandas object format
# Prefix shared by the raw column names; it is removed to leave shorter column labels.
RUN_PREFIX = '20171221_TS_DB_fullcyc_QE144_50454-738379_'


def _as_frame_without_prefix(table, prefix=RUN_PREFIX):
    """Build a DataFrame from `table` and remove `prefix` from every column name.

    Parameters
    ----------
    table : any input accepted by ``pd.DataFrame``.
    prefix : str
        Substring removed from each column name (all occurrences).

    Returns
    -------
    pandas.DataFrame
        A new frame with the shortened column names.
    """
    frame = pd.DataFrame(table)
    return frame.rename(columns=lambda col: col.replace(prefix, ''))


# Both per-metabolite tables get the same treatment, so handle them in one loop
# instead of repeating the conversion for each key.
for entry in data:
    for key in ('rt_peak', 'peak_height'):
        entry[key] = _as_frame_without_prefix(entry[key])

## Overview of Data Structure

In [None]:
# Keys available for a single metabolite entry of `data`.
# The first entry is used here because `cpd_id` is only assigned in a later
# cell; indexing with it at this point raises NameError on a fresh kernel.
data[0].keys()

In [None]:
# Identified metabolites as (position in `data`, compound name) pairs
list(enumerate(entry['compound'] for entry in data))

In [None]:
# Total number of metabolites (one entry of `data` per metabolite)
len(data)

## Example of Data Associated with a Single Metabolite

In [None]:
# Position in `data` of the metabolite used as the worked example in the cells below.
# NOTE(review): 31 is hard-coded, so `data` needs at least 32 entries; confirm this holds for both ion modes.
cpd_id = 31

In [None]:
# Compound name of the example metabolite
data[cpd_id]['compound']

In [None]:
# Peak-height table (DataFrame) of the example metabolite
data[cpd_id]["peak_height"]

## Fork: Output Data to R for More Flexible Processing

In [None]:
# Make sure the export directory exists before anything is written into it
if not os.path.isdir("tsv_output"):
    os.mkdir("tsv_output")

# One tab-separated peak-height table per metabolite, named <compound>.<dataset>.tsv
for entry in data:
    # Underscores in the compound name become dots in the file name
    compound = entry['compound'].replace("_", ".")
    tsv_path = "./tsv_output/" + compound + "." + dataset + ".tsv"
    entry["peak_height"].to_csv(tsv_path, sep='\t')

## Compute Difference between Labeled and Control

In [None]:
# Treatments are Day + Label
# Built from fields 7-8 of each underscore-separated column name, using only the
# columns of the example metabolite whose name contains 'D48'.
# NOTE(review): confirm that those two fields really hold what the comment above says.
# sorted(...) keeps the order reproducible; the iteration order of a set of
# strings can differ from one kernel session to the next.
example_columns = data[cpd_id]['peak_height'].columns
treatments = sorted(set('_'.join(c.split('_')[7:9]) for c in example_columns if 'D48' in c))

In [None]:
# Collect every distinct time-point tag of the form D<digits> that appears
# between underscores in the example metabolite's column names.
# Raw string: '\d' inside a plain string literal is an invalid escape sequence.
day_pattern = re.compile(r'_(D\d+)_')

found_tags = set()
for c in data[cpd_id]['peak_height'].columns:
    match = day_pattern.search(c)
    if match is not None:  # columns without such a tag are skipped
        found_tags.add(match.group(1))

# Order by day number so the list is reproducible across kernel sessions
time_points = sorted(found_tags, key=lambda tag: int(tag[1:]))
time_points

## Identify 12C Control

In [None]:
# Group tags: fields 7-9 of each underscore-separated column name, taken from
# the example metabolite's columns whose name contains 'D48'.
# sorted(...) fixes the order; the iteration order of a set of strings can
# differ between kernel sessions, which would make positional lookups unstable.
# NOTE(review): later cells select groups by position (groups[0], groups[2]);
# verify that those positions are the intended control and labeled groups.
groups = sorted(set('_'.join(c.split('_')[7:10]) for c in data[cpd_id]['peak_height'].columns if 'D48' in c))

## Subtract Control from Labeled

In [None]:
# Peak-height table of the example metabolite
peak_height = data[cpd_id]['peak_height']

# Row-wise mean over all columns whose name contains the chosen group tag.
# NOTE(review): control and labeled are picked purely by position in `groups`
# (index 0 and index 2); confirm these are the intended groups.
control_cols = [name for name in peak_height.columns if groups[0] in name]
control = peak_height[control_cols].mean(axis=1)
labeled_cols = [name for name in peak_height.columns if groups[2] in name]
labeled = peak_height[labeled_cols].mean(axis=1)

In [None]:
# Element-wise difference of the two row-wise means (labeled minus control)
labeled - control

In [None]:
# Row-wise mean peak height for every group, one column per group.
# Despite its name, `diff_data` holds the per-group means; no subtraction happens here.
diff_data = {
    g: peak_height[[c for c in peak_height.columns if g in c]].mean(axis=1)
    for g in groups
}
df = pd.DataFrame(diff_data)
df

## Plot the Difference Curve for all Metabolites

In [None]:
def _save_isotope_barplot(entry, time_point, treatments, folder_name):
    """Save a log-scale bar chart of mean peak heights for one metabolite and time point.

    For every treatment, the columns of ``entry['peak_height']`` whose name
    contains ``'<treatment>_<time_point>'`` are averaged row-wise; the means
    are plotted as grouped bars and written to
    ``<folder_name>/<compound>_<time_point>.pdf``.

    Parameters
    ----------
    entry : mapping with the keys 'compound' (str) and 'peak_height' (DataFrame).
    time_point : str
        Time-point tag used to select columns and name the output file.
    treatments : iterable of str
        Treatment tags; each is combined with `time_point` to select columns.
    folder_name : str
        Existing directory that receives the PDF.
    """
    peak_height = entry['peak_height']
    means = {}
    for treatment in treatments:
        g = '%s_%s' % (treatment, time_point)
        means[g] = peak_height[[c for c in peak_height.columns if g in c]].mean(axis=1)
    df = pd.DataFrame(means)
    # presumably 'p' stands in for the decimal point of the m/z labels -- confirm
    df.index = [label.replace('p', '.') for label in df.index]

    fig, ax = plt.subplots()
    try:
        df[:-1].plot(kind='bar', ax=ax)  # drop the unnecessary last ion
        ax.set_title(entry['compound'], fontsize=18)
        ax.set_yscale('log')
        ax.set_ylabel('Peak Height (au)')
        ax.set_xlabel('m/z')
        fig.tight_layout()
        fig.savefig(os.path.join(folder_name, '%s_%s.pdf' % (entry['compound'], time_point)))
    finally:
        # Close (not merely clear) the figure so figures do not pile up in
        # memory while looping over every metabolite and time point.
        plt.close(fig)


# The output folder does not depend on the loop, so create it once up front
folder_name = 'isotope_figs'
if not os.path.isdir(folder_name):
    os.mkdir(folder_name)

plt.ioff()
try:
    for time_point in time_points:
        # Iterate over the entries directly: reusing `cpd_id` as the loop
        # variable would overwrite the example metabolite selected earlier.
        for entry in data:
            _save_isotope_barplot(entry, time_point, treatments, folder_name)
finally:
    # Restore interactive mode even if a plot fails part-way through
    plt.ion()