# MIIIT - The Metabolomics Interactive Intensity Integration Tool

In [3]:
import os
import uuid
import datetime
import traitlets
import ipywidgets as widgets
import pandas as pd

from ipywidgets import Button
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display
from tkinter import Tk, filedialog
from tqdm import tqdm_notebook
from pyteomics import mzxml

In [40]:
class SelectFilesButton(widgets.Button):
    """A button widget that opens a native tkinter file dialog.

    The chosen paths are stored in the ``files`` trait (a list).  After every
    click the optional ``callback`` is invoked without arguments, and the
    button is coloured light green when ``files`` is non-empty and red
    otherwise.

    Parameters
    ----------
    text : str
        Label shown on the button.
    callback : callable, optional
        Zero-argument function called after the dialog has been closed.
        When ``None`` no callback is invoked.
    """

    def __init__(self, text='Button', callback=None):
        super(SelectFilesButton, self).__init__()
        # Add the trait holding the selected file paths.
        self.add_traits(files=traitlets.traitlets.List())
        # Create the button.
        self.description = text
        self.icon = "square-o"
        self.style.button_color = "orange"
        # Store the callback before registering the click handler using it.
        self.callback = callback
        self.on_click(self.do_stuff)

    def do_stuff(self, b):
        """Handle a click: ask for files, notify the callback, recolour.

        Parameters
        ----------
        b : obj
            The clicked button (this widget), as passed by ``on_click``.
        """
        self.select_files(b)
        # The callback runs before the colour check because it may modify
        # ``files`` (e.g. filter the selection).
        if self.callback is not None:
            self.callback()
        if self.files:
            self.style.button_color = "lightgreen"
        else:
            self.style.button_color = "red"

    @staticmethod
    def select_files(b):
        """Open a tkinter file dialog and store the selection on ``b``.

        If the dialog is cancelled, or no Tk window can be created, the
        previous content of ``b.files`` is left untouched.

        Parameters
        ----------
        b : obj
            An instance of ipywidgets.widgets.Button with a ``files`` trait.
        """
        # Imported locally: the notebook only imports Tk and filedialog.
        from tkinter import TclError

        root = None
        try:
            # Create Tk root
            root = Tk()
            # Hide the main window
            root.withdraw()
            # Raise the root to the top of all windows.
            root.call('wm', 'attributes', '.', '-topmost', True)
            selected = filedialog.askopenfilename(multiple=True)
            # A cancelled dialog yields an empty result (empty string or
            # empty tuple); only a real selection replaces b.files.
            if selected:
                b.files = list(selected)
        except TclError:
            # Tk is unusable (e.g. no display available); keep the
            # previous selection instead of failing the click handler.
            pass
        finally:
            # Release the hidden root window so repeated clicks do not
            # accumulate Tk instances.
            if root is not None:
                root.destroy()


In [41]:
def integrate_peak(filename, rtmin, rtmax, mz, dmz, peaklabel=None):
    """Sum the intensities of one retention-time / m/z window in one file.

    The file is loaded with ``mzxml_to_pandas_df``, restricted to the window
    with ``slice_ms1_mzxml`` and the ``'intensity array'`` column of the
    remaining rows is summed.

    Parameters
    ----------
    filename : str
        Path of the mzXML file; also used as the index label of the result.
    rtmin, rtmax : float
        Bounds of the retention-time window.
    mz : float
        Centre of the m/z window.
    dmz : float
        Half-width of the m/z window.
    peaklabel : str, optional
        Name of the result column holding the summed intensity.
        Defaults to ``'Intensity'``.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``filename`` (index name ``'FileName'``) with the
        columns ``rtmin``, ``rtmax``, ``peakMz``, ``deltaMz`` and the
        intensity column.
    """
    label = 'Intensity' if peaklabel is None else peaklabel

    spectra = mzxml_to_pandas_df(filename)
    window = slice_ms1_mzxml(spectra, rtmin, rtmax, mz, dmz)
    intensity = window['intensity array'].sum().sum()

    row = {
        'rtmin': [rtmin],
        'rtmax': [rtmax],
        'peakMz': [mz],
        'deltaMz': [dmz],
        label: [intensity],
    }
    return pd.DataFrame(row, index=[filename]).rename_axis('FileName')


def integrate_peaks(filename, df_peaklist):
    """Integrate every peak of a peak list in one file (not implemented).

    The original definition had no body, which made the whole cell a
    syntax error.  This stub keeps the name and signature available and
    fails loudly when called.

    Parameters
    ----------
    filename : str
        Path of the mzXML file to integrate.
    df_peaklist : pandas.DataFrame
        Peak list to integrate.
        NOTE(review): the expected columns are not defined anywhere in this
        notebook -- presumably they mirror the arguments of
        ``integrate_peak`` (rtmin, rtmax, mz, dmz); confirm before
        implementing.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError(
        'integrate_peaks is not implemented yet; '
        'use integrate_peak for a single peak.')

def mzxml_to_pandas_df(filename):
    """Read all spectra of an mzXML file into one pandas DataFrame.

    Every spectrum yielded by ``pyteomics.mzxml.MzXML`` is turned into its
    own DataFrame; the frames are concatenated (the row index is not reset)
    and the columns are converted with ``df_to_numeric``.

    Errors raised while reading or converting a spectrum propagate to the
    caller.  Previously any exception silently ended the read loop, so a
    damaged file produced truncated data without warning.

    Parameters
    ----------
    filename : str
        Path of the mzXML file.

    Returns
    -------
    pandas.DataFrame
        The concatenated spectra.

    Raises
    ------
    ValueError
        If the file does not contain any spectrum.
    """
    # The context manager closes the underlying file even when parsing fails.
    with mzxml.MzXML(filename) as reader:
        slices = [pd.DataFrame(spectrum) for spectrum in reader]
    if not slices:
        raise ValueError('No spectra found in {}'.format(filename))
    df = pd.concat(slices)
    df_to_numeric(df)
    return df


def df_to_numeric(df):
    """Convert, in place, every column of ``df`` that parses as numeric.

    Columns that ``pandas.to_numeric`` cannot convert are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.

    Returns
    -------
    None
    """
    for col in df:
        try:
            converted = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            # Same outcome as the deprecated errors='ignore': keep the
            # column as it is when it cannot be parsed.
            continue
        # Replace the column instead of writing into it with
        # ``df.loc[:, col] = ...``; the latter keeps the old (object) dtype
        # on recent pandas versions, which defeats the conversion.
        df[col] = converted


def slice_ms1_mzxml(df, rtmin, rtmax, mz, dmz):
    """Return the rows of ``df`` inside a retention-time / m/z window.

    A row is kept when ``rtmin <= retentionTime <= rtmax`` and
    ``mz - dmz <= 'm/z array' <= mz + dmz`` (all bounds inclusive).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``retentionTime`` and ``'m/z array'``.
    rtmin, rtmax : float
        Bounds of the retention-time window.
    mz : float
        Centre of the m/z window.
    dmz : float
        Half-width of the m/z window.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df``.
    """
    in_rt_window = df.retentionTime.between(rtmin, rtmax)
    in_mz_window = df['m/z array'].between(mz - dmz, mz + dmz)
    return df.loc[in_rt_window & in_mz_window]

In [47]:
from IPython.display import clear_output
from ipywidgets import Button, HBox, VBox, Textarea, HTML
import time

class App:
    """Notebook GUI state for integrating one peak over several mzXML files.

    The instance owns the widgets (file pickers, numeric inputs, message
    box, buttons, download link) and the result of the last run.

    Attributes
    ----------
    files, peaklist : SelectFilesButton
        File pickers for the mzXML files and the peak list.
    min_rt, max_rt, mz, dmz : ipywidgets.BoundedFloatText
        Retention-time bounds, m/z centre and m/z half-width of the peak.
    message_box : ipywidgets.Textarea
        Read-only area showing file listings and results.
    results : pandas.DataFrame or None
        Output of the last successful ``run``; ``None`` before that.
    download_html : ipywidgets.HTML
        Holds the download link once a result has been written.
    """

    # Shared upper bound of the numeric inputs.  The rtmax input used to be
    # capped at 15 while rtmin allowed 100000, which silently clamped larger
    # retention times.
    _MAX_VALUE = 100000.0

    def __init__(self):
        self.files = SelectFilesButton(text='Select mzXML', callback=self.list_files)
        self.peaklist = SelectFilesButton(text='Peaklist', callback=self.list_files)
        self.min_rt = widgets.BoundedFloatText(
            value=5.07,
            min=0,
            max=self._MAX_VALUE,
            step=0.001,
            description='rtmin:',
            disabled=False)
        self.max_rt = widgets.BoundedFloatText(
            value=5.09,
            min=0,
            max=self._MAX_VALUE,
            step=0.01,
            description='rtmax:',
            disabled=False)
        self.mz = widgets.BoundedFloatText(
            value=151.0605,
            min=0,
            max=self._MAX_VALUE,
            step=0.001,
            description='peakMz:',
            disabled=False)
        self.dmz = widgets.BoundedFloatText(
            value=0.0005,
            min=0,
            max=self._MAX_VALUE,
            step=0.005,
            description='Delta m/z',
            disabled=False)
        self.message_box = Textarea(
            value='',
            placeholder='Please select some files and click on Run.',
            description='',
            disabled=True,
            layout={'width': '95%', 'height': '500px', 'font_family': 'monospace'})
        self.list_button = Button(description="List Files")
        self.list_button.on_click(self.list_files)
        self.run_button = Button(description="Run")
        self.run_button.on_click(self.run)
        self.download_button = Button(description="Download")
        self.download_button.on_click(self.download)
        self.results = None
        self.download_html = HTML("""Nothing to download""")

    def run(self, b):
        """Integrate the configured peak in every selected file.

        The per-file results are concatenated, stored in ``self.results``,
        shown in the message box and written to a CSV file via ``download``.

        Parameters
        ----------
        b : obj
            The clicked button (unused).

        Returns
        -------
        pandas.DataFrame or None
            The concatenated results, or ``None`` when no file is selected.
        """
        filenames = list(self.files.files)
        if not filenames:
            # pd.concat([]) would raise; tell the user what is missing.
            self.message_box.value = (
                'No mzXML files selected. '
                'Please select some files and click on Run.')
            return None
        min_rt = self.min_rt.value
        max_rt = self.max_rt.value
        mz = self.mz.value
        dmz = self.dmz.value
        results = [
            integrate_peak(filename, min_rt, max_rt, mz, dmz, 'peakArea')
            for filename in tqdm_notebook(filenames)
        ]
        self.results = pd.concat(results)
        self.message_box.value = self.results.to_string()
        self.download(None)
        return self.results

    def list_files(self, b=None):
        """Show the selected mzXML files and peak list in the message box.

        As a side effect the mzXML selection is reduced to the paths ending
        in ``.mzXML`` (compared case-insensitively).

        Parameters
        ----------
        b : obj, optional
            The clicked button (unused).
        """
        mzxml_files = [name for name in self.files.files
                       if name.lower().endswith('.mzxml')]
        self.files.files = mzxml_files
        text = 'mzXML files to process:\n'
        text += ''.join(name + '\n' for name in mzxml_files)
        text += '\n\nUsing peak list:\n'
        if self.peaklist.files:
            text += '\n'.join(self.peaklist.files)
        else:
            text += '\nNo'
        self.message_box.value = text

    def download(self, b):
        """Write the current results to a CSV file and publish a link to it.

        The file name combines the current date and part of a random UUID.
        When there are no results yet a hint is printed instead.

        Parameters
        ----------
        b : obj
            The clicked button (unused).
        """
        if self.results is None:
            print('First you have to create some results.')
            return
        uid = str(uuid.uuid4()).split('-')[-1]
        now = datetime.datetime.now().strftime("%Y-%m-%d")
        filename = '{}-metabolomics_peak_intensity-{}.csv'.format(now, uid)
        self.results.to_csv(filename)
        self.download_html.value = """<a download='{}' href='{}'>Download</a>""".format(filename, filename)

# Build the application state, then lay out its widgets top to bottom:
# action buttons, m/z inputs, retention-time inputs, message area and
# download link.  The VBox is the last expression so the notebook shows it.
app = App()

VBox(children=[
    HBox(children=[app.files, app.peaklist, app.run_button]),
    HBox(children=[app.mz, app.dmz]),
    HBox(children=[app.min_rt, app.max_rt]),
    app.message_box,
    app.download_html,
])


VBox(children=(HBox(children=(SelectFilesButton(description='Select mzXML', icon='square-o', style=ButtonStyle…

In [45]:
# Inspect the paths currently selected through the 'Peaklist' button.
app.peaklist.files

[]