In [1]:
#######################################################################
# This script was written by Nana Owusu. It preprocesses metabolomic  #
# data from CSV files and removes outliers using the mean absolute    #
# deviation of each treatment group.                                  #
#######################################################################
# Modules for text interpretation and math
import os, sys, re, fnmatch
import numpy as np
# Modules for plotting and reading csv
# import matplotlib.pyplot as plt
# from matplotlib.figure import Figure
# import csv
import pandas as pd
# %matplotlib inline
# from ipywidgets import interactive
# Module for GUI
# import tkinter as tk
# from tkinter import filedialog
# Module for saving plots as PDF
# from matplotlib.backends.backend_pdf import PdfPages

## Load csv files with pandas

In [2]:
# Location of the metabolomics CSV. The original author's absolute path is
# kept as the default so existing runs behave the same; set the
# METABOLOMICS_CSV environment variable to run the notebook elsewhere.
DATA_CSV = os.environ.get(
    "METABOLOMICS_CSV",
    "/Users/nowusu/uiHackyHour/uihh_nowusu/20190723_chronicrotenoneisradipine_driftnorm.csv",
)
dataFile = pd.read_csv(DATA_CSV)

###     Group metabolite columns by drug treatment

In [3]:
## Names of the measured metabolites: every column of the data file
## except the two bookkeeping columns listed below.
non_metabolite_cols = ("Condition", "sample")
columns = [name for name in dataFile.columns if name not in non_metabolite_cols]

# Pandas Series holding the treatment label ("Condition") of every sample
groups = dataFile.loc[:, 'Condition']

### Get metabolite values sorted into a multi-index format

In [4]:
# Number each sample within its treatment group (0, 1, 2, ... per drug).
# cumcount() counts over the whole column, so the (drug, number) pairs stay
# unique even when a drug's samples are not stored in consecutive rows.
sample_numbers = groups.groupby(groups, sort=False).cumcount()

# List of (drug, sample number) tuples, one per row of the data file.
drug_enumerate = list(zip(groups.tolist(), sample_numbers.tolist()))

drug_multiIdx = pd.MultiIndex.from_tuples(drug_enumerate)

# Metabolite values indexed by (drug, sample number); the multi-index
# allows per-treatment selection with .loc in the statistics below.
# Assigning .index (rather than set_axis(..., inplace=True), which was
# removed in pandas 2.0) works on every pandas version.
metabolites_mi = dataFile[columns].copy()
metabolites_mi.index = drug_multiIdx

In [69]:
# Preview only the first rows instead of dumping the whole frame.
metabolites_mi.head(10)

Unnamed: 0,Unnamed: 1,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0,1.040873,1.131138,1.062883,0.959504,1.059183,1.442928,0.937136,1.400339,0.865777,0.459292,...,0.846821,1.227933,1.236293,0.831964,0.996407,0.942517,1.383028,0.546667,0.90833,0.853259
Isradipine,1,1.077633,1.056377,1.463635,1.439655,0.780903,1.327279,1.529427,1.311075,1.117801,0.70264,...,1.265575,1.79841,1.183618,0.71292,1.233209,0.964758,1.375169,0.893564,0.74606,1.297741
Isradipine,2,0.972901,0.995689,1.385253,1.383549,0.362198,1.148379,1.419673,1.231772,1.00015,1.174073,...,1.177109,1.327587,0.75693,0.73259,1.132917,1.10361,1.284988,1.336208,0.761586,1.34227
Isradipine,3,1.120797,0.971689,0.973966,1.13606,1.945651,0.850187,1.211664,1.2308,1.667962,2.191231,...,1.028476,1.025927,1.381209,1.043316,1.1819,1.157158,0.894826,1.433015,1.459262,1.058894
Isradipine,4,1.189482,1.179786,0.983789,1.133995,1.191188,0.475886,1.245228,1.280423,1.280922,1.253166,...,1.754046,1.14122,1.092819,0.734338,1.178083,1.132758,1.285634,0.805984,1.026312,1.098644
Isradipine+Rotenone,0,1.051199,1.061541,1.543269,1.332296,1.157584,1.108452,1.595933,1.550948,1.129846,1.518952,...,1.172283,1.45446,1.380894,0.781666,1.534711,1.180244,1.498181,1.328515,1.152285,1.320922
Isradipine+Rotenone,1,0.951072,1.11936,1.046026,0.948473,1.242748,0.777225,0.996103,0.923527,0.924495,0.392102,...,0.926647,1.100534,0.994375,0.828117,0.835861,0.944892,1.395723,0.608319,0.894907,0.897816
Isradipine+Rotenone,2,0.985426,1.027503,1.296233,1.32693,0.972275,1.227894,1.419509,1.249656,1.361324,0.844683,...,1.056928,1.605808,0.828703,0.934409,1.28439,1.129186,1.809741,0.948982,1.007041,1.333583
Isradipine+Rotenone,3,1.023643,1.031288,1.1561,1.137521,1.581426,0.645601,1.152196,1.238969,1.40027,0.455451,...,1.280345,1.171085,1.188795,0.778987,1.024199,1.050141,1.040541,0.688699,0.951515,1.022931
Rotenone,0,1.180639,0.953011,0.437137,0.625078,0.82078,0.652504,0.601315,1.362454,0.766073,1.959433,...,0.653797,1.010428,1.605366,0.934873,1.079065,0.916262,0.672503,1.703369,1.509299,1.100258


## Functions for calculating group statistics

In [70]:
def getConditions(condGroups):
    ''' Tally how many times each distinct entry occurs in a sequence.

    Returns a dict that maps every distinct entry of condGroups to its
    number of occurrences; keys appear in order of first occurrence. '''
    tally = {}
    for label in condGroups:
        tally[label] = tally.get(label, 0) + 1
    return tally

def stdErr(group, metabSet, cols=None):
    """Compute the standard deviation of every metabolite per treatment.

    Despite its name, this function returns the standard deviation
    (``DataFrame.std``), not the standard error.

    Parameters
    ----------
    group : iterable
        Treatment labels (for example a dict keyed by treatment). Each label
        must select rows of ``metabSet`` through ``metabSet.loc[label, cols]``.
    metabSet : pandas.DataFrame
        Metabolite values whose row index is keyed by treatment label.
    cols : list, optional
        Columns to summarise. Defaults to the notebook-level ``columns`` list,
        which is what the original implementation always used.

    Returns
    -------
    pandas.DataFrame
        One row per treatment (in the iteration order of ``group``), one
        column per metabolite; the column axis is named
        'Standard Deviation'.
    """
    if cols is None:
        cols = columns  # notebook-level list of metabolite column names

    treatments = list(group)
    per_treatment = [metabSet.loc[treatment, cols].std(axis='index')
                     for treatment in treatments]
    metabStdErr = pd.DataFrame(per_treatment, index=treatments)

    # rename_axis returns a new object, so no Index shared with another
    # frame gets renamed as a side effect.
    return metabStdErr.rename_axis('Standard Deviation', axis='columns')
    

def meanStdErr(group, metabSet, cols=None):
    """Compute the standard error of the mean of every metabolite per treatment.

    Parameters
    ----------
    group : iterable
        Treatment labels (for example a dict keyed by treatment). Each label
        must select rows of ``metabSet`` through ``metabSet.loc[label, cols]``.
    metabSet : pandas.DataFrame
        Metabolite values whose row index is keyed by treatment label.
    cols : list, optional
        Columns to summarise. Defaults to the notebook-level ``columns`` list,
        which is what the original implementation always used.

    Returns
    -------
    pandas.DataFrame
        One row per treatment (in the iteration order of ``group``), one
        column per metabolite, holding ``DataFrame.sem`` of the treatment's
        samples; the column axis is named 'Mean Std. Error'.
    """
    if cols is None:
        cols = columns  # notebook-level list of metabolite column names

    treatments = list(group)
    per_treatment = [metabSet.loc[treatment, cols].sem(axis='index')
                     for treatment in treatments]
    metabMeanStdErr = pd.DataFrame(per_treatment, index=treatments)

    # rename_axis returns a new object, so no Index shared with another
    # frame gets renamed as a side effect.
    return metabMeanStdErr.rename_axis('Mean Std. Error', axis='columns')

def coefOfVar(group, metabStdErr, metabMean):
    """Compute the coefficient of variation (standard deviation / mean).

    Parameters
    ----------
    group : any
        Unused; kept so existing calls keep working.
    metabStdErr : pandas.DataFrame
        Per-treatment standard deviations (treatments x metabolites).
    metabMean : pandas.DataFrame
        Per-treatment means with the same row and column labels.

    Returns
    -------
    pandas.DataFrame
        Element-wise ``metabStdErr / metabMean`` (pandas aligns the two
        frames on their labels); the column axis is named
        'Coeff. of Variation'.
    """
    metabCoefOfVar = metabStdErr.truediv(other=metabMean, axis='index')

    # Use rename_axis instead of assigning columns.names: the result may share
    # its columns Index object with an input frame, and renaming that object
    # in place would silently relabel the input as well.
    return metabCoefOfVar.rename_axis('Coeff. of Variation', axis='columns')

def mean(group, metabSet, cols=None):
    """Compute the mean of every metabolite per treatment.

    Parameters
    ----------
    group : iterable
        Treatment labels (for example a dict keyed by treatment). Each label
        must select rows of ``metabSet`` through ``metabSet.loc[label, cols]``.
    metabSet : pandas.DataFrame
        Metabolite values whose row index is keyed by treatment label.
    cols : list, optional
        Columns to summarise. Defaults to the notebook-level ``columns`` list,
        which is what the original implementation always used.

    Returns
    -------
    pandas.DataFrame
        One row per treatment (in the iteration order of ``group``), one
        column per metabolite; the column axis is named 'Mean'.
    """
    if cols is None:
        cols = columns  # notebook-level list of metabolite column names

    treatments = list(group)
    per_treatment = [metabSet.loc[treatment, cols].mean(axis='index')
                     for treatment in treatments]
    metabMean = pd.DataFrame(per_treatment, index=treatments)

    # The axis label used to read 'Mean Std. Deviation', a copy-paste
    # leftover that mislabelled this table of means.
    return metabMean.rename_axis('Mean', axis='columns')

def grubbs(group, metabSet, metabMean, metabStdErr, cols=None):
    """Compute, per sample, |value - treatment mean| / treatment std. deviation.

    Parameters
    ----------
    group : iterable
        Treatment labels (for example a dict keyed by treatment).
    metabSet : pandas.DataFrame
        Metabolite values indexed by (treatment, sample number).
    metabMean : pandas.DataFrame
        Per-treatment means (treatments x metabolites).
    metabStdErr : pandas.DataFrame
        Per-treatment standard deviations (treatments x metabolites).
    cols : list, optional
        Columns to score. Defaults to the notebook-level ``columns`` list,
        which is what the original implementation always used.

    Returns
    -------
    pandas.DataFrame
        Scores indexed by (treatment, sample number), with treatments in the
        iteration order of ``group``; the column axis is named
        'Mean Abs. Deviation'.
    """
    if cols is None:
        cols = columns  # notebook-level list of metabolite column names

    treatments = list(group)
    scores = []
    for treatment in treatments:
        # Rows of this treatment; the Series of means / deviations is
        # broadcast across them by column label.
        deviation = metabSet.loc[treatment, cols].sub(
            metabMean.loc[treatment, cols]).abs()
        scores.append(deviation.div(metabStdErr.loc[treatment, cols]))

    # keys= rebuilds the (treatment, sample number) index from the data
    # itself, so rows cannot be mislabelled by a separately built index.
    # pd.concat also replaces DataFrame.append, removed in pandas 2.0.
    meanAbsDev = pd.concat(scores, keys=treatments)

    return meanAbsDev.rename_axis('Mean Abs. Deviation', axis='columns')

## Calculate standard deviations

In [7]:
# Tally the samples per treatment, then summarise every metabolite
# per treatment with stdErr.
conditions = getConditions(condGroups=groups)
std = stdErr(group=conditions, metabSet=metabolites_mi)
std

Standard Deviation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0.081635,0.0882,0.232947,0.198022,0.584402,0.390667,0.226114,0.070003,0.309447,0.665659,...,0.340921,0.297744,0.233368,0.137891,0.090085,0.099384,0.201164,0.372482,0.291293,0.19737
Isradipine+Rotenone,0.043797,0.042452,0.21458,0.182604,0.254977,0.273511,0.26816,0.256213,0.22128,0.517672,...,0.152018,0.237651,0.239118,0.072665,0.304962,0.102556,0.317048,0.324339,0.110495,0.217951
Rotenone,0.105189,0.108737,0.290952,0.228455,0.166712,0.212089,0.247988,0.269809,0.188617,0.53188,...,0.460815,0.158283,0.284253,0.829449,0.081365,0.05695,0.123609,0.354919,0.258844,0.184383
Vehicle,0.035389,0.032227,0.135794,0.135549,0.198383,0.242246,0.187461,0.247503,0.224286,0.38611,...,0.196949,0.283296,0.315509,0.196011,0.216226,0.062737,0.075085,0.194236,0.19626,0.215471


## Calculate Averages

In [10]:
# Per-treatment average of every metabolite.
avg = mean(group=conditions, metabSet=metabolites_mi)
avg

Mean Std. Deviation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,1.080337,1.066936,1.173905,1.210552,1.067824,1.048932,1.268625,1.290882,1.186522,1.15608,...,1.214405,1.304215,1.130174,0.811026,1.144503,1.06016,1.244729,1.003088,0.98031,1.130161
Isradipine+Rotenone,1.002835,1.059923,1.260407,1.186305,1.238508,0.939793,1.290935,1.240775,1.203984,0.802797,...,1.109051,1.332972,1.098192,0.830794,1.16979,1.076116,1.436046,0.893629,1.001437,1.143813
Rotenone,0.984598,1.060898,0.881029,0.852653,0.818615,0.963391,0.890017,1.079369,0.732074,1.001246,...,1.057271,1.080459,1.140802,1.327125,0.981871,0.988229,0.800997,1.027838,1.139632,0.907087
Vehicle,0.963829,1.050027,1.105293,1.065775,1.265481,1.188858,1.079686,1.198236,0.916226,0.817197,...,1.140498,1.132253,1.210951,0.839922,0.854348,0.914731,0.825405,0.91469,1.004315,0.924769


## Calculate Coefficient of Variation

In [11]:
# Ratio of the per-treatment spread (std) to the per-treatment average (avg).
cv = coefOfVar(group=conditions, metabStdErr=std, metabMean=avg)
cv

Coeff. of Variation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0.075564,0.082666,0.198437,0.16358,0.547283,0.372442,0.178235,0.054229,0.260802,0.57579,...,0.280731,0.228293,0.206488,0.17002,0.078711,0.093744,0.161612,0.371335,0.297143,0.174639
Isradipine+Rotenone,0.043673,0.040052,0.170247,0.153927,0.205874,0.291033,0.207725,0.206494,0.18379,0.644835,...,0.13707,0.178287,0.217738,0.087465,0.260698,0.095302,0.220778,0.362946,0.110336,0.190547
Rotenone,0.106834,0.102495,0.330241,0.267934,0.203651,0.220148,0.278634,0.249969,0.257648,0.531218,...,0.435853,0.146496,0.249169,0.624997,0.082867,0.057628,0.154318,0.345306,0.22713,0.203269
Vehicle,0.036717,0.030691,0.122858,0.127183,0.156765,0.203763,0.173626,0.206556,0.244793,0.472482,...,0.172687,0.250205,0.260547,0.233368,0.253089,0.068586,0.090967,0.212352,0.195416,0.233


## Perform Grubbs' Analysis

In [71]:
# Score every sample against the average and spread of its own treatment.
madVals = grubbs(
    group=conditions,
    metabSet=metabolites_mi,
    metabMean=avg,
    metabStdErr=std,
)
madVals

Unnamed: 0,Mean Abs. Deviation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0,0.48343,0.727916,0.476599,1.26778,0.014787,1.008523,1.466029,1.563599,1.036512,1.046765,...,1.07821,0.256202,0.454729,0.151846,1.643953,1.183729,0.687494,1.225349,0.247106,1.40296
Isradipine,1,0.033128,0.119714,1.243757,1.156953,0.490966,0.712493,1.153406,0.288463,0.222078,0.68119,...,0.150093,1.659798,0.229011,0.711476,0.984684,0.959939,0.648429,0.294037,0.804173,0.849061
Isradipine,2,1.316058,0.807791,0.907281,0.87362,1.207432,0.254557,0.668017,0.844387,0.602274,0.02703,...,0.109399,0.078497,1.599379,0.568824,0.128611,0.437191,0.20013,0.894325,0.750874,1.07467
Isradipine,3,0.495626,1.079901,0.858303,0.376183,1.502092,0.508732,0.251915,0.858276,1.555805,1.555076,...,0.545372,0.934657,1.075705,1.684602,0.415129,0.975993,1.739394,1.154223,1.64423,0.361084
Isradipine,4,1.336991,1.27949,0.816137,0.386611,0.211093,1.466842,0.103478,0.1494,0.305059,0.145849,...,1.582889,0.547435,0.160067,0.556147,0.372752,0.730483,0.203343,0.529162,0.157923,0.159688
Isradipine+Rotenone,0,1.10428,0.038103,1.318214,0.799496,0.317378,0.616643,1.137371,1.210605,0.33504,1.383415,...,0.415957,0.511203,1.182273,0.676098,1.19661,1.015328,0.195977,1.340838,1.365204,0.812613
Isradipine+Rotenone,1,1.181879,1.400119,0.999071,1.302446,0.016627,0.594374,1.099464,1.238218,1.263054,0.79335,...,1.199887,0.978065,0.434165,0.036852,1.094985,1.279525,0.127184,0.879665,0.964116,1.128682
Isradipine+Rotenone,2,0.397501,0.763695,0.166957,0.770108,1.044147,1.053342,0.479467,0.034661,0.711045,0.080912,...,0.342874,1.148056,1.127014,1.425911,0.375783,0.517471,1.178668,0.170664,0.050714,0.870701
Isradipine+Rotenone,3,0.475101,0.674526,0.4861,0.267158,1.344898,1.075612,0.517375,0.007048,0.887049,0.670977,...,1.126804,0.681194,0.378906,0.71296,0.477408,0.253274,1.247461,0.631837,0.451803,0.554632
Rotenone,0,1.863705,0.992186,1.525652,0.996147,0.01299,1.465833,1.164174,1.049204,0.180252,1.801512,...,0.875568,0.442442,1.634335,0.472908,1.194545,1.263703,1.039524,1.903339,1.428149,1.047663


## Function for determining outliers

In [80]:
def outliers(grubbsData, initVals, thresh):
    """Blank out the measurements whose score exceeds ``thresh``.

    Parameters
    ----------
    grubbsData : pandas.DataFrame
        Per-sample scores, expected to carry the same row and column labels
        as ``initVals``.
    initVals : pandas.DataFrame
        The original measurements.
    thresh : float
        Cutoff; a measurement is treated as an outlier when its score is
        strictly greater than this value.

    Returns
    -------
    pandas.DataFrame
        A copy of ``initVals`` with outlier entries replaced by NaN and all
        other entries unchanged. Neither input is modified.
    """
    # The earlier version masked grubbsData itself and discarded the result
    # of the follow-up .where(..., inplace=False) call, so it returned scores
    # rather than the measurements with outliers removed.
    isOutlier = grubbsData > thresh

    return initVals.mask(isOutlier)

In [81]:
# Cutoff handed to outliers() as its thresh argument.
GRUBBS_THRESHOLD = 1.15
whichVals = outliers(
    grubbsData=madVals,
    initVals=metabolites_mi,
    thresh=GRUBBS_THRESHOLD,
)
whichVals

Unnamed: 0,Mean Abs. Deviation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0,1.040873,1.131138,1.062883,,1.059183,,,,0.865777,0.459292,...,0.846821,,,0.831964,,,,,0.90833,
Isradipine,1,1.077633,1.056377,,,0.780903,,,,1.117801,0.70264,...,,,,0.71292,,0.964758,,0.893564,0.74606,
Isradipine,2,,0.995689,,,,1.148379,,,1.00015,,...,,,,0.73259,1.132917,1.10361,,,0.761586,
Isradipine,3,1.120797,0.971689,0.973966,1.13606,,0.850187,,,,,...,1.028476,1.025927,,,,,,,,1.058894
Isradipine,4,,,0.983789,1.133995,,,,,,,...,,1.14122,1.092819,0.734338,,1.132758,,0.805984,1.026312,1.098644
Isradipine+Rotenone,0,1.051199,1.061541,,,,1.108452,,,1.129846,,...,,,,0.781666,,,,,,
Isradipine+Rotenone,1,,,1.046026,,,0.777225,0.996103,,,0.392102,...,,1.100534,0.994375,0.828117,0.835861,,,0.608319,0.894907,0.897816
Isradipine+Rotenone,2,0.985426,1.027503,,,0.972275,,,,,0.844683,...,1.056928,,0.828703,,,1.129186,,0.948982,1.007041,
Isradipine+Rotenone,3,1.023643,1.031288,,1.137521,,0.645601,,,,0.455451,...,,,,0.778987,1.024199,1.050141,,0.688699,0.951515,1.022931
Rotenone,0,,0.953011,,0.625078,0.82078,,,,0.766073,,...,0.653797,1.010428,,0.934873,,,0.672503,,,1.100258
