In [1]:
#######################################################################
# This script was written by Nana Owusu. It preprocesses metabolomic  #
# data from CSV files and removes outliers using the mean absolute    #
# deviation of each treatment group.                                  #
#######################################################################
# Modules for text interpretation and math
import os, sys, re, fnmatch
import numpy as np
# Modules for plotting and reading csv
# import matplotlib.pyplot as plt
# from matplotlib.figure import Figure
# import csv
import pandas as pd
# %matplotlib inline
# from ipywidgets import interactive
# Module for GUI
# import tkinter as tk
# from tkinter import filedialog
# Module for saving plots as PDF
# from matplotlib.backends.backend_pdf import PdfPages

## Load csv files with pandas

In [2]:
# Input CSV with one row per sample. The location can be overridden by
# setting the METABOLOMICS_CSV environment variable, so the notebook can be
# run on another machine without editing this cell; when the variable is
# unset the original path is used.
dataFile = pd.read_csv(
    os.environ.get(
        "METABOLOMICS_CSV",
        "/Users/nowusu/uiHackyHour/uihh_nowusu/20190723_chronicrotenoneisradipine_driftnorm.csv",
    )
)

###     Group metabolite columns by drug treatment

In [3]:
## Names of the measured metabolites: every column of the CSV except the
## two sample-description columns. Built with a list comprehension.
columns = [name for name in dataFile.columns
           if name != "Condition" and name != "sample"]

# Pandas Series holding the treatment ("Condition") label of each row
groups = dataFile["Condition"]

### Get metabolite values sorted into a multi-index format

In [4]:
# Number the samples within each run of consecutive rows that share a
# treatment: the first row of a run gets 0, the next 1, and so on.
# The final output is a list of (treatment, sample number) tuples.
drug_enumerate = []
oldDrug = None
count = 0
for drug in groups:
    # Iterating over the values (instead of indexing groups[i]) does not
    # rely on the Series carrying a 0..n-1 integer index.
    count = count + 1 if drug == oldDrug else 0
    drug_enumerate.append((drug, count))
    oldDrug = drug


drug_multiIdx = pd.MultiIndex.from_tuples(drug_enumerate)

# Attach the (treatment, sample number) multi-index to a copy of the
# metabolite columns to allow for better group analyses with pandas'
# statistics tools. The index is assigned directly because
# set_axis(..., inplace=True) was removed in pandas 2.0.
metabolites_mi = dataFile[columns].copy()
metabolites_mi.index = drug_multiIdx

In [5]:
# Show the metabolite table, now indexed by (treatment, sample number).
metabolites_mi

Unnamed: 0,Unnamed: 1,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0,1.040873,1.131138,1.062883,0.959504,1.059183,1.442928,0.937136,1.400339,0.865777,0.459292,...,0.846821,1.227933,1.236293,0.831964,0.996407,0.942517,1.383028,0.546667,0.90833,0.853259
Isradipine,1,1.077633,1.056377,1.463635,1.439655,0.780903,1.327279,1.529427,1.311075,1.117801,0.70264,...,1.265575,1.79841,1.183618,0.71292,1.233209,0.964758,1.375169,0.893564,0.74606,1.297741
Isradipine,2,0.972901,0.995689,1.385253,1.383549,0.362198,1.148379,1.419673,1.231772,1.00015,1.174073,...,1.177109,1.327587,0.75693,0.73259,1.132917,1.10361,1.284988,1.336208,0.761586,1.34227
Isradipine,3,1.120797,0.971689,0.973966,1.13606,1.945651,0.850187,1.211664,1.2308,1.667962,2.191231,...,1.028476,1.025927,1.381209,1.043316,1.1819,1.157158,0.894826,1.433015,1.459262,1.058894
Isradipine,4,1.189482,1.179786,0.983789,1.133995,1.191188,0.475886,1.245228,1.280423,1.280922,1.253166,...,1.754046,1.14122,1.092819,0.734338,1.178083,1.132758,1.285634,0.805984,1.026312,1.098644
Isradipine+Rotenone,0,1.051199,1.061541,1.543269,1.332296,1.157584,1.108452,1.595933,1.550948,1.129846,1.518952,...,1.172283,1.45446,1.380894,0.781666,1.534711,1.180244,1.498181,1.328515,1.152285,1.320922
Isradipine+Rotenone,1,0.951072,1.11936,1.046026,0.948473,1.242748,0.777225,0.996103,0.923527,0.924495,0.392102,...,0.926647,1.100534,0.994375,0.828117,0.835861,0.944892,1.395723,0.608319,0.894907,0.897816
Isradipine+Rotenone,2,0.985426,1.027503,1.296233,1.32693,0.972275,1.227894,1.419509,1.249656,1.361324,0.844683,...,1.056928,1.605808,0.828703,0.934409,1.28439,1.129186,1.809741,0.948982,1.007041,1.333583
Isradipine+Rotenone,3,1.023643,1.031288,1.1561,1.137521,1.581426,0.645601,1.152196,1.238969,1.40027,0.455451,...,1.280345,1.171085,1.188795,0.778987,1.024199,1.050141,1.040541,0.688699,0.951515,1.022931
Rotenone,0,1.180639,0.953011,0.437137,0.625078,0.82078,0.652504,0.601315,1.362454,0.766073,1.959433,...,0.653797,1.010428,1.605366,0.934873,1.079065,0.916262,0.672503,1.703369,1.509299,1.100258


## Functions for calculating group statistics

In [6]:
def getConditions(condGroups):
    ''' Tally how often each distinct entry occurs in condGroups.

    Returns a dict that maps every entry to its total number of
    occurrences; the keys appear in order of first occurrence. '''
    tally = {}
    for label in condGroups:
        tally[label] = tally.get(label, 0) + 1
    return tally

def stdErr(group,metabSet):
    ''' Per-treatment standard deviation of every column of metabSet.

    Despite its name this function returns the sample standard deviation
    (DataFrame.std), not the standard error.

    group    -- iterable of treatment labels (e.g. the dict returned by
                getConditions); each must be a first-level key of
                metabSet's index.
    metabSet -- DataFrame of measurements indexed by treatment.

    Returns a DataFrame with one row per treatment and one column per
    column of metabSet; the columns axis is named 'Standard Deviation'.
    '''
    # Work on metabSet's own columns instead of the notebook-level
    # `columns` list so the function does not depend on hidden state.
    per_treatment = [metabSet.loc[treatment].std(axis='index')
                     for treatment in group]

    # One Series per treatment -> columns keyed by treatment -> transpose.
    # This replaces set_axis(..., inplace=True), removed in pandas 2.0.
    metabStdErr = pd.concat(per_treatment, axis=1, keys=list(group)).T

    # rename_axis returns a new object, so an Index that may be shared with
    # metabSet is never renamed in place.
    return metabStdErr.rename_axis('Standard Deviation', axis='columns')
    

def meanStdErr(group,metabSet):
    ''' Per-treatment standard error of the mean of every column.

    group    -- iterable of treatment labels (e.g. the dict returned by
                getConditions); each must be a first-level key of
                metabSet's index.
    metabSet -- DataFrame of measurements indexed by treatment.

    Returns a DataFrame with one row per treatment and one column per
    column of metabSet; the columns axis is named 'Mean Std. Error'.
    '''
    # Work on metabSet's own columns instead of the notebook-level
    # `columns` list so the function does not depend on hidden state.
    per_treatment = [metabSet.loc[treatment].sem(axis='index')
                     for treatment in group]

    # One Series per treatment -> columns keyed by treatment -> transpose.
    # This replaces set_axis(..., inplace=True), removed in pandas 2.0.
    metabMeanStdErr = pd.concat(per_treatment, axis=1, keys=list(group)).T

    # rename_axis returns a new object, so an Index that may be shared with
    # metabSet is never renamed in place.
    return metabMeanStdErr.rename_axis('Mean Std. Error', axis='columns')

def coefOfVar(group,metabStdErr,metabMean):
    ''' Coefficient of variation: standard deviation divided by mean.

    group       -- unused; kept so existing calls keep working.
    metabStdErr -- DataFrame of per-treatment standard deviations.
    metabMean   -- DataFrame of per-treatment means with matching
                   row and column labels.

    Returns a new DataFrame of element-wise ratios whose columns axis is
    named 'Coeff. of Variation'. Neither input frame is modified.
    '''
    metabCoefOfVar = metabStdErr.truediv(other=metabMean,axis='index')

    # The quotient can carry the very same columns Index object as
    # metabStdErr, so assigning to `.columns.names` could rename the
    # caller's frame as well. rename_axis returns a relabelled copy.
    return metabCoefOfVar.rename_axis('Coeff. of Variation', axis='columns')

def mean(group,metabSet):
    ''' Per-treatment arithmetic mean of every column of metabSet.

    group    -- iterable of treatment labels (e.g. the dict returned by
                getConditions); each must be a first-level key of
                metabSet's index.
    metabSet -- DataFrame of measurements indexed by treatment.

    Returns a DataFrame with one row per treatment and one column per
    column of metabSet; the columns axis is named 'Mean'.
    '''
    # Work on metabSet's own columns instead of the notebook-level
    # `columns` list so the function does not depend on hidden state.
    per_treatment = [metabSet.loc[treatment].mean(axis='index')
                     for treatment in group]

    # One Series per treatment -> columns keyed by treatment -> transpose.
    # This replaces set_axis(..., inplace=True), removed in pandas 2.0.
    metabMean = pd.concat(per_treatment, axis=1, keys=list(group)).T

    # The axis label used to read 'Mean Std. Deviation', which mislabelled
    # a table of means. rename_axis returns a new object, so an Index that
    # may be shared with metabSet is never renamed in place.
    return metabMean.rename_axis('Mean', axis='columns')

def grubbs(group,metabSet,metabMean,metabStdErr):
    ''' Scaled absolute deviation of every sample from its treatment mean.

    For each row of metabSet whose treatment is in `group`, computes
    |value - treatment mean| / treatment standard deviation per column
    (the quantity compared against a threshold in a Grubbs-style test).

    group       -- iterable of treatment labels to include.
    metabSet    -- DataFrame of measurements whose (first) index level is
                   the treatment label.
    metabMean   -- per-treatment means, indexed by treatment label.
    metabStdErr -- per-treatment standard deviations, same layout.

    Returns a DataFrame that keeps metabSet's own index and row order; the
    columns axis is named 'Mean Abs. Deviation'.
    '''
    cols = metabSet.columns
    treatments = metabSet.index.get_level_values(0)

    # Restrict to the requested treatments while preserving row order.
    wanted = treatments.isin(list(group))
    samples = metabSet.loc[wanted]
    labels = treatments[wanted]

    # Broadcast each treatment's statistics onto its samples by label
    # look-up. Unlike growing a frame with DataFrame.append (removed in
    # pandas 2.0) and relabelling it positionally from a global index,
    # each row is matched to its own treatment whatever the row order.
    centre = metabMean.loc[labels, cols].values
    spread = metabStdErr.loc[labels, cols].values

    meanAbsDev = samples.sub(centre).abs().div(spread)

    # rename_axis returns a new object, so metabSet's columns Index is
    # never renamed in place.
    return meanAbsDev.rename_axis('Mean Abs. Deviation', axis='columns')

## Calculate standard deviations

In [7]:
# Tally the samples per treatment; the dict's keys name the groups that
# the statistics functions iterate over.
conditions = getConditions(groups)
# Per-treatment standard deviation of each metabolite column.
std = stdErr(conditions,metabolites_mi)
std

## Calculate Averages

In [8]:
# Per-treatment mean of each metabolite column.
avg = mean(conditions,metabolites_mi)
avg

## Calculate Coefficient of Variation

In [9]:
# Per-treatment standard deviation divided by the per-treatment mean.
cv = coefOfVar(conditions,std,avg)
cv

## Perform Grubbs' Analysis

In [10]:
# For every sample: |value - treatment mean| / treatment standard deviation.
madVals = grubbs(conditions,metabolites_mi,avg,std)
madVals

## Function for determining outliers

In [11]:
def outliers(grubbsData,initVals,thresh):
    ''' Blank out measurements whose deviation score exceeds thresh.

    grubbsData -- DataFrame of deviation scores.
    initVals   -- DataFrame of the original measurements.
    thresh     -- scores strictly greater than this count as outliers.

    Returns a frame laid out like grubbsData that holds NaN wherever the
    score exceeds thresh and the value from initVals everywhere else.
    '''
    exceeds = grubbsData > thresh

    # Step 1: put NaN at every position flagged as an outlier.
    blanked = grubbsData.mask(exceeds, np.nan)

    # Step 2: keep those NaNs and fill every unflagged position with the
    # original measurement.
    return blanked.where(exceeds, initVals)

In [12]:
# Replace every measurement whose deviation score exceeds 1.15 with NaN and
# keep the original measurement otherwise.
# NOTE(review): 1.15 is a magic number. It is presumably a Grubbs critical
# value (it is close to the tabulated value for n = 3); confirm that it
# suits the group sizes in this data set.
whichVals = outliers(madVals,metabolites_mi,1.15)
whichVals

Unnamed: 0,Mean Abs. Deviation,X1.Octadecanol,X2.Hydroxybutyrate,X2.Hydroxyglutarate,X2.Oxoadipate,X3.Hydroxypyruvate,X3.Phosphoglycerate,X6.Phosphogluconate,Aconitate,Adenine,Adenosine,...,Taurine,Threonine,Thymine,Tryptophan,Tyrosine,Uracil,Urea,Uridine,Valine,Xanthine
Isradipine,0,1.040873,1.131138,1.062883,,1.059183,1.442928,,,0.865777,0.459292,...,0.846821,1.227933,1.236293,0.831964,,,1.383028,,0.90833,
Isradipine,1,1.077633,1.056377,,,0.780903,1.327279,,1.311075,1.117801,0.70264,...,1.265575,,1.183618,0.71292,1.233209,0.964758,1.375169,0.893564,0.74606,1.297741
Isradipine,2,,0.995689,1.385253,1.383549,,1.148379,1.419673,1.231772,1.00015,1.174073,...,1.177109,1.327587,,0.73259,1.132917,1.10361,1.284988,1.336208,0.761586,1.34227
Isradipine,3,1.120797,0.971689,0.973966,1.13606,,0.850187,1.211664,1.2308,,,...,1.028476,1.025927,1.381209,,1.1819,1.157158,,,,1.058894
Isradipine,4,,,0.983789,1.133995,1.191188,,1.245228,1.280423,1.280922,1.253166,...,,1.14122,1.092819,0.734338,1.178083,1.132758,1.285634,0.805984,1.026312,1.098644
Isradipine+Rotenone,0,1.051199,1.061541,,1.332296,1.157584,1.108452,1.595933,,1.129846,,...,1.172283,1.45446,,0.781666,,1.180244,1.498181,,,1.320922
Isradipine+Rotenone,1,,,1.046026,,1.242748,0.777225,0.996103,,,0.392102,...,,1.100534,0.994375,0.828117,0.835861,,1.395723,0.608319,0.894907,0.897816
Isradipine+Rotenone,2,0.985426,1.027503,1.296233,1.32693,0.972275,1.227894,1.419509,1.249656,1.361324,0.844683,...,1.056928,1.605808,0.828703,,1.28439,1.129186,,0.948982,1.007041,1.333583
Isradipine+Rotenone,3,1.023643,1.031288,1.1561,1.137521,,0.645601,1.152196,1.238969,1.40027,0.455451,...,1.280345,1.171085,1.188795,0.778987,1.024199,1.050141,,0.688699,0.951515,1.022931
Rotenone,0,,0.953011,,0.625078,0.82078,,,1.362454,0.766073,,...,0.653797,1.010428,,0.934873,,,0.672503,,,1.100258
