In [1]:
import numpy as np
import pandas as pd
import requests
import time

from statsmodels.stats.multitest import multipletests as multipletests
from scipy.stats import ttest_ind as ttest_ind

pd.set_option("display.max_rows", 500)


def get_metab_ids(metabs_ls, verbose=False):
    """Map metabolite common names to database IDs via the MetaboAnalyst API.

    POSTs the names to the ``mapcompounds`` endpoint and tabulates the reply.

    PARAMS
    ------
    metabs_ls: list of string; input list of metab common names
    verbose: bool; verbosity flag. When True, prints the number of names
        submitted and the HTTP status code of the reply.

    RETURNS
    -------
    d_id: pandas DataFrame with one row per entry of the API reply and the
        columns metab, hit, hmdb_id, kegg_id, pubchem_id, chebi_id, metlin_id,
        smiles. Missing values and the placeholders "-", "" and "NA" are
        normalised to the string "undef". An empty ``metabs_ls`` yields an
        empty frame with the same columns and performs no network call.

    RAISES
    ------
    requests.HTTPError: the API answered with a 4xx/5xx status.
    requests.Timeout: the API did not answer within the timeout.
    KeyError: a reply entry lacks one of the expected ID fields.
    """
    colnames_ls = ["metab", "hit", "hmdb_id", "kegg_id", "pubchem_id", "chebi_id", "metlin_id", "smiles"]

    # Nothing to look up: skip the round trip and return an empty, well-formed frame.
    if not metabs_ls:
        return pd.DataFrame(columns=colnames_ls)

    url = "http://api.xialab.ca/mapcompounds"
    # Names are sent as one ';'-separated string; the trailing ';' is kept
    # because the original request format included it.
    payload = {
        "queryList": ";".join(metabs_ls) + ";",
        "inputType": "name",
    }
    headers = {
        'Content-Type': "application/json",
        'cache-control': "no-cache",
        }

    if verbose:
        print(f"Querying {url} for {len(metabs_ls)} metabolite names")

    # `json=` lets requests serialise the payload, so names containing quotes
    # or backslashes are escaped correctly instead of producing invalid JSON.
    # The timeout prevents the notebook from hanging on an unresponsive server.
    r = requests.request("POST", url, json=payload, headers=headers, timeout=60)
    if verbose:
        print(f"API responded with HTTP status {r.status_code}")
    # Fail loudly on an error status rather than trying to parse an error page.
    r.raise_for_status()

    # NOTE(review): the reply is assumed to be a list of dicts keyed by the
    # names in colnames_ls -- confirm against the API documentation.
    response_obj = r.json()
    contents = [[row[colname] for colname in colnames_ls] for row in response_obj]

    d_id = pd.DataFrame(data=contents, columns=colnames_ls)
    # Collapse every "no identifier" spelling into a single sentinel value.
    d_id = d_id.fillna("undef").replace({"-": "undef", "": "undef", "NA": "undef"})

    return d_id

  import pandas.util.testing as tm


In [2]:
# Load the normalized GC-MS table and describe its layout.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
csv_path = "/Users/don/Documents/flask_boxplot_reports_v3/sample_data/sample_gcms_normalized_n171.csv"
d0 = pd.read_csv(csv_path)

# Group pairs to compare; each pair is later keyed as "first|second".
pairs_ls = [
    ["group1", "group2"],
    ["group2", "group3"],
    ["group3", "group4"],
]

# Column layout: first column = sample identifier, second = group label,
# every remaining column = one metabolite.
all_colnames = list(d0.columns)
sample_colname = all_colnames[0]
group_colname = all_colnames[1]
metabs_ls = all_colnames[2:]

In [None]:
# Look up database identifiers for every metabolite column (performs an HTTP request).
d_id = get_metab_ids(metabs_ls)

In [None]:
# For each group pair, run a per-metabolite t-test, compute the fold change and
# the Benjamini-Hochberg adjusted p-value. The result maps "groupA|groupB" to a
# DataFrame with columns metab, t_stat, p_val, fc, BH_q_val.
comparisons_df_dict = {}
for pair in pairs_ls:
    k = "|".join(pair)
    d_t = d0.loc[d0[group_colname].isin(pair)]

    # The row selection depends only on the pair, so split the two groups once
    # here instead of re-filtering the frame for every metabolite.
    rows0 = d_t.loc[d_t[group_colname] == pair[0]]
    rows1 = d_t.loc[d_t[group_colname] == pair[1]]

    contents = []
    for mtb in metabs_ls:
        arr0 = rows0[mtb]
        arr1 = rows1[mtb]

        # equal_var=False selects Welch's t-test (no equal-variance assumption).
        t_test_result = ttest_ind(arr0, arr1, equal_var=False)
        # fc is the ratio of group means, pair[0] over pair[1].
        # NOTE(review): a zero mean in pair[1] yields inf/nan here -- confirm
        # this cannot happen for the normalized input.
        fold_change = np.average(arr0) / np.average(arr1)
        contents.append([mtb, t_test_result.statistic, t_test_result.pvalue, fold_change])

    d_stats = pd.DataFrame(data=contents, columns=["metab", "t_stat", "p_val", "fc"])

    # Multiple hypothesis testing correction
    multtest_result = multipletests(
        d_stats["p_val"].to_numpy(),
        alpha=0.05,
        method="fdr_bh",
        is_sorted=False,
        returnsorted=False,
    )
    # Element 1 of the result holds the adjusted p-values, in input order.
    d_stats["BH_q_val"] = multtest_result[1]

    comparisons_df_dict[k] = d_stats

In [None]:
# Display the statistics table for the group1 vs group2 comparison.
comparisons_df_dict["group1|group2"]