In [None]:
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolor
import sys
import os
import pandas as pd
import numpy as np
from Bio import Phylo
import seaborn as sns
from scipy.stats import t, ttest_1samp, wilcoxon, mannwhitneyu, ttest_rel, zscore, spearmanr
import json
from statsmodels.stats.multitest import multipletests
from scipy.stats import gaussian_kde
from sklearn import linear_model
import re
from matplotlib.colors import ListedColormap
import networkx as nx
from scipy.stats import f_oneway

In [None]:
# Notebook-wide matplotlib defaults: Arial text at size 10, a [4, 3] default figure size,
# black axis labels, and 1.0-wide axes and major tick lines.
for rc_key, rc_value in {
    'font.family':       'Arial',
    'font.sans-serif':   ["Arial", "DejaVu Sans", "Lucida Grande", "Verdana"],
    'figure.figsize':    [4, 3],
    'font.size':         10,
    'axes.labelcolor':   "#000000",
    'axes.linewidth':    1.0,
    'xtick.major.width': 1.0,
    'ytick.major.width': 1.0,
}.items():
    matplotlib.rcParams[rc_key] = rc_value

# Two built-in colormaps kept under short names.
cmap1, cmap2 = plt.cm.tab20, plt.cm.Set3

In [None]:
# Work from the NK_M0156 experiment directory so that relative output paths
# (e.g. "figures/...") resolve inside it.
os.chdir("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0156")

# Create the output folders. An already-existing folder is fine (exist_ok=True);
# any other failure (permissions, a plain file with the same name, ...) is raised
# instead of being silently ignored.
for output_dir in ["figures", "tables", "networks"]:
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Classes of KOs

# All mapping tables and the ontology JSON are read from the NK_M0151 experiment directory.
NK_M0151_DIR = "/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151"

# Two-column, header-less, tab-separated mapping tables; the column names are assigned here.
df_path_ko = pd.read_table(NK_M0151_DIR + "/tables/path_ko.txt", names = ['Pathway', 'KO'])
df_rn_ko = pd.read_table(NK_M0151_DIR + "/tables/rn_ko.txt", names = ['Reaction','KO'])
df_md_ko = pd.read_table(NK_M0151_DIR + "/tables/md_ko.txt", names = ['Module','KO'])
df_path_md = pd.read_table(NK_M0151_DIR + "/tables/path_md.txt", names = ['Pathway','Module'])

# Load the ontology inside a context manager so the file handle is closed right away.
with open(NK_M0151_DIR + "/json/ko00001.json") as ontology_file:
    ontology = json.load(ontology_file)

# Convert the nested ontology dict (nodes carrying 'name' and, optionally, 'children')
# into a Bio.Phylo tree so that clade helpers such as get_terminals() can be used on it.
root_clade = Phylo.BaseTree.Clade(name=ontology['name'])

# Iterative depth-first walk: every stack entry pairs an ontology node with the clade
# already created for it; children are attached to that clade in their listed order.
stack = [(ontology, root_clade)]
while stack:
    term, clade = stack.pop()
    for child in term.get('children', []):
        child_clade = Phylo.BaseTree.Clade(name = child['name'])
        clade.clades.append(child_clade)
        stack.append((child, child_clade))

# The tree is built once, from the fully populated root clade.
ontology_tree = Phylo.BaseTree.Tree(root_clade)

# Collect (category, KO) pairs. The children of the first top-level branch of the
# ontology tree are used as categories; every terminal below a category whose first
# whitespace-separated token starts with 'K' contributes one pair.
list_category_ko = []
for clade in ontology_tree.clade.clades[0].clades:
    for tip in clade.get_terminals():
        KO = tip.name.split()[0]
        if KO.startswith('K'):
            list_category_ko.append([clade.name, KO])
df_category_ko = pd.DataFrame(list_category_ko, columns = ['category', 'KO'])

# The same KO may be listed more than once under one category; keep one row per pair.
df_category_ko = df_category_ko[~df_category_ko.duplicated()]

# Number of categories each KO belongs to. The counts are filtered on the Series itself
# so the code does not depend on the column name that pd.DataFrame(value_counts())
# produces ('KO' on pandas < 2.0, 'count' on pandas >= 2.0).
ko_category_counts = df_category_ko.KO.value_counts()
df_ko_count = pd.DataFrame(ko_category_counts)
set_ko_with_unique_category = set(ko_category_counts[ko_category_counts == 1].index)

# Flag KOs that belong to exactly one category and keep a view restricted to them.
df_category_ko['unique'] = [(ko in set_ko_with_unique_category) for ko in df_category_ko.KO]
df_uniquecategory_ko = df_category_ko[df_category_ko['unique']]

# color of function categories

# NOTE(review): `colors` is not referenced in the cells visible here — confirm it is still needed.
colors = ['#66C2A5', '#FC8D62', '#8DA0CB', '#E78AC3', '#555555', '#FC8D62', '#8DA0CB', '#E78AC3', '#66C2A5', '#FC8D62', '#000000']

cm_name = 'Set3' # qualitative colormap
cm = plt.get_cmap(cm_name)

# Count, for every (category, Module) pair, how many KOs they share.
df_category_ko_module = pd.merge(df_category_ko, df_md_ko, on = 'KO')
df_category_ko_module['Nko'] = 1
# numeric_only=True keeps the aggregation to the numeric/boolean columns regardless of the
# pandas version (pandas >= 2.0 would otherwise also "sum", i.e. concatenate, the KO strings).
df_category_module_count = df_category_ko_module.groupby(['category', 'Module'], as_index = False).sum(numeric_only=True)

# Assign each module to the category contributing most of its KOs
# (idxmax keeps the first row on ties), then order the result by category.
df_maxcategory_module = df_category_module_count.loc[df_category_module_count.groupby('Module')['Nko'].idxmax(),:].sort_values('category')
df_maxcategory_module = df_maxcategory_module.reset_index().loc[:, ['category', 'Module']]

# One integer id and one hex colour per category, in sorted category order.
# NOTE(review): Set3 provides 12 colours; with more categories the last colour would
# repeat — confirm the number of categories stays within that.
df_category_color = pd.DataFrame([[category, i] for i, category in enumerate(df_maxcategory_module.category.unique())], columns = ["category", 'category_id'])
df_category_color['color'] = [mcolor.rgb2hex(cm(i)) for i in df_category_color['category_id']]

# Count, for every (category, Pathway) pair, how many KOs they share.
df_category_ko_pathway = pd.merge(df_category_ko, df_path_ko, on = 'KO')
df_category_ko_pathway['Nko'] = 1
# numeric_only=True keeps the aggregation to the numeric/boolean columns regardless of the
# pandas version (pandas >= 2.0 would otherwise also "sum", i.e. concatenate, the KO strings).
df_category_pathway_count = df_category_ko_pathway.groupby(['category', 'Pathway'], as_index = False).sum(numeric_only=True)

# Assign each pathway to the category contributing most of its KOs
# (idxmax keeps the first row on ties), then order the result by category.
df_maxcategory_pathway = df_category_pathway_count.loc[df_category_pathway_count.groupby('Pathway')['Nko'].idxmax(),:].sort_values('category')
df_maxcategory_pathway = df_maxcategory_pathway.reset_index().loc[:, ['category', 'Pathway']]
df_maxcategory_pathway

#### Visualize AUC of cross validation of all OGs

In [None]:
# Cross-validation AUC of all OGs: for every (tree, acr) combination, save one
# violin/strip figure and print the p-values of three comparisons.
# NOTE: `tree` and `acr` are intentionally left as top-level loop variables; the following
# cell builds its file names from the values the last iteration leaves behind.
for tree, acr in [('mlgtdb', 'MPPA'), ('mlgtdb', 'DOWNPASS'), ('nj', 'MPPA'), ('nj', 'DOWNPASS')]:
    df_auc_raw = pd.read_table("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0156/result/ko_auc."+tree+"."+acr+".txt", names = ['KO', 'target', 'method', 'auc'])
    # Average the AUC rows that share the same (KO, target, method).
    df_auc = df_auc_raw.groupby(['KO', 'target', 'method'], as_index = False).mean()

    # visualize: white violins with the individual points overlaid, split by target;
    # all drawing goes through the explicit fig/ax pair rather than pyplot's current axes.
    fig = plt.figure(figsize=(2,2))
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    ax.set_ylim(-0.1,1.1)
    sns.violinplot(data = df_auc, x = 'method', y = 'auc', hue = 'target', linewidth=0.5, order = ['RF','LR'], palette = ['#FFFFFF', '#FFFFFF'], ax = ax)
    sns.stripplot (data = df_auc, x = 'method', y = 'auc', hue = 'target', linewidth=0,   order = ['RF','LR'], palette = ['#9FA6F1', '#E1BB63'], size = 1, alpha=.3,jitter=0.3, dodge=True, ax = ax)
    ax.set_ylabel("AUC")
    ax.set_xlabel("")
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.get_legend().remove()
    ax.set_title(tree+" "+acr)
    # Reference line at AUC = 0.5.
    ax.axhline(0.5, linewidth = 1, alpha = 0.5, color = '#555555')
    fig.savefig("figures/NK_M0156_crossvalidation_"+tree+"_"+acr+".pdf",bbox_inches='tight')
    # Close this specific figure so figures do not pile up across iterations.
    plt.close(fig)

    # test if average AUC is 0.5 (one-sample t-test per method and target)
    for method in ['LR', 'RF']:
        for target in ['gain', 'loss']:
            print(tree, acr, target, method, ttest_1samp(df_auc[(df_auc['method'] == method) & (df_auc['target'] == target)]['auc'], 0.5).pvalue)

    # test if AUC of LR and RF is equal (Wilcoxon signed-rank test on values paired by KO)
    for target in ['gain', 'loss']:
        df_auc_comp = pd.merge(df_auc[(df_auc['method'] == 'LR') & (df_auc['target'] == target)], df_auc[(df_auc['method'] == 'RF') & (df_auc['target'] == target)], on = 'KO')
        print(tree, acr, target, 'LR', 'RF', wilcoxon(df_auc_comp['auc_x'], df_auc_comp['auc_y']).pvalue)

    # test if AUC of gain and loss is equal (Mann-Whitney U test).
    # The alternative is spelled out because the default differed in older SciPy releases;
    # "is equal" is a two-sided question.
    for method in ['LR', 'RF']:
        df_auc_gain = df_auc[(df_auc['method'] == method) & (df_auc['target'] == 'gain')]
        df_auc_loss = df_auc[(df_auc['method'] == method) & (df_auc['target'] == 'loss')]
        print(tree, acr, method, 'gain', 'loss', mannwhitneyu(df_auc_gain.auc, df_auc_loss.auc, alternative = 'two-sided').pvalue)

#### Visualize AUC of cross validation of metabolic OGs

In [None]:
# Load, for one (tree, acr) setting, the AUC table of the NK_M0156 run and the table of
# the NK_M0151 run; the KOs present in the latter define `metabolic_OG_set`.
# NOTE(review): `tree` and `acr` are not assigned in this cell — they carry over from
# whichever cell ran before (after the loop above they are 'nj' / 'DOWNPASS'); confirm
# that this is the intended setting.
auc_columns = ['KO', 'target', 'method', 'auc']
auc_file_name = "ko_auc." + tree + "." + acr + ".txt"

df_auc_raw = pd.read_table(
    "/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0156/result/" + auc_file_name,
    names = auc_columns,
)
# Mean AUC per (KO, target, method).
df_auc = df_auc_raw.groupby(['KO', 'target', 'method'], as_index = False).mean()

df_auc_raw_metabolic = pd.read_table(
    "/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151/result/" + auc_file_name,
    names = auc_columns,
)
metabolic_OG_set = set(df_auc_raw_metabolic['KO'])

# Display the unfiltered NK_M0156 table.
df_auc_raw

In [None]:
# Mark every AUC row whose KO is a member of metabolic_OG_set, then display only those rows.
metabolic_flags = [(ko in metabolic_OG_set) for ko in df_auc_raw["KO"]]
df_auc_raw["metabolic"] = metabolic_flags
df_auc_raw.loc[df_auc_raw["metabolic"]]

In [None]:
# Cross-validation AUC restricted to metabolic OGs: for every (tree, acr) combination,
# keep only the NK_M0156 rows whose KO also occurs in the matching NK_M0151 table, save one
# violin/strip figure and print the p-values of three comparisons.
for tree, acr in [('mlgtdb', 'MPPA'), ('mlgtdb', 'DOWNPASS'), ('nj', 'MPPA'), ('nj', 'DOWNPASS')]:
    df_auc_raw = pd.read_table("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0156/result/ko_auc."+tree+"."+acr+".txt", names = ['KO', 'target', 'method', 'auc'])

    # The KOs listed in the NK_M0151 table for the same setting define the metabolic set.
    df_auc_raw_metabolic = pd.read_table("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151/result/ko_auc."+tree+"."+acr+".txt", names = ['KO', 'target', 'method', 'auc'])
    metabolic_OG_set = set(df_auc_raw_metabolic.KO)
    df_auc_raw["metabolic"] = [(KO in metabolic_OG_set) for KO in df_auc_raw["KO"]]
    df_auc_raw = df_auc_raw[df_auc_raw["metabolic"]].reset_index(drop=True)

    # Average the AUC rows that share the same (KO, target, method).
    df_auc = df_auc_raw.groupby(['KO', 'target', 'method'], as_index = False).mean()

    # visualize: white violins with the individual points overlaid, split by target;
    # all drawing goes through the explicit fig/ax pair rather than pyplot's current axes.
    fig = plt.figure(figsize=(2,2))
    ax = fig.add_axes([0.1,0.1,0.8,0.8])
    ax.set_ylim(-0.1,1.1)
    sns.violinplot(data = df_auc, x = 'method', y = 'auc', hue = 'target', linewidth=0.5, order = ['RF','LR'], palette = ['#FFFFFF', '#FFFFFF'], ax = ax)
    sns.stripplot (data = df_auc, x = 'method', y = 'auc', hue = 'target', linewidth=0,   order = ['RF','LR'], palette = ['#9FA6F1', '#E1BB63'], size = 1, alpha=.3,jitter=0.3, dodge=True, ax = ax)
    ax.set_ylabel("AUC")
    ax.set_xlabel("")
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.get_legend().remove()
    ax.set_title(tree+" "+acr)
    # Reference line at AUC = 0.5.
    ax.axhline(0.5, linewidth = 1, alpha = 0.5, color = '#555555')
    fig.savefig("figures/NK_M0156_crossvalidation_"+tree+"_"+acr+".metabolic.pdf",bbox_inches='tight')
    # Close this specific figure so figures do not pile up across iterations.
    plt.close(fig)

    # test if average AUC is 0.5 (one-sample t-test per method and target)
    for method in ['LR', 'RF']:
        for target in ['gain', 'loss']:
            print(tree, acr, target, method, ttest_1samp(df_auc[(df_auc['method'] == method) & (df_auc['target'] == target)]['auc'], 0.5).pvalue)

    # test if AUC of LR and RF is equal (Wilcoxon signed-rank test on values paired by KO)
    for target in ['gain', 'loss']:
        df_auc_comp = pd.merge(df_auc[(df_auc['method'] == 'LR') & (df_auc['target'] == target)], df_auc[(df_auc['method'] == 'RF') & (df_auc['target'] == target)], on = 'KO')
        print(tree, acr, target, 'LR', 'RF', wilcoxon(df_auc_comp['auc_x'], df_auc_comp['auc_y']).pvalue)

    # test if AUC of gain and loss is equal (Mann-Whitney U test).
    # The alternative is spelled out because the default differed in older SciPy releases;
    # "is equal" is a two-sided question.
    for method in ['LR', 'RF']:
        df_auc_gain = df_auc[(df_auc['method'] == method) & (df_auc['target'] == 'gain')]
        df_auc_loss = df_auc[(df_auc['method'] == method) & (df_auc['target'] == 'loss')]
        print(tree, acr, method, 'gain', 'loss', mannwhitneyu(df_auc_gain.auc, df_auc_loss.auc, alternative = 'two-sided').pvalue)