In [None]:
import networkx as nx
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.colors as mcolor
import os
import json
from Bio import Phylo

In [None]:
# Global matplotlib styling for the figures of this notebook, applied in one
# call: font, default figure size, axis label colour and line/tick widths.
matplotlib.rcParams.update({
    'font.family':       'Arial',
    'font.sans-serif':   ["Arial","DejaVu Sans","Lucida Grande","Verdana"],
    'figure.figsize':    [4,3],
    'font.size':         10,
    "axes.labelcolor":   "#000000",
    "axes.linewidth":    1.0,
    "xtick.major.width": 1.0,
    "ytick.major.width": 1.0,
})

# Colormap objects kept at hand under short names.
cmap1 = plt.cm.tab20
cmap2 = plt.cm.Set3
#plt.style.use('default')

In [None]:
# Work inside the experiment directory so that the relative output paths used
# by later cells (e.g. "networks/..." and "tables/...") resolve there.
os.chdir("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0152")

# Create the output directories. `exist_ok=True` makes the cell safe to re-run
# when the directories already exist, while genuine failures (e.g. a permission
# error) are raised instead of being silently swallowed by a bare `except`.
# The loop variable is named `dirname` so the builtin `dir` is not shadowed.
for dirname in ["figures", "tables", "networks"]:
    os.makedirs(dirname, exist_ok=True)

In [None]:
# Classes of KOs
#
# Load the KEGG link tables and the KO ontology (ko00001.json) from the
# NK_M0151 experiment directory. The tables are read with explicit column
# names, i.e. the files are treated as having no header row.

table_dir = "/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/MetabolicNetworkEvolution/experiment/NK_M0151"

df_path_ko = pd.read_table(table_dir + "/tables/path_ko.txt", names = ['Pathway', 'KO'])
df_rn_ko = pd.read_table(table_dir + "/tables/rn_ko.txt", names = ['Reaction','KO'])
df_md_ko = pd.read_table(table_dir + "/tables/md_ko.txt", names = ['Module','KO'])
df_path_md = pd.read_table(table_dir + "/tables/path_md.txt", names = ['Pathway','Module'])

# Read the ontology through a context manager so the file handle is closed
# deterministically; the path is built from `table_dir` and is identical to
# the literal path used previously.
with open(table_dir + "/json/ko00001.json") as ontology_file:
    ontology = json.load(ontology_file)

# Convert the nested ontology dict (each term has a 'name' and, optionally, a
# list of 'children') into a Bio.Phylo tree whose clades carry the term names.
# The traversal is iterative: every stack entry pairs an ontology term with
# the clade that was created for it.
root_clade = Phylo.BaseTree.Clade(name=ontology['name'])
stack = [(ontology, root_clade)]

while stack:
    term, clade = stack.pop()
    # Terms without a 'children' entry are leaves and contribute nothing here.
    for child in term.get('children', []):
        child_clade = Phylo.BaseTree.Clade(name = child['name'])
        clade.clades.append(child_clade)
        stack.append((child, child_clade))

# The tree is built once, after the clade hierarchy is complete (the earlier
# placeholder Tree that was immediately overwritten has been removed).
ontology_tree = Phylo.BaseTree.Tree(root_clade)

# Pair every KO with the category it sits under. The categories are the child
# clades of the first top-level branch of the ontology tree; a terminal is
# treated as a KO when the first whitespace-separated token of its name starts
# with 'K'.
# NOTE(review): presumably the first top-level branch is the KEGG "Metabolism"
# class -- confirm against ko00001.json.
list_category_ko = []
for clade in ontology_tree.clade.clades[0].clades:
    for tip in clade.get_terminals():
        KO = tip.name.split()[0]
        if KO[0] == 'K':
            list_category_ko.append([clade.name, KO])

# A KO can be listed several times under the same category; keep one row per
# (category, KO) pair. (The collection loop used to be duplicated by a garbled
# copy-paste, which only doubled the rows before this de-duplication.)
df_category_ko = pd.DataFrame(list_category_ko, columns = ['category', 'KO']).drop_duplicates()

# Number of categories each KO belongs to. The counts are taken from the
# Series directly so the code does not depend on the column name that
# `value_counts()` produces ('KO' before pandas 2.0, 'count' afterwards).
ko_counts = df_category_ko['KO'].value_counts()
df_ko_count = ko_counts.rename('KO').to_frame()
set_ko_with_unique_category = set(ko_counts[ko_counts == 1].index)

# Flag the KOs that belong to exactly one category and keep them separately.
df_category_ko['unique'] = df_category_ko['KO'].isin(set_ko_with_unique_category)
df_uniquecategory_ko = df_category_ko[df_category_ko['unique']]

# Colours of the function categories.

# Hand-picked hex colours (not referenced again within this cell).
colors = ['#66C2A5', '#FC8D62', '#8DA0CB', '#E78AC3', '#555555', '#FC8D62', '#8DA0CB', '#E78AC3', '#66C2A5', '#FC8D62', '#000000']

# Matplotlib colormap that is sampled by integer index further down.
cm_name = 'Set3'
cm = plt.get_cmap(cm_name)

# Attach the modules to every (category, KO) row and give each row a weight of
# one, so that summing 'Nko' per (category, Module) counts the shared KOs.
df_category_ko_module = df_category_ko.merge(df_md_ko, on = 'KO')
df_category_ko_module['Nko'] = 1
df_category_module_count = df_category_ko_module.groupby(['category', 'Module'], as_index = False).sum()

# For every module keep the row of the category with the largest KO count,
# then order the result by category and keep only the two label columns.
top_rows_per_module = df_category_module_count.groupby('Module')['Nko'].idxmax()
df_maxcategory_module = df_category_module_count.loc[top_rows_per_module].sort_values('category')
df_maxcategory_module = df_maxcategory_module.reset_index()[['category', 'Module']]

# Number the categories in order of first appearance and look up one colour
# of the colormap per category id.
module_categories = df_maxcategory_module.category.unique()
df_category_color = pd.DataFrame(
    list(zip(module_categories, range(len(module_categories)))),
    columns = ["category", 'category_id'],
)
df_category_color['color'] = [mcolor.rgb2hex(cm(category_id)) for category_id in df_category_color['category_id']]
#df_category_color

# Same procedure as for the modules, applied to pathways: attach the pathways
# to every (category, KO) row, count the KOs per (category, Pathway) through
# the unit weight 'Nko', and keep for each pathway the category with the
# largest count.
df_category_ko_pathway = df_category_ko.merge(df_path_ko, on = 'KO')
df_category_ko_pathway['Nko'] = 1
df_category_pathway_count = df_category_ko_pathway.groupby(['category', 'Pathway'], as_index = False).sum()

top_rows_per_pathway = df_category_pathway_count.groupby('Pathway')['Nko'].idxmax()
df_maxcategory_pathway = df_category_pathway_count.loc[top_rows_per_pathway].sort_values('category')
df_maxcategory_pathway = df_maxcategory_pathway.reset_index()[['category', 'Pathway']]
df_maxcategory_pathway

In [None]:
# Module network: per the original note, edges connect pairs of KEGG Modules
# that share one or more reactions or contain adjacent reactions.
network_M = nx.read_gml("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/SyntrophyExploration/NK_S0002/Single_Filter_N_CompressedNetwork.gml")

# Relabel every node with the part of its label after the first ":".
# Presumably this makes the labels match the `Module` column used below.
mapping = dict((module, module.split(":")[1]) for module in network_M.nodes)
network_M = nx.relabel_nodes(network_M, mapping)

# Restrict the network to the modules whose dominant category is xenobiotics
# biodegradation and save that subnetwork.
is_xenobiotics = df_maxcategory_module['category'] == '09111 Xenobiotics biodegradation and metabolism'
network_M_Xenobiotics = network_M.subgraph(df_maxcategory_module[is_xenobiotics].Module)
nx.write_gml(network_M_Xenobiotics, "networks/network_module.xenobiotics.gml")

In [None]:
# Print the two endpoint modules of every edge in the xenobiotics subnetwork.
for module_a, module_b in network_M_Xenobiotics.edges:
    print(module_a, module_b)

Adjacent module pairs are
M00551 < M00543 
M00551 < M00538
M00551 < M00537
M00534 > M00638
M00568 < M00548
M00568 < M00637
M00539 < M00419
M00418 > M00541
M00547 > M00569
M00548 > M00569
M00637 > M00569

In [None]:
# Module network: per the original note, edges connect pairs of KEGG Modules
# that share one or more reactions or contain adjacent reactions.
# The network does not depend on the category, so it is read and relabelled
# once here instead of on every loop iteration.
network_M = nx.read_gml("/Users/konnonaoki/Documents/backupped/Research/IwasakiLab/Data/SyntrophyExploration/NK_S0002/Single_Filter_N_CompressedNetwork.gml")
mapping = {module: module.split(":")[1] for module in network_M.nodes}
network_M = nx.relabel_nodes(network_M, mapping)

# Write one subnetwork per category, named after the first space-separated
# token of the category label. Iterating in sorted order makes the run (and
# the value `network_M_ext` holds afterwards) deterministic.
# NOTE(review): the categories are taken from df_maxcategory_pathway while the
# modules are selected from df_maxcategory_module -- confirm this is intended.
for category in sorted(set(df_maxcategory_pathway['category'])):
    network_M_ext = network_M.subgraph(df_maxcategory_module[df_maxcategory_module['category'] == category].Module)
    nx.write_gml(network_M_ext, "networks/network_module."+category.split(" ")[0]+".gml")

In [None]:
# Display the distinct categories present in the pathway table.
set(df_maxcategory_pathway['category'].tolist())

#### Amino acid synthesisの繋がったモジュールペアを列挙 (Enumerate the connected module pairs for amino acid synthesis)

In [None]:
# Subnetwork of the modules whose dominant category is amino acid metabolism.
is_amino_acid = df_maxcategory_module['category'] == '09105 Amino acid metabolism'
network_M_ext = network_M.subgraph(df_maxcategory_module[is_amino_acid].Module)

# One row per edge of that subnetwork: the two connected modules.
list_modulepair = [[edge[0], edge[1]] for edge in network_M_ext.edges]
df_modulepair = pd.DataFrame(list_modulepair, columns = ["md1", "md2"])
df_modulepair

In [None]:
# Module descriptions: a table read with the column names Module and
# Description.
module_desc_path = "/Users/konnonaoki/GoogleDrive/Research/KonnoNaoki/repositories/handyenrich/ref/class_description/module.kegg.txt"
df_md_desc = pd.read_table(module_desc_path, names = ["Module", "Description"])

# Keep only the part of each module label after the first ":".
df_md_desc["Module"] = df_md_desc["Module"].map(lambda md: md.split(":")[1])
df_md_desc

In [None]:
# Attach the description of the first module of every pair as 'desc1'.
# The merge uses pandas' default join, so pairs whose module has no entry in
# df_md_desc are dropped.
pairs_with_desc1 = df_modulepair.merge(df_md_desc, left_on = "md1", right_on = "Module")
pairs_with_desc1 = pairs_with_desc1[["md1", "md2", "Description"]].rename(columns = {"Description":"desc1"})

# Same for the second module of every pair, stored as 'desc2'.
pairs_with_desc2 = pairs_with_desc1.merge(df_md_desc, left_on = "md2", right_on = "Module")
df_modulepair = pairs_with_desc2[["md1", "md2", "desc1", "Description"]].rename(columns = {"Description":"desc2"})

# Save the annotated pairs without the index column.
df_modulepair.to_csv("tables/aminoacid.csv", index=False)