In [15]:
import numpy as np
import networkx as nx
from pyvis.network import Network
import math
import community as community_louvain

import seaborn as sns
import pandas as pd
import os

# Run everything from the vivarium-ecoli checkout: the data paths used later in this
# notebook ("ecoli/...", "out/...") are relative to the working directory.
# NOTE(review): the `ecoli` package import below presumably relies on this too - confirm.
# NOTE(review): hard-coded per-user location; a checkout elsewhere requires editing this line.
os.chdir(os.path.expanduser('~/Documents/vivarium-ecoli'))

import matplotlib.pyplot as plt
import dill
import xmltodict
import os
import requests
from bs4 import BeautifulSoup
from ecoli.processes.metabolism_redux import NetworkFlowModel, FlowResult, MetabolismRedux


# Reaction IDs kept as a module-level constant; none of the cells shown here read it.
# NOTE(review): the name suggests reactions treated as unconstrained ("free") - confirm or remove.
FREE_RXNS = ["TRANS-RXN-145", "TRANS-RXN0-545", "TRANS-RXN0-474"]

In [16]:
from getpass import getpass

# Credentials must never be stored in the notebook: read them from the environment and
# fall back to an interactive prompt. The password that was previously committed here
# should be considered compromised and rotated.
biocyc_email = os.environ.get('BIOCYC_EMAIL') or input('BioCyc email: ')
biocyc_password = os.environ.get('BIOCYC_PASSWORD') or getpass('BioCyc password: ')

s = requests.Session() # session reused by every BioCyc query in the cells below
# Post login credentials to session:
login_response = s.post(
    'https://websvc.biocyc.org/credentials/login/',
    data={'email': biocyc_email, 'password': biocyc_password},
)
# Fail here, loudly, instead of letting every later query quietly come back empty.
login_response.raise_for_status()
login_response

<Response [200]>

In [17]:
# Environment to analyse. It is matched against the 'Environment' column of done.xlsx,
# used as a column name of heatmap.xlsx, and embedded in the simulation output folder
# name and in the names of the files this notebook writes.
env_name = 'Nitrate_as_reduction_target'

In [18]:
def _split_names(cell):
    """Turn a ", "-separated spreadsheet cell into a set; an empty (NaN) cell gives set()."""
    # NaN is the only value that does not compare equal to itself.
    if cell == cell:
        return set(str(cell).split(", "))
    return set()


# Table of environments with the names added to / removed from each one.
condition_df = pd.read_excel("ecoli/experiments/new_environments_test/done.xlsx")
for column in ("Add", "Remove"):
    condition_df[column] = condition_df[column].apply(_split_names)

# First row describing the selected environment (IndexError if there is none).
row_num = condition_df.index[condition_df['Environment'] == env_name][0]
important_add = condition_df.at[row_num, "Add"]
important_remove = condition_df.at[row_num, "Remove"]

In [19]:
# Identify the simulation run; together with env_name these form the output folder name.
time = '50'
date = '2023-07-21'
experiment = 'fba_new_environments'
entry = f'{experiment}_{env_name}_{time}_{date}'
folder = f'out/fba_new_env/{entry}/'

# Row label of the flux table used for the ranked flux snapshot below.
# NOTE(review): presumably a recorded simulation step - confirm why 24 was chosen.
FLUX_SNAPSHOT_STEP = 24

# SECURITY: dill/pickle deserialisation can execute arbitrary code. Only load files that
# were produced by your own simulation runs.
with open(folder + 'agent_steps.pkl', 'rb') as f:
    agent = dill.load(f)

metabolism = agent['ecoli-metabolism-redux']
stoichiometry = metabolism.stoichiometry

# output.npy wraps a pickled dict, hence allow_pickle (a real boolean, not the string 'TRUE').
output = np.load(folder + 'output.npy', allow_pickle=True).item()
output = output['agents']['0']
fba = output['listeners']['fba_results']
mass = output['listeners']['mass']
bulk = pd.DataFrame(output['bulk'])

# One column per reaction; keep a single row, ranked by absolute flux.
sim_fluxes = pd.DataFrame(fba["estimated_fluxes"], columns = metabolism.reaction_names)
sim_fluxes = pd.DataFrame(sim_fluxes.loc[FLUX_SNAPSHOT_STEP, :].abs().sort_values(ascending=False))

# Name <-> position lookups for the rows (metabolites) and columns (reactions)
# used to index `stoichiometry` in the plotting functions.
metabolites_idx = {species: i for i, species in enumerate(metabolism.metabolite_names)}
index_to_met = {v: k for k, v in metabolites_idx.items()}

reaction_idx = {reaction: i for i, reaction in enumerate(metabolism.reaction_names)}
index_to_rxn = {v: k for k, v in reaction_idx.items()}

metabolism.catalyst_ids = metabolism.parameters['catalyst_ids']
catalyst_idx = {catalyst: i for i, catalyst in enumerate(metabolism.catalyst_ids)}

In [20]:
# Per-reaction values for every environment; keep only the rows whose entry for the
# selected environment is non-zero.
heatmap_df = pd.read_excel("out/new_env_analysis/heatmap.xlsx")
heatmap_df = heatmap_df.loc[heatmap_df[env_name].ne(0)]

In [21]:
# Directed graph that the plotting functions below fill with metabolite and reaction nodes.
G = nx.DiGraph()

# Dimensions of the stoichiometric matrix: rows are metabolites, columns are reactions.
met_length = len(metabolism.metabolite_names)
reaction_length = np.shape(stoichiometry)[1]

# Metabolites left out of the graph (set this to [] to draw every metabolite).
met_not_include = [
    "ATP[c]", "WATER[c]", "PROTON[p]", "Pi[c]",
    "PROTON[c]", "ADP[c]", "Pi[p]", "WATER[p]",
    "NAD[c]", "NADH[c]", "NADP[c]", "NADPH[c]",
    "PPI[c]", "CARBON-DIOXIDE[c]", "CO-A[c]", "ACETYL-COA[c]",
]

# The same exclusions expressed as row positions of the stoichiometric matrix.
do_not_include = [metabolites_idx[name] for name in met_not_include]

def _fetch_pathway_reactions(pathway):
    """Return the reaction frame IDs BioCyc lists for ``pathway`` (possibly empty)."""
    import warnings

    plot_reactions = []
    req_str = f"https://websvc.biocyc.org/getxml?id=ECOLI:{pathway}&detail=low"
    try:
        r = s.get(req_str)
        o = xmltodict.parse(r.content)['ptools-xml']['Pathway']['reaction-list']['Reaction']
        # A single reaction is parsed as one dict instead of a list of dicts.
        if type(o) is dict:
            o = [o]

        for rxn in o:
            if type(rxn) is not str:
                plot_reactions.append(rxn['@frameid'])
    except KeyError:
        # The response was parsed but has no reaction list: nothing to plot.
        pass
    except Exception as exc:
        # Stay best-effort, but make network / parsing failures visible.
        warnings.warn(f"Could not fetch reactions of pathway {pathway}: {exc!r}")
    return plot_reactions


def plot_pathway_from_reaction(G, reaction, heatmap_df, pathway_to_colors, completed_pathways):
    """Draw every not-yet-plotted BioCyc pathway that shares a metabolite with ``reaction``.

    The metabolites of ``reaction`` (minus ``met_not_include``) are looked up on BioCyc,
    the reactions of each returned pathway are matched against ``heatmap_df`` and those
    with ``abs(log10(value)) >= 0.2`` are added to ``G`` through ``plot_reaction``.

    Args:
        G: graph that receives the edges.
        reaction: model reaction name (a key of ``reaction_idx``).
        heatmap_df: frame with a 'Reactions' column and one column named ``env_name``.
        pathway_to_colors: dict updated in place with pathway -> edge colour for every
            pathway that contributed at least one edge.
        completed_pathways: list extended in place with the pathways handled here, so
            later calls skip them.

    Returns:
        The BioCyc reaction IDs that matched a heatmap row only as a substring.
    """
    colors = ['purple', 'orange', 'green', 'lightblue', 'blue', 'pink', 'brown', 'red']
    # Continue the palette where earlier calls stopped; restarting at 0 on every call
    # gave pathways found through different reactions the same legend colour.
    color_index = len(pathway_to_colors) % len(colors)

    index = reaction_idx[reaction]
    participants = [
        index_to_met[k]
        for k in range(met_length)
        if stoichiometry[k, index] != 0 and index_to_met[k] not in met_not_include
    ]

    found = set()
    for met in participants:
        found.update(get_pathway_from_metabolite(met))
    # Sorted so the colour assignment does not depend on set iteration order, which
    # varies between kernel restarts.
    pathways = sorted(found - set(completed_pathways))

    weird_name = []
    for pathway in pathways:
        has_node = False
        for pathway_rxn in _fetch_pathway_reactions(pathway):
            names = heatmap_df['Reactions']
            exact = heatmap_df.index[names == pathway_rxn]
            if len(exact) > 0:
                row_number = exact[0]
                model_rxn = pathway_rxn
            else:
                # Fall back to a substring match. regex=False because reaction IDs contain
                # characters such as '.' and '+'; na=False so empty cells cannot break the mask.
                partial = heatmap_df.index[names.str.contains(pathway_rxn, regex=False, na=False)]
                if len(partial) == 0:
                    continue
                row_number = partial[0]
                weird_name.append(pathway_rxn)
                model_rxn = names.loc[row_number]

            value = heatmap_df[env_name].loc[row_number]
            # log10 is only defined for positive values; zero, negative and NaN entries are skipped.
            if not value > 0:
                continue
            flux = math.log10(value)

            if abs(flux) < 0.2:
                continue

            has_node = True
            plot_reaction(G, model_rxn, flux, colors[color_index])

        # Only pathways that actually drew something get a legend entry and use up a colour.
        if pathway not in pathway_to_colors and has_node:
            pathway_to_colors[pathway] = colors[color_index]
            color_index = (color_index + 1) % len(colors)

    completed_pathways.extend(pathways)
    return weird_name


In [22]:
def plot_reaction(G, reaction, flux, color):
    """Add one reaction to ``G`` as a node linked to its metabolites.

    Metabolites with a negative coefficient point to the reaction node, the reaction node
    points to metabolites with a positive coefficient. Metabolites in ``met_not_include``
    are left out. Each edge carries ``color`` and uses ``flux`` as its title.
    """
    column = reaction_idx[reaction]
    node = clean_reaction_name(reaction)

    consumed, produced = [], []
    for row in range(met_length):
        coeff = stoichiometry[row, column]
        if coeff > 0:
            bucket = produced
        elif coeff < 0:
            bucket = consumed
        else:
            continue
        met = index_to_met[row]
        if met not in met_not_include:
            bucket.append(met)

    # Reactant edges first, then product edges.
    for met in consumed:
        G.add_edge(met, node, color=color, title=flux)
    for met in produced:
        G.add_edge(node, met, color=color, title=flux)

In [23]:
def get_pathway_from_metabolite(met):
    """Ask BioCyc which ECOLI pathways contain a metabolite.

    Args:
        met: model metabolite name with a trailing compartment tag, e.g. "ATP[c]".

    Returns:
        List of pathway frame IDs; empty when the response holds no pathway or could
        not be parsed.
    """
    import warnings
    from urllib.parse import quote

    # Drop the trailing "[x]" compartment tag, but only when there is one.
    frame_id = met[:met.rfind('[')] if met.endswith(']') and '[' in met else met

    to_return = []
    function = "pathways-of-compound"
    # Percent-encode the ID: a raw '+' (as in "FE+2") is decoded as a space in a query string.
    req_str = (
        f"https://websvc.biocyc.org/apixml?fn={function}"
        f"&id=ECOLI:{quote(frame_id, safe='')}&detail=none&fmt=json"
    )

    r = s.get(req_str)
    try:
        o = xmltodict.parse(r.content)['ptools-xml']['Pathway']
        # A single pathway is parsed as one dict instead of a list of dicts.
        if type(o) is dict:
            o = [o]

        for pathway in o:
            if type(pathway) is not str: # no rxns
                to_return.append(pathway['@frameid'])
    except KeyError:
        # The response was parsed but lists no pathway for this compound.
        pass
    except Exception as exc:
        # Stay best-effort, but do not hide parsing failures.
        warnings.warn(f"Could not read pathways of {met}: {exc!r}")

    return to_return

In [24]:
def clean_reaction_name(reaction):
    """Shorten a model reaction name to the label used for its graph node.

    The first rule that applies is used:

    1. Name contains '/' and 'RXN': keep everything up to 'RXN', plus a directly
       following numeric suffix of the form '-<digits>' or '<char>-<digits>'
       (e.g. 'RXN-123', 'RXN0-5038').
    2. Name contains '__': keep the part before it.
    3. Name contains '(reverse)': keep the part before it, minus the one character
       in front of the marker.
    4. Name contains '[': keep the part before it, minus the one character in front
       of the bracket.
       NOTE(review): rules 3 and 4 assume a separator character precedes the marker.
    5. Name contains a space: keep the part before the first space.

    Anything else is returned unchanged.
    """
    def char_at(pos):
        # Positions past the end read as '' so the checks below are simply False
        # instead of raising IndexError.
        return reaction[pos] if 0 <= pos < len(reaction) else ''

    rxn_start = reaction.find('RXN')
    # Without an 'RXN' marker rule 1 has nothing to anchor on, so fall through.
    if '/' in reaction and rxn_start != -1:
        after = rxn_start + 3
        has_numeric_suffix = (
            (char_at(after) == '-' and char_at(after + 1).isdigit())
            or (char_at(after + 1) == '-' and char_at(after + 2).isdigit())
        )
        if not has_numeric_suffix:
            return reaction[:after]

        end = after + 1
        if char_at(end) == '-':
            end += 1
        while char_at(end).isdigit():
            end += 1
        return reaction[:end]

    if '__' in reaction:
        return reaction[:reaction.find('__')]
    if '(reverse)' in reaction:
        return reaction[:reaction.find('(reverse)') - 1]
    if '[' in reaction:
        return reaction[:reaction.find('[') - 1]
    if ' ' in reaction:
        # Cut at the space itself; searching for '[' here always gave -1 and merely
        # dropped the last character of the name.
        return reaction[:reaction.find(' ')]
    return reaction

In [25]:
def plot_metabolite(G, metabolite, heatmap_df):
    """Draw the reactions with a positive coefficient for ``metabolite``.

    A reaction is drawn only if it has an exact match in ``heatmap_df['Reactions']`` and
    the base-10 log of its ``env_name`` value is at least 0.2 in magnitude. All of its
    other metabolites (except those in ``do_not_include``) are linked to it in black.

    Returns:
        Names of the reactants of the drawn reactions (duplicates possible).
    """
    met_row = metabolites_idx[metabolite]
    upstream = []
    edge_color = 'black'

    for col in range(reaction_length):
        if not stoichiometry[met_row, col] > 0:
            continue

        rxn_name = index_to_rxn[col]
        rxn_node = clean_reaction_name(rxn_name)

        try:
            row_label = heatmap_df[heatmap_df['Reactions'] == rxn_name].index[0]
        except IndexError:
            # No heatmap row for this reaction: treated as no change, so not drawn.
            continue
        flux_num = math.log(heatmap_df[env_name].loc[row_label], 10)

        if abs(flux_num) < 0.2:
            continue

        G.add_edge(rxn_node, metabolite, color=edge_color, title=flux_num)

        for other in range(met_length):
            if other == met_row or other in do_not_include:
                continue
            coeff = stoichiometry[other, col]
            if coeff < 0:
                reactant = index_to_met[other]
                upstream.append(reactant)
                G.add_edge(reactant, rxn_node, color=edge_color, title=flux_num)
            elif coeff > 0:
                G.add_edge(rxn_node, index_to_met[other], color=edge_color, title=flux_num)

    return upstream

In [26]:
# Change K here to select the top k reactions/pathways to plot
# (k is also part of the legend and graph file names).
k = 6
green_offset = 0

# Label of a summary row of the heatmap sheet; it is not a reaction and must not be plotted.
SUMMARY_ROW = "Non-zero Values for each environment"
LEGEND_DIR = 'out/new_env_analysis/pathway_visualization'

# Rank by the selected environment. Rebinding instead of sorting in place keeps the
# cell free of hidden mutation of the frame built in the earlier cell.
heatmap_df = heatmap_df.sort_values(by=[env_name], ascending=False)

# The first column holds the row names. Slicing (rather than indexing row by row) also
# copes with a frame that has fewer than k rows.
top_increase = heatmap_df.iloc[:k + green_offset, 0].tolist()
bottom_increase = heatmap_df.iloc[::-1, 0].head(k).tolist()

# Drop the summary row from BOTH lists; the former if/elif left it in the bottom list
# whenever it appeared in both.
top_increase = [name for name in top_increase if name != SUMMARY_ROW]
bottom_increase = [name for name in bottom_increase if name != SUMMARY_ROW]

pathway_to_colors = {}
failed_name = []
completed_pathways = []

for reaction in top_increase + bottom_increase:
    failed_name += plot_pathway_from_reaction(G, reaction, heatmap_df, pathway_to_colors, completed_pathways)

failed_name = list(set(failed_name))

# Legend: the selected reactions with their heatmap value, then the colour of each pathway.
os.makedirs(LEGEND_DIR, exist_ok=True)
with open(f'{LEGEND_DIR}/{env_name}_{k}_legend.txt', 'w') as f:
    for reaction in (top_increase + bottom_increase):
        row_number = heatmap_df[heatmap_df['Reactions'] == reaction].index[0]
        flux = heatmap_df[env_name].loc[row_number]
        f.write('%s: %s\n' % (reaction, flux))

    for key, value in pathway_to_colors.items():
        f.write('%s: %s\n' % (key, value))


In [27]:
# Add interesting extra metabolites to examine here
# met_level is the number of expansion rounds: each round draws the current metabolites
# and continues with the distinct reactants that plot_metabolite returned for them.
met_level = 1
extra_metabolites = ["AMMONIUM[c]"]
temp = []

for i in range(met_level):
    temp = list({
        reactant
        for met in extra_metabolites
        if met is not None
        for reactant in plot_metabolite(G, met, heatmap_df)
    })
    extra_metabolites = temp
    temp = []

In [28]:
# The eight nodes with the highest closeness centrality are highlighted in blue.
closeness = nx.closeness_centrality(G)
sort_closeness = sorted(closeness.items(), key=lambda item: item[1], reverse=True)[:8]

nt = Network(height='1000px', width='1500px', directed=True, notebook=True, cdn_resources='in_line')
nt.from_nx(G)

for node_id, _score in sort_closeness:
    nt.get_node(node_id)['color'] = 'blue'

# Metabolites are drawn as stars. Names added to the environment are orange and names
# removed from it purple; purple wins over orange, and both win over the blue highlight.
for node in nt.nodes:
    label = node['label']
    if label in metabolites_idx:
        node['shape'] = 'star'
    if label in important_add:
        node['color'] = 'orange'
    if label in important_remove:
        node['color'] = 'purple'


nt.show_buttons(filter_=['physics'])
nt.show(f'out/new_env_analysis/pathway_visualization/{env_name}_pathway_{k}_graph.html')

out/new_env_analysis/pathway_visualization/Nitrate_as_reduction_target_pathway_6_graph.html
