In [16]:
import numpy as np
import networkx as nx
from pyvis.network import Network
import math
import community as community_louvain

import seaborn as sns
import pandas as pd
import os

# Work from the vivarium-ecoli checkout: the relative 'out/new_env_analysis/...'
# paths used later in this notebook resolve against this directory.
# NOTE(review): hard-coded per-user location; presumably also what lets the
# `ecoli` import below resolve — confirm, and consider making it configurable.
os.chdir(os.path.expanduser('~/Documents/vivarium-ecoli'))

import matplotlib.pyplot as plt
import dill
import xmltodict
import os
import requests
from bs4 import BeautifulSoup
from ecoli.processes.metabolism_redux import NetworkFlowModel, FlowResult, MetabolismRedux


# NOTE(review): presumably reaction IDs that are treated as unconstrained ("free");
# not referenced in the cells visible here — confirm it is still needed.
FREE_RXNS = ["TRANS-RXN-145", "TRANS-RXN0-545", "TRANS-RXN0-474"]

In [17]:
# BioCyc web-service session shared by the lookup helpers defined below.
s = requests.Session()

# Credentials are read from the environment so they never live in the notebook
# (a missing variable fails fast with a KeyError naming it).
# NOTE(review): the account password used to be committed in this cell and
# should be rotated.
login_response = s.post(
    'https://websvc.biocyc.org/credentials/login/',
    data={
        'email': os.environ['BIOCYC_EMAIL'],
        'password': os.environ['BIOCYC_PASSWORD'],
    },
)
# Last expression of the cell: shows the login response status as before.
login_response

<Response [200]>

In [18]:
def clean_name(reaction):
    """Strip decorations from a reaction name, leaving a shorter base ID.

    Exactly one rule is applied, chosen by the first marker found in this order:

    1. ``'/'`` in the name: keep everything up to and including the first
       ``'RXN'``, plus a numeric suffix of the form ``-<digits>`` or
       ``<one char>-<digits>`` directly after it (e.g. ``RXN-1234`` or
       ``RXN0-545``). If the name has no ``'RXN'`` it is returned unchanged.
    2. ``'__'``: keep the text before the first ``'__'``.
    3. ``'(reverse)'``: drop the marker and the one character before it.
    4. ``'['``: drop from the bracket on, plus the one character before it.
    5. ``' '``: keep the text before the first space.

    Args:
        reaction: Reaction name string.

    Returns:
        The shortened name, or ``reaction`` unchanged when no rule applies.
    """
    def char_at(pos):
        # Slice instead of index so positions past the end give '' rather than
        # raising IndexError (''.isdigit() is False, which ends every check).
        return reaction[pos:pos + 1]

    if '/' in reaction:
        index = reaction.find('RXN')
        if index == -1:
            # Nothing to anchor on; negative offsets would read unrelated characters.
            return reaction

        after_rxn = index + 3
        if char_at(after_rxn) == '-' and char_at(after_rxn + 1).isdigit():
            end = after_rxn + 1          # "RXN-<digits>"
        elif char_at(after_rxn + 1) == '-' and char_at(after_rxn + 2).isdigit():
            end = after_rxn + 2          # "RXN<char>-<digits>"
        else:
            return reaction[:after_rxn]  # no numeric suffix: stop right after "RXN"

        # Extend over the whole run of digits.
        while char_at(end).isdigit():
            end += 1
        return reaction[:end]

    if '__' in reaction:
        return reaction[:reaction.find('__')]
    if '(reverse)' in reaction:
        return reaction[:reaction.find('(reverse)') - 1]
    if '[' in reaction:
        return reaction[:reaction.find('[') - 1]
    if ' ' in reaction:
        # Cut at the space. (Searching for '[' here always returned -1, because
        # the '[' case is handled above, and so just chopped the last character.)
        return reaction[:reaction.find(' ')]
    return reaction

def get_gene_from_reaction(reaction):
    """Return the frame id of the first gene BioCyc reports for an ECOLI reaction.

    Calls the ``genes-of-reaction`` web service through the module-level
    session ``s`` and parses the response body with ``xmltodict``.

    Args:
        reaction: Reaction frame id; inserted verbatim into the request URL.

    Returns:
        The ``@frameid`` of the first non-string ``Gene`` entry, or ``None``
        when the response has no usable ``Gene`` element or cannot be parsed.
    """
    function = "genes-of-reaction"
    req_str = f"https://websvc.biocyc.org/apixml?fn={function}&id=ECOLI:{reaction}&detail=none&fmt=json"

    r = s.get(req_str)
    try:
        genes = xmltodict.parse(r.content)['ptools-xml']['Gene']
        # A lone <Gene> element is parsed as one mapping instead of a list.
        # isinstance (rather than `type(...) is dict`) also covers the
        # OrderedDict that older xmltodict releases return.
        if isinstance(genes, dict):
            genes = [genes]

        for gene in genes:
            if not isinstance(gene, str):  # plain strings carry no frame id
                return gene['@frameid']
    except Exception:
        # Deliberate best-effort lookup: a missing key or an unparseable
        # response simply means "no gene found" for this reaction.
        pass

    return None

def get_pathway_from_gene(gene):
    """Return the frame id of the first pathway BioCyc reports for an ECOLI gene.

    Calls the ``pathways-of-gene`` web service through the module-level
    session ``s`` and parses the response body with ``xmltodict``.

    Args:
        gene: Gene frame id; inserted verbatim into the request URL.

    Returns:
        The ``@frameid`` of the first non-string ``Pathway`` entry, or ``None``
        when the response has no usable ``Pathway`` element or cannot be parsed.
    """
    function = "pathways-of-gene"
    req_str = f"https://websvc.biocyc.org/apixml?fn={function}&id=ECOLI:{gene}&detail=none&fmt=json"

    r = s.get(req_str)
    try:
        pathways = xmltodict.parse(r.content)['ptools-xml']['Pathway']
        # A lone <Pathway> element is parsed as one mapping instead of a list.
        # isinstance (rather than `type(...) is dict`) also covers the
        # OrderedDict that older xmltodict releases return.
        if isinstance(pathways, dict):
            pathways = [pathways]

        for pathway in pathways:
            if not isinstance(pathway, str):  # plain strings carry no frame id
                return pathway['@frameid']
    except Exception:
        # Deliberate best-effort lookup: a missing key or an unparseable
        # response simply means "no pathway found" for this gene.
        pass

    return None

def get_reactions_from_pathway(pathway):
    """Return the frame ids of the reactions BioCyc lists for an ECOLI pathway.

    Fetches the pathway record from the ``getxml`` web service through the
    module-level session ``s`` and reads its ``reaction-list``.

    Args:
        pathway: Pathway frame id; inserted verbatim into the request URL.

    Returns:
        A list of reaction ``@frameid`` strings. The list is empty (or holds
        only the ids collected so far) when the request fails, the response
        cannot be parsed, or an expected element is missing.
    """
    to_return = []
    try:
        req_str = f"https://websvc.biocyc.org/getxml?id=ECOLI:{pathway}&detail=low"
        r = s.get(req_str)
        reactions = xmltodict.parse(r.content)['ptools-xml']['Pathway']['reaction-list']['Reaction']
        # A lone <Reaction> element is parsed as one mapping instead of a list.
        # isinstance (rather than `type(...) is dict`) also covers the
        # OrderedDict that older xmltodict releases return.
        if isinstance(reactions, dict):
            reactions = [reactions]

        for rxn in reactions:
            if not isinstance(rxn, str):  # plain strings carry no frame id
                to_return.append(rxn['@frameid'])
    except Exception:
        # Deliberate best-effort lookup (the request itself is inside the try):
        # any failure leaves the caller with whatever was collected so far.
        pass

    return to_return

In [None]:
# Load the per-reaction table. Later cells read a 'Reactions' column of reaction
# names from it and treat every other column as numeric values.
# The path is relative to the working directory set in the first cell.
heatmap_df = pd.read_excel("out/new_env_analysis/heatmap.xlsx")

In [28]:
# Build one row per distinct BioCyc pathway. For every heatmap column the row
# holds the mean log10 of that column's values over the pathway's reactions
# that can be found in heatmap_df (zeros contribute 0 to the sum but still
# count towards the mean).
pathway_records = []      # one dict per pathway; turned into a DataFrame once at the end
seen_pathways = set()     # pathways already summarised

for _, row in heatmap_df.iterrows():
    rxn = clean_name(row['Reactions'])
    gne = get_gene_from_reaction(rxn)
    if gne is None:
        continue

    pathway = get_pathway_from_gene(gne)
    if pathway is None or pathway in seen_pathways:
        continue
    seen_pathways.add(pathway)

    # Resolve each pathway reaction to a heatmap row once; the match does not
    # depend on the column being averaged.
    matched_rows = []
    for reac in get_reactions_from_pathway(pathway):
        hits = heatmap_df.index[heatmap_df['Reactions'] == reac]
        if len(hits) == 0:
            # Fall back to a literal substring match. regex=False keeps
            # characters such as '.' or '+' in reaction ids from being read
            # as regular-expression syntax.
            hits = heatmap_df.index[heatmap_df['Reactions'].str.contains(reac, regex=False)]
        if len(hits) > 0:
            matched_rows.append(hits[0])

    record = {'Pathways': pathway}
    for col in heatmap_df.columns:
        if col == 'Reactions':
            # Not a value column. The all-zero 'Reactions' output column is
            # kept so the layout of the saved CSV does not change.
            record[col] = 0.0
            continue

        log_total = 0.0
        for row_label in matched_rows:
            value = heatmap_df.loc[row_label, col]
            if value != 0:
                log_total += math.log(value, 10)

        record[col] = log_total / len(matched_rows) if matched_rows else 0.0

    pathway_records.append(record)

# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every
# call; building the frame once from the collected records avoids both issues.
pathway_df = pd.DataFrame(pathway_records)

print(pathway_df)

         Pathways  Reactions  Hydrogen_fumarate_-O2_Glc  Ribose_C-source  \
0       PWY0-1336        0.0                   2.651957         0.000000   
1        PWY-8291        0.0                  -0.004411        -0.000151   
2        PWY-5437        0.0                   0.000000         0.000000   
3    GLUCONEO-PWY        0.0                   1.734921        -0.008320   
4             TCA        0.0                  -0.051712        -0.096573   
..            ...        ...                        ...              ...   
294     PWY0-1602        0.0                   0.000000         0.000000   
295     PWY0-1600        0.0                   0.000000         0.000000   
296     PWY0-1599        0.0                   0.000000         0.000000   
297     PWY0-1353        0.0                   0.000000         0.032344   
298      PWY-8164        0.0                   0.000000         0.000000   

     Thiamine_salvaging  Nitrate_as_reduction_target  \
0              0.000000        

In [29]:
# Save the per-pathway table as CSV (path relative to the working directory set
# in the first cell). No `index=` argument is passed, so pandas' default applies.
pathway_df.to_csv(rf'out/new_env_analysis/pathway_heatmap.csv')