### Imports

In [2]:
import os
import time
from collections import defaultdict
from collections import deque
from getpass import getpass
from xml.etree import ElementTree

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

### Set up API Environment

In [3]:
# Log in to BioCyc once; the authenticated session `s` is reused by every later request.
# Credentials come from the BIOCYC_EMAIL / BIOCYC_PASSWORD environment variables
# (falling back to an interactive prompt) so that no secret is stored in the notebook.
# NOTE(review): a real password was previously committed in this cell -- rotate it.
s = requests.Session()  # create session
biocyc_email = os.environ.get('BIOCYC_EMAIL') or input('BioCyc email: ')
biocyc_password = os.environ.get('BIOCYC_PASSWORD') or getpass('BioCyc password: ')
# Post login credentials to session:
login_response = s.post(
    'https://websvc.biocyc.org/credentials/login/',
    data={'email': biocyc_email, 'password': biocyc_password},
)
login_response.raise_for_status()  # fail here rather than on every later API call
login_response

<Response [200]>

### Helper Functions

In [4]:
def etree_to_dict(t):
    """Convert an ElementTree element into a nested ``{tag: payload}`` dict.

    The payload is built as follows:
      * child elements are converted recursively and grouped by tag; a tag that
        occurs once maps to its single value, a repeated tag maps to a list;
      * XML attributes are stored under keys prefixed with ``'@'``;
      * stripped element text is stored under ``'#text'`` when the element also
        has children or attributes, otherwise the stripped text *is* the payload;
      * an element with no children, attributes or text has payload ``None``.
    """
    kids = list(t)
    attrs = t.attrib

    if kids:
        grouped = defaultdict(list)
        for kid in kids:
            for tag, value in etree_to_dict(kid).items():
                grouped[tag].append(value)
        # Collapse single-occurrence tags to a scalar, keep repeated tags as lists.
        payload = {
            tag: (values[0] if len(values) == 1 else values)
            for tag, values in grouped.items()
        }
    elif attrs:
        payload = {}
    else:
        payload = None

    if attrs:
        for name, value in attrs.items():
            payload['@' + name] = value

    raw_text = t.text
    if raw_text:
        stripped = raw_text.strip()
        if kids or attrs:
            # Whitespace-only text next to children/attributes is dropped.
            if stripped:
                payload['#text'] = stripped
        else:
            payload = stripped

    return {t.tag: payload}

### Get Data

In [5]:
# Read the two input tables from the working directory.
flows = pd.read_csv("whole_cell_flow_data.csv")
stoichiometry = pd.read_csv("whole_cell_s_matrix (1).csv")

# The first column ('Unnamed: 0') carries the metabolite names; every other
# column header is a reaction name.
reaction_names = stoichiometry.columns[1:].tolist()
metabolite_names = list(stoichiometry['Unnamed: 0'])
n_mets, n_rxns = len(metabolite_names), len(reaction_names)

# Matrix values only: rows follow metabolite_names, columns follow reaction_names.
S = stoichiometry.iloc[:, 1:].to_numpy()

In [6]:
# Map each metabolite name to the part before its first '[' (the whole name
# when it contains no '[').
metabolites_processed = {
    meta: meta.partition('[')[0]
    for meta in metabolite_names
}

In [7]:
def _strip_reaction_suffixes(name):
    """Reduce a reaction column name to the ID used for the BioCyc lookup.

    Applied in order: cut at "(reverse)" (also dropping the one character
    before it), cut at the first "__", then keep everything up to and
    including the first "-RXN".
    """
    cut = name.find("(reverse)")
    if cut != -1:
        name = name[:cut - 1]

    cut = name.find("__")
    if cut != -1:
        name = name[:cut]

    cut = name.find("-RXN")
    if cut != -1:
        name = name[:cut + 4]  # 4 == len("-RXN"), so the marker itself is kept

    return name


# Original reaction column name -> cleaned ID.
reactions_processed = {
    rxn: _strip_reaction_suffixes(rxn)
    for rxn in reaction_names
}

### Get Hierarchy for Reactions

In [176]:
def _parent_entries(record):
    """Locate the parent links inside one parsed ``ptools-xml`` record.

    Returns ``(entries, key_path)`` where ``entries`` is a list of raw parent
    entries and ``key_path`` is the sequence of keys leading from one entry to
    its frame id. A record without usable links yields ``([], ())``.
    """
    if not isinstance(record, dict):
        return [], ()

    # Pathway records list their parents under Pathway -> parent.
    pathway = record.get('Pathway')
    if isinstance(pathway, dict) and 'parent' in pathway:
        entries = pathway['parent']
        if not isinstance(entries, list):
            entries = [entries]
        return entries, ('Pathway', '@frameid')

    # Reaction records list their containers under Reaction -> in-pathway.
    reaction = record.get('Reaction')
    if isinstance(reaction, dict) and 'in-pathway' in reaction:
        containers = reaction['in-pathway']
        if not isinstance(containers, list):
            containers = [containers]
        entries = []
        for container in containers:
            if not isinstance(container, dict):
                continue  # empty or text-only element: nothing to follow
            # Pathway links take precedence; Reaction links are only a fallback.
            if 'Pathway' in container:
                found = container['Pathway']
            else:
                found = container.get('Reaction', [])
            entries.extend(found if isinstance(found, list) else [found])
        return entries, ('@frameid',)

    return [], ()


def bfs_pathways(ID, session=None):
    """Walk upward from a BioCyc frame and group every frame reached by depth.

    Starting at ``ID`` (level 0), each frame's record is fetched from the
    BioCyc ``getxml`` endpoint, its parent links are followed breadth-first,
    and every frame is assigned the largest level at which it was reached.

    Parameters
    ----------
    ID : str
        Frame id to start from (the ``ECOLI:`` prefix is added here).
    session : optional
        Object with a ``get(url)`` method returning a response that exposes
        ``status_code`` and ``content``. Defaults to the notebook-level
        session ``s``.

    Returns
    -------
    (result, issue)
        ``result`` maps level -> list of frame ids whose deepest level it is.
        ``issue`` maps level -> list of problems seen at that level, either
        ``(frame_id, status_code)`` for a non-200 response or
        ``(entry, exception_class)`` for a parent entry without a frame id.

    Notes
    -----
    Each frame is downloaded at most once per call, even when it is reached
    through several paths. A cyclic hierarchy would never terminate, exactly
    as before.
    """
    http = s if session is None else session

    pathway_tree = {}   # frame id -> deepest level seen so far
    issue = {}
    fetched = {}        # frame id -> (status_code, parent entries, key path)
    queue = deque([(ID, 0)])

    while queue:
        frame_id, curr_lvl = queue.popleft()

        if frame_id not in fetched:
            response = http.get("https://websvc.biocyc.org/getxml?id=ECOLI:" + frame_id)
            if response.status_code == 200:
                record = etree_to_dict(ElementTree.fromstring(response.content))
                fetched[frame_id] = (200,) + _parent_entries(record['ptools-xml'])
            else:
                fetched[frame_id] = (response.status_code, [], ())
        status, entries, key_path = fetched[frame_id]

        if status != 200:
            issue.setdefault(curr_lvl, []).append((frame_id, status))
            continue

        for entry in entries:
            try:
                target = entry
                for key in key_path:
                    target = target[key]
            except (KeyError, TypeError) as exc:
                # Malformed link: record it and keep crawling the rest.
                issue.setdefault(curr_lvl, []).append((entry, type(exc)))
            else:
                queue.append((target, curr_lvl + 1))

        # Keyed by frame id (not by level) so the deepest level really is kept.
        pathway_tree[frame_id] = max(curr_lvl, pathway_tree.get(frame_id, curr_lvl))

    # Invert frame -> level into level -> [frames], in first-visit order.
    result = {}
    for key, value in pathway_tree.items():
        result.setdefault(value, []).append(key)

    return result, issue

In [177]:
# Spot-check the traversal on a single pathway ID; prints the tuple returned by bfs_pathways.
print(bfs_pathways('Purine-Deoxyribonuc-De-Novo-Biosynthesis'))

({0: ['Purine-Deoxyribonuc-De-Novo-Biosynthesis'], 1: ['Deoxyribonucleotide-Biosynthesis', 'Purine-Nucleotide-De-Novo-Biosynthesis'], 3: ['Nucleotide-Biosynthesis'], 2: ['PUR-NUC-SYN'], 4: ['Biosynthesis']}, {})


In [179]:
# Crawl the hierarchy for every reaction column.
# Several column names reduce to the same processed ID, so each distinct ID is
# crawled once and the outcome is copied for every column that maps to it.
not_found = {}
pathway_forests = {}
crawled = {}  # processed ID -> (levels, issues) as returned by bfs_pathways
for reaction in tqdm(reaction_names):
    ID = reactions_processed[reaction]
    if ID not in crawled:
        crawled[ID] = bfs_pathways(ID)
    levels, issues = crawled[ID]
    # Copy per reaction so that entries never share the same list objects.
    pathway_forests[reaction] = {lvl: list(ids) for lvl, ids in levels.items()}
    not_found[reaction] = {lvl: list(items) for lvl, items in issues.items()}

100%|█████████████████████████████████████████| 470/470 [05:21<00:00,  1.46it/s]


In [184]:
# Display the per-reaction level -> frame-id mapping collected by the crawl.
pathway_forests

{'1.5.1.20-RXN-5-METHYL-THF/NAD//METHYLENE-THF/NADH/PROTON.44. (reverse)': {0: ['1.5.1.20-RXN']},
 '1.5.1.20-RXN-CPD-1302/NAD//CPD-12996/NADH/PROTON.36. (reverse)': {0: ['1.5.1.20-RXN']},
 '1PFRUCTPHOSN-RXN': {0: ['1PFRUCTPHOSN-RXN'],
  1: ['PWY0-1314'],
  2: ['Sugars-And-Polysaccharides-Degradation'],
  3: ['Carbohydrates-Degradation'],
  4: ['Degradation']},
 '1TRANSKETO-RXN (reverse)': {0: ['1TRANSKETO-RXN'],
  1: ['NONOXIPENT-PWY'],
  2: ['Pentose-Phosphate-Cycle'],
  3: ['Energy-Metabolism']},
 '2-DEHYDROPANTOATE-REDUCT-RXN (reverse)': {0: ['2-DEHYDROPANTOATE-REDUCT-RXN'],
  1: ['PANTO-PWY'],
  2: ['Pantothenate-Biosynthesis'],
  3: ['CoA-Biosynthesis'],
  4: ['Carriers-Biosynthesis'],
  5: ['Cofactor-Biosynthesis'],
  6: ['Biosynthesis']},
 '2-ISOPROPYLMALATESYN-RXN': {0: ['2-ISOPROPYLMALATESYN-RXN'],
  1: ['LEUSYN-PWY'],
  2: ['LEUCINE-SYN'],
  3: ['IND-AMINO-ACID-SYN'],
  4: ['Amino-Acid-Biosynthesis'],
  5: ['Biosynthesis']},
 '2-OCTAPRENYL-6-METHOXYPHENOL-HYDROX-RXN': {0: ['2

In [160]:
# Fetch and parse one reaction record by hand to inspect its structure.
# Relies on the session `s` and on `etree_to_dict` defined in earlier cells.
response = s.get("https://websvc.biocyc.org/getxml?id=ECOLI:2.3.1.157-RXN")
    
tree = ElementTree.fromstring(response.content)
r = etree_to_dict(tree)

In [183]:
# Display the per-reaction issues collected by the crawl (an empty dict means none were recorded).
not_found

{'1.5.1.20-RXN-5-METHYL-THF/NAD//METHYLENE-THF/NADH/PROTON.44. (reverse)': {},
 '1.5.1.20-RXN-CPD-1302/NAD//CPD-12996/NADH/PROTON.36. (reverse)': {},
 '1PFRUCTPHOSN-RXN': {},
 '1TRANSKETO-RXN (reverse)': {},
 '2-DEHYDROPANTOATE-REDUCT-RXN (reverse)': {},
 '2-ISOPROPYLMALATESYN-RXN': {},
 '2-OCTAPRENYL-6-METHOXYPHENOL-HYDROX-RXN': {},
 '2-OCTAPRENYL-6-OHPHENOL-METHY-RXN': {},
 '2-OCTAPRENYL-METHOXY-BENZOQ-METH-RXN': {},
 '2-OCTAPRENYLPHENOL-HYDROX-RXN': {},
 '2.3.1.157-RXN': {},
 '2.5.1.19-RXN': {},
 '2.5.1.64-RXN': {},
 '2.7.1.148-RXN': {},
 '2.7.7.60-RXN': {},
 '2.8.1.6-RXN': {},
 '2OXOGLUTARATEDEH-RXN': {},
 '2PGADEHYDRAT-RXN': {},
 '2TRANSKETO-RXN (reverse)': {},
 '3-CH3-2-OXOBUTANOATE-OH-CH3-XFER-RXN-CPD-12996/2-KETO-ISOVALERATE/WATER//2-DEHYDROPANTOATE/CPD-1301.63.': {},
 '3-DEHYDROQUINATE-DEHYDRATASE-RXN': {},
 '3-DEHYDROQUINATE-SYNTHASE-RXN': {},
 '3-OCTAPRENYL-4-OHBENZOATE-DECARBOX-RXN': {},
 '3.2.2.10-RXN-CMP/WATER//RIBOSE-5P/CYTOSINE.30.': {},
 '3.2.2.10-RXN-UMP/WATER//RIBOSE

In [161]:
# Show the pathway links of the reaction record currently held in `r`.
r['ptools-xml']['Reaction']['in-pathway']['Pathway']

[{'@resource': 'getxml?ECOLI:PWY0-1261',
  '@orgid': 'ECOLI',
  '@frameid': 'PWY0-1261'},
 {'@resource': 'getxml?ECOLI:UDPNAGSYN-PWY',
  '@orgid': 'ECOLI',
  '@frameid': 'UDPNAGSYN-PWY'}]