# CS524 Project: Flux Balance Analysis for Metabolic Networks

# Introduction
Biological systems have numerous interconnected
metabolic (chemical) reactions that convert nutrients into energy, biomass, and other
products. Understanding how the reaction fluxes are distributed across a metabolic network
is an important question in biology. Flux Balance Analysis (FBA) is a computational
method to model the reaction fluxes in a metabolic network at steady state [1]. It uses a
complete set of all reactions of a cell/organism and formulates a linear program to
optimize a biological objective such as growth rate, thereby obtaining a metabolic
network model with an optimal flux distribution. In this project, I want to use the idea of FBA
to construct metabolic models using public data, and then perform subsequent analyses.

### Part A. Data pipeline and model construction

In [None]:
import requests
import json
import pandas as pd
import os

# Human-readable organism label -> model identifier used to build the
# download URL and the local cache file name.
names_dict = {
    "E. coli (core)": "e_coli_core",
    "E. coli": "iAF1260",
    "Brewer's yeast (S. cerevisiae)": "iMM904",
    "House mouse (M. musculus)": "iMM1415",
}

# Key of `names_dict` that selects the model to load.
NAME = "E. coli (core)"

def fetch_data(model_name, save_loc=None):
    """Download a model in JSON format and optionally cache it on disk.

    Args:
        model_name: Model identifier inserted into the download URL
            (e.g. ``"e_coli_core"``).
        save_loc: Optional file path. When given, the raw response body is
            written there, but only after the response has been validated.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or a timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = f"http://bigg.ucsd.edu/static/models/{model_name}.json"
    # A timeout keeps an unresponsive server from hanging the notebook.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # Decode before saving so that an invalid body is never cached; a bad
    # cache file would otherwise break every later run that finds it on disk.
    model = response.json()
    if save_loc:
        parent_dir = os.path.dirname(save_loc)
        if parent_dir:
            # The cache directory may not exist on the first run.
            os.makedirs(parent_dir, exist_ok=True)
        with open(save_loc, "wb") as f:
            f.write(response.content)
    return model

# Reuse the locally cached copy of the selected model when it exists;
# otherwise download it (fetch_data also writes the cache file).
model_path = f"data/{names_dict[NAME]}.json"
if os.path.exists(model_path):
    # The cache holds the raw bytes that were downloaded, so decode it with
    # an explicit encoding instead of the platform-dependent default.
    with open(model_path, "r", encoding="utf-8") as f:
        data_json = json.load(f)
else:
    data_json = fetch_data(names_dict[NAME], model_path)

In [63]:

    
# Show the top-level sections of the loaded model dictionary.
data_json.keys()

dict_keys(['metabolites', 'reactions', 'genes', 'id', 'compartments', 'version'])

In [99]:

# Tabulate the model's metabolite records (one row per entry of
# data_json['metabolites']) and preview the first rows.
metabolites_df = pd.DataFrame(data_json['metabolites'])
metabolites_df.head()

Unnamed: 0,id,name,compartment,charge,formula,notes,annotation
0,glc__D_e,D-Glucose,e,0,C6H12O6,{'original_bigg_ids': ['glc_D_e']},"{'bigg.metabolite': ['glc__D'], 'biocyc': ['ME..."
1,gln__L_c,L-Glutamine,c,0,C5H10N2O3,{'original_bigg_ids': ['gln_L_c']},"{'bigg.metabolite': ['gln__L'], 'biocyc': ['ME..."
2,gln__L_e,L-Glutamine,e,0,C5H10N2O3,{'original_bigg_ids': ['gln_L_e']},"{'bigg.metabolite': ['gln__L'], 'biocyc': ['ME..."
3,glu__L_c,L-Glutamate,c,-1,C5H8NO4,{'original_bigg_ids': ['glu_L_c']},"{'bigg.metabolite': ['glu__L'], 'biocyc': ['ME..."
4,glu__L_e,L-Glutamate,e,-1,C5H8NO4,{'original_bigg_ids': ['glu_L_e']},"{'bigg.metabolite': ['glu__L'], 'biocyc': ['ME..."


In [100]:
# Tabulate the model's reaction records (one row per entry of
# data_json['reactions']) and preview the first rows. The 'metabolites'
# column holds each reaction's {metabolite id: coefficient} mapping.
reactions_df = pd.DataFrame(data_json['reactions'])
reactions_df.head()

Unnamed: 0,id,name,metabolites,lower_bound,upper_bound,gene_reaction_rule,subsystem,notes,annotation,objective_coefficient
0,PFK,Phosphofructokinase,"{'adp_c': 1.0, 'atp_c': -1.0, 'f6p_c': -1.0, '...",0.0,1000.0,b3916 or b1723,Glycolysis/Gluconeogenesis,{'original_bigg_ids': ['PFK']},"{'bigg.reaction': ['PFK'], 'ec-code': ['2.7.1....",
1,PFL,Pyruvate formate lyase,"{'accoa_c': 1.0, 'coa_c': -1.0, 'for_c': 1.0, ...",0.0,1000.0,((b0902 and b0903) and b2579) or (b0902 and b0...,Pyruvate Metabolism,{'original_bigg_ids': ['PFL']},"{'bigg.reaction': ['PFL'], 'biocyc': ['META:PY...",
2,PGI,Glucose-6-phosphate isomerase,"{'f6p_c': 1.0, 'g6p_c': -1.0}",-1000.0,1000.0,b4025,Glycolysis/Gluconeogenesis,{'original_bigg_ids': ['PGI']},"{'bigg.reaction': ['PGI'], 'biocyc': ['META:PG...",
3,PGK,Phosphoglycerate kinase,"{'13dpg_c': 1.0, '3pg_c': -1.0, 'adp_c': 1.0, ...",-1000.0,1000.0,b2926,Glycolysis/Gluconeogenesis,{'original_bigg_ids': ['PGK']},"{'bigg.reaction': ['PGK'], 'biocyc': ['META:PH...",
4,PGL,6-phosphogluconolactonase,"{'6pgc_c': 1.0, '6pgl_c': -1.0, 'h2o_c': -1.0,...",0.0,1000.0,b0767,Pentose Phosphate Pathway,{'original_bigg_ids': ['PGL']},"{'bigg.reaction': ['PGL'], 'biocyc': ['META:6P...",


In [115]:
reactions_df = pd.DataFrame(data_json['reactions'])

# Expand each reaction's {metabolite id: coefficient} mapping into one column
# per metabolite; a metabolite absent from a reaction gets coefficient 0.
stoich_df = pd.DataFrame(reactions_df['metabolites'].to_list()).fillna(0)
reaction_mat_df = pd.concat([reactions_df[['id', 'name']], stoich_df], axis=1)

# find biomass reaction (na=False: a missing name counts as "no match")
biomass_rxn_idx = reaction_mat_df['name'].str.contains('biomass', case=False, na=False)
if not biomass_rxn_idx.any():
    # idxmax() on an all-False mask returns the first row, which would
    # silently relabel an unrelated reaction as the biomass reaction.
    raise ValueError("no reaction whose name contains 'biomass' was found")
# replace the id of the first matching reaction with "biomass"
reaction_mat_df.at[biomass_rxn_idx.idxmax(), 'id'] = 'biomass'

# Note: the stoichiometric columns (everything other than id and name) can be
# selected with reaction_mat_df[reaction_mat_df.columns.difference(['id', 'name'])]
reaction_mat_df.head()

Unnamed: 0,id,name,adp_c,atp_c,f6p_c,fdp_c,h_c,accoa_c,coa_c,for_c,...,glc__D_e,gln__L_e,glu__L_e,h2o_e,mal__L_e,nh4_e,o2_e,mal__L_c,nh4_c,glx_c
0,PFK,Phosphofructokinase,1.0,-1.0,-1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,PFL,Pyruvate formate lyase,0.0,0.0,0.0,0.0,0.0,1.0,-1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,PGI,Glucose-6-phosphate isomerase,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,PGK,Phosphoglycerate kinase,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,PGL,6-phosphogluconolactonase,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,13dpg_c,2pg_c,3pg_c,6pgc_c,6pgl_c,ac_c,ac_e,acald_c,acald_e,accoa_c,...,pyr_e,q8_c,q8h2_c,r5p_c,ru5p__D_c,s7p_c,succ_c,succ_e,succoa_c,xu5p__D_c
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
