In [30]:
import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import numpy as np
import pandas as pd
from pulp import *
import matplotlib.pyplot as plt
import time

from scipy.linalg import lu, inv
from scipy.optimize import linprog

In [2]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.simple_sbml import SimpleSBML
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix

from SBMLLint.games.som import SOM
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [3]:
os.getcwd()

'/Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks'

In [4]:
# Load relevant bigg models
# bigg constants
BIGG_RESULTS = "results"
BIGG_ID = "bigg_id"
BIGG_GENECOUNT = "gene_count"
BIGG_REACTION_COUNT = "reaction_count"
BIGG_ORGANISM = "organism"
BIGG_METABOLITE_COUNT = "metabolite_count"
import requests
res = requests.get('http://bigg.ucsd.edu/api/v2/models')
bigg_models = res.json()[BIGG_RESULTS]
bigg_ids = [bigg[BIGG_ID] for bigg in bigg_models]
print("number of BiGG models: %d" % len(bigg_models))
bigg_models[:1]

number of BiGG models: 108


[{'bigg_id': 'e_coli_core',
  'gene_count': 137,
  'reaction_count': 95,
  'organism': 'Escherichia coli str. K-12 substr. MG1655',
  'metabolite_count': 72}]

In [5]:
bigg_df = pd.DataFrame(bigg_models).set_index(BIGG_ID)
bigg_df.sort_values(by=BIGG_REACTION_COUNT, 
                    inplace=True)
bigg_df.head()

Unnamed: 0_level_0,gene_count,reaction_count,organism,metabolite_count
bigg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
e_coli_core,137,95,Escherichia coli str. K-12 substr. MG1655,72
iAB_RBC_283,346,469,Homo sapiens,342
iIS312,312,519,Trypanosoma cruzi Dm28c,606
iIS312_Amastigote,312,519,Trypanosoma cruzi Dm28c,606
iIS312_Epimastigote,312,519,Trypanosoma cruzi Dm28c,606


In [6]:
bigg_df.tail()

Unnamed: 0_level_0,gene_count,reaction_count,organism,metabolite_count
bigg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RECON1,1905,3741,Homo sapiens,2766
iCHOv1_DG44,1184,3942,Cricetulus griseus,2751
iLB1027_lipid,1027,4456,Phaeodactylum tricornutum CCAP 1055/1,2172
iCHOv1,1766,6663,Cricetulus griseus,4456
Recon3D,2248,10600,Homo sapiens,5835


In [7]:
# define four bigg models depending on size
MODEL_1000 = "iCN718.xml"
MODEL_4000 = "iLB1027_lipid.xml"
MODEL_6000 = "iCHOv1.xml"
MODEL_10000 = "Recon3D.xml"
FOUR_BIGG = [MODEL_1000, MODEL_4000, MODEL_6000, MODEL_10000]
print("------------------------------------------------------")
for bigg in FOUR_BIGG:
  print(bigg_df.loc[bigg[:-4]])
  print("------------------------------------------------------")

------------------------------------------------------
gene_count                                  709
reaction_count                             1015
organism            Acinetobacter baumannii AYE
metabolite_count                            888
Name: iCN718, dtype: object
------------------------------------------------------
gene_count                                           1027
reaction_count                                       4456
organism            Phaeodactylum tricornutum CCAP 1055/1
metabolite_count                                     2172
Name: iLB1027_lipid, dtype: object
------------------------------------------------------
gene_count                        1766
reaction_count                    6663
organism            Cricetulus griseus
metabolite_count                  4456
Name: iCHOv1, dtype: object
------------------------------------------------------
gene_count                  2248
reaction_count             10600
organism            Homo sapiens
metabolite

In [8]:
SAMPLE_MODEL = FOUR_BIGG[0]
fpath = os.path.join(cn.BIGG_DIR, SAMPLE_MODEL)
simple = SimpleSBML()
simple.initialize(fpath)

In [29]:
print("xGAMES time: %f" % xgames_time)

xGAMES time: 95.359424


In [32]:
def solveMILP(mat):
  prob = LpProblem("Finding_Unconserved_Metabolites", LpMaximize)
  species = list(mat.index)
  species_inclusion = pulp.LpVariable.dicts("species", species, cat="Binary")
  species_mass = pulp.LpVariable.dicts("mass", species, cat="Continuous")
  # objective function (to maximize the number of species)
  prob += lpSum([species_inclusion[i] for i in species])
  # constraint 1 (for each reaction, the sum(stoichiometry[i]*mass[i])=0)
  for reaction in mat.columns:
    prob += lpSum(sum([mat[reaction][species]*species_mass[species] for species in species_inclusion])) == 0
  # constraint 2 (species_inclusion is less than or equal to mass of each species)
  for species in species_mass.keys():
    prob += species_inclusion[species] <= species_mass[species]
  prob.solve()
  return prob
# print("Status:", LpStatus[prob.status])

In [43]:
def getUnconservedMetabolites(milp_result):
  unconserved_metabolites = []
  for v in milp_result.variables():
    if v.varValue==0:
      if v.name[:7]=="species":
        unconserved_metabolites.append(v.name[8:])
  return unconserved_metabolites

In [59]:
def analyzeBIGG(simple, model_file):
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  xgames_start = time.time()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_end = time.time()
  xgames_time = xgames_end - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  lp_start = time.time()
  s = StoichiometryMatrix(simple)
  lp_res = s.isConsistent()
  lp_end = time.time()
  lp_time = lp_end - lp_start
  print("LP time: %f" % lp_time)
  if s.result.status!=0:
    print("LP found an error!")
  milp_start = time.time()
  milp_result = solveMILP(s.stoichiometry_matrix)
  unconserved_metabolites = getUnconservedMetabolites(milp_result)
  milp_end = time.time()
  milp_time = milp_end - milp_start
  print("MILP time: %f" % milp_time)
  return m, s, milp_result, unconserved_metabolites

In [60]:
m_1000, s_1000, milp_result_1000, unconserved_metabolites_1000 = analyzeBIGG(simple, MODEL_1000)

Model: iCN718.xml
xGAMES time: 58.408306
xGAMES found an error!




LP time: 1.910074
LP found an error!
MILP time: 21.994424


In [61]:
m_4000, s_4000, milp_result_4000, unconserved_metabolites_4000 = analyzeBIGG(simple, MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1626.331340
xGAMES found an error!
LP time: 272.211592
LP found an error!
MILP time: 285.884376


In [62]:
m_6000, s_6000, milp_result_6000, unconserved_metabolites_6000 = analyzeBIGG(simple, MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 7200.678418
xGAMES found an error!
LP time: 759.267629
LP found an error!
MILP time: 796.065499


In [68]:
len(unconserved_metabolites_6000)

2370

In [71]:
print(s_4000.result.status)
print(s_6000.result.status)

2
2


In [184]:
pulp.cplex_dll_path

'/usr/ilog/cplex/bin/x86_rhel4.0_3.4/libcplex110.so'

In [172]:
import cobra

In [179]:
from SBMLLint.common import util
import libsbml
xml = util.getXML(os.path.join(cn.BIGG_DIR, MODEL_1000))
reader = libsbml.SBMLReader()
document = reader.readSBMLFromString(xml)
model = document.getModel()

In [182]:
cobra.(model,'massBalance', true)

AttributeError: module 'cobra' has no attribute 'verifyModel'

In [104]:
# test roundup
echelon_df = round(m_1000.echelon_df)
rref_df = round(m_1000.getRREFMatrix(echelon_df))

In [105]:
lower_inv = inv(np.round(m.lower))

In [106]:
op_df = m_1000.rref_operation.dot(lower_inv)

In [147]:
op_df.index

Index(['R_G3PD4', 'R_ASPT', 'R_BDH', 'R_PTHPS', 'R_PPPPH', 'R_METTRS',
       'R_IMPC', 'R_EDD', 'R_BACCL2', 'R_CAOPT3',
       ...
       'R_4ABUTD', 'R_ADK1', 'R_CTPS1', 'R_TYRt2r', 'R_GLUabc', 'R_ALDD20y',
       'R_NTPP1', 'R_ALDD2y', 'R_GLNt2r', 'R_LYSabc'],
      dtype='object', length=821)

In [149]:
op_df.columns = op_df.index

In [107]:
m_1000.som_stoichiometry_matrix.shape

(788, 821)

In [151]:
som_reactions = m_1000.convertMatrixToSOMReactions(rref_df)

In [117]:
m.echelon_errors

[]

In [141]:
gr = GAMESReport(m_1000)

In [158]:
reaction_operation = gr.convertOperationSeriesToReactionOperations(op_df.T[res[0]])

In [159]:
reaction_operation[:3]

[ReactionOperation(reaction='R_HMGL_2', operation=-1.0),
 ReactionOperation(reaction='R_BPNT', operation=19.0),
 ReactionOperation(reaction='R_FBP', operation=1.0)]

In [170]:
m_1000.simple.getReaction("R_HMGL_2")

In [169]:
gr.mesgraph.simple.getReaction("R_HMGL_2")

In [171]:
gr.getOperationStoichiometryMatrix(reaction_operation[1:])

AttributeError: 'NoneType' object has no attribute 'reactants'

In [155]:
inferred_reaction = gr.getInferredReaction(reaction_operation)

AttributeError: 'NoneType' object has no attribute 'reactants'

In [129]:
res = []
for reaction in som_reactions:
  if reaction.category == cn.REACTION_ERROR:
    res.append(reaction.label)
print(res)

['R_FBP', 'R_TKT1', 'R_ASPK', 'R_ASPTA', 'R_TALA', 'R_ASAD', 'R_OOAP', 'R_HCO3E', 'R_ASPt2r', 'R_GLCD', 'R_NT5C', 'R_GLUSx_copy2', 'R_DHFR2i', 'R_DHFOR', 'R_MALTt2', 'R_G5SADs', 'R_PSP_L', 'R_FUM']


In [130]:
op_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,811,812,813,814,815,816,817,818,819,820
R_G3PD4,2.25000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R_ASPT,0.00000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R_BDH,-8.25000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R_PTHPS,-1.53125,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
R_PPPPH,3.78125,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
R_ALDD20y,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
R_NTPP1,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
R_ALDD2y,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
R_GLNt2r,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


'reaction_error'

In [99]:
for col in op_df.columns[:10]:
  result_reaction = op_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,811,812,813,814,815,816,817,818,819,820
{M_mqn7_c},-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_fum_c},0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_bhb_c},0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_6pthp_c},0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_gbdp_c},0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
{M_2mcit_c=M_micit_c},5.0,0.0,4.0,1.0,9.0,-7.0,11.0,0.0,8.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e},-133.0,-1.0,-34.0,43.0,-13.0,-10.0,45.0,0.0,-67.0,-126.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_lac__L_c=M_lac__L_e},0.0,0.0,-1.0,1.0,-1.0,1.0,-2.0,0.0,-2.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
{M_2hyoxplac_c=M_34dhpha_c},-1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,-2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [79]:
(((echelon_df < TOLERANCE) & (echelon_df > 0)).sum()).sum()

6433

In [81]:
m_1000.error_summary

[ErrorSummnary(type='echelon', errors=[R_ASPO1:  -> 0.00 {M_o2_c=M_o2_e} + 0.11 {M_26dap_LL_c=M_26dap__M_c} + 0.14 {M_glc__D_c=M_glc__bD_c} + 0.20 {M_f6p_c=M_g1p_c=M_g6p_B_c=M_g6p_c=M_man1p_c=M_man6p_c} + 0.08 {M_prfp_c=M_prlp_c} + 0.06 {M_asp__D_c=M_asp__D_e=M_asp__L_c} + 0.06 {M_2obut_c=M_2obut_e} + 0.03 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e} + 0.06 {M_no2_c=M_no2_e}, R_GLYCLTDy: 0.00 {M_asp__D_c=M_asp__D_e=M_asp__L_c} + 0.00 {M_2obut_c=M_2obut_e} + 0.00 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e} + 0.01 {M_no2_c=M_no2_e} -> , R_P5CCD: 0.05 {M_asp__D_c=M_asp__D_e=M_asp__L_c} + 0.12 {M_2obut_c=M_2obut_e} + 0.12 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e} + 0.37 {M_no2_c=M_no2_e} -> ])]

In [69]:
# for MODEL_1000
simple.getReaction("R_SEAHCYSHYD_1")

In [15]:
simple.getReaction("R_SEAHCYSHYD")

R_SEAHCYSHYD: M_h2o_c + M_seahcys_c -> M_adn_c + M_selhcys_c

In [16]:
simple.getMolecule("M_h2o_c")

M_h2o_c

In [17]:
m.getNode(simple.getMolecule("M_adn_c"))

{M_adn_c}

In [18]:
error_column = m.lower_inverse.T[m.echelon_errors[0].label]
error_loc = error_column.to_numpy().nonzero()
error_reactions = error_column.index[error_loc]
print(error_reactions)
print(error_column[error_loc[0]])

Index(['R_ENO'], dtype='object')
R_ENO    1.0
Name: R_ENO, dtype: float64


In [19]:
simple.getReaction("R_ENO")

R_ENO: M_2pg_c -> M_h2o_c + M_pep_c

In [22]:
m.getNode(simple.getMolecule("M_2pg_c"))

{M_2pg_c=M_3pg_c}

In [23]:
# select_mat = m.som_stoichiometry_matrix[set(error_reactions)]
# lp_mat = select_mat.loc[(select_mat!=0.0).any(axis=1)].T
# nmet = lp_mat.shape[1]
# # number of reactions
# nreac = lp_mat.shape[0]  
# #  
# b = np.zeros(nreac)
# c = np.ones(nmet)
# # Linear programming. c is constraint (here, zero), 
# # b is vector of possible values for molecule vector. 
# lp_res = linprog(c, A_eq=lp_mat, b_eq=b, bounds=(1, None))
# lp_res