In [1]:
# BiGG analysis - 
# before doing that need to do two things:
# 1. finalize roundup: done
# 2. test coverage for biomodels: 
# 3. "Reaction Isolation Set": done
# 4. check (text) reporting threshold: ?
# 5. change Report: done

import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import numpy as np
import pandas as pd
from pulp import *
import matplotlib.pyplot as plt
import time

from scipy.linalg import lu, inv
from scipy.optimize import linprog

In [2]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.simple_sbml import SimpleSBML
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix

from SBMLLint.games.som import SOM
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [3]:
os.getcwd()

'/Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks'

In [4]:
# Load relevant bigg models
# bigg constants
BIGG_RESULTS = "results"
BIGG_ID = "bigg_id"
BIGG_GENECOUNT = "gene_count"
BIGG_REACTION_COUNT = "reaction_count"
BIGG_ORGANISM = "organism"
BIGG_METABOLITE_COUNT = "metabolite_count"
import requests
res = requests.get('http://bigg.ucsd.edu/api/v2/models')
bigg_models = res.json()[BIGG_RESULTS]
bigg_ids = [bigg[BIGG_ID] for bigg in bigg_models]
print("number of BiGG models: %d" % len(bigg_models))
bigg_models[:1]

number of BiGG models: 108


[{'bigg_id': 'e_coli_core',
  'gene_count': 137,
  'reaction_count': 95,
  'organism': 'Escherichia coli str. K-12 substr. MG1655',
  'metabolite_count': 72}]

In [5]:
bigg_df = pd.DataFrame(bigg_models).set_index(BIGG_ID)
bigg_df.sort_values(by=BIGG_REACTION_COUNT, 
                    inplace=True)
bigg_df.tail(5)

Unnamed: 0_level_0,gene_count,reaction_count,organism,metabolite_count
bigg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RECON1,1905,3741,Homo sapiens,2766
iCHOv1_DG44,1184,3942,Cricetulus griseus,2751
iLB1027_lipid,1027,4456,Phaeodactylum tricornutum CCAP 1055/1,2172
iCHOv1,1766,6663,Cricetulus griseus,4456
Recon3D,2248,10600,Homo sapiens,5835


In [6]:
# define four bigg models depending on size
MODEL_1000 = "iCN718.xml"
MODEL_3000 = "iECIAI1_1343.xml"
MODEL_4000 = "iLB1027_lipid.xml"
MODEL_6000 = "iCHOv1.xml"
MODEL_10000 = "Recon3D.xml"
FOUR_BIGG = [MODEL_1000, MODEL_3000, MODEL_4000, MODEL_6000, MODEL_10000]
print("------------------------------------------------------")
for bigg in FOUR_BIGG:
  print(bigg_df.loc[bigg[:-4]])
  print("------------------------------------------------------")

------------------------------------------------------
gene_count                                  709
reaction_count                             1015
organism            Acinetobacter baumannii AYE
metabolite_count                            888
Name: iCN718, dtype: object
------------------------------------------------------
gene_count                           1343
reaction_count                       2765
organism            Escherichia coli IAI1
metabolite_count                     1968
Name: iECIAI1_1343, dtype: object
------------------------------------------------------
gene_count                                           1027
reaction_count                                       4456
organism            Phaeodactylum tricornutum CCAP 1055/1
metabolite_count                                     2172
Name: iLB1027_lipid, dtype: object
------------------------------------------------------
gene_count                        1766
reaction_count                    6663
organism     

In [7]:
# use Recon3D
# SAMPLE_MODEL = FOUR_BIGG[3]
# fpath = os.path.join(cn.BIGG_DIR, SAMPLE_MODEL)
# simple = SimpleSBML()
# simple.initialize(fpath)

In [8]:
def solveMILP(mat):
  prob = LpProblem("Finding_Unconserved_Metabolites", LpMaximize)
  species = list(mat.index)
  species_inclusion = pulp.LpVariable.dicts("species", species, cat="Binary")
  species_mass = pulp.LpVariable.dicts("mass", species, cat="Continuous")
  # objective function (to maximize the number of species)
  prob += lpSum([species_inclusion[i] for i in species])
  # constraint 1 (for each reaction, the sum(stoichiometry[i]*mass[i])=0)
  for reaction in mat.columns:
    prob += lpSum(sum([mat[reaction][species]*species_mass[species] for species in species_inclusion])) == 0
  # constraint 2 (species_inclusion is less than or equal to mass of each species)
  for species in species_mass.keys():
    prob += species_inclusion[species] <= species_mass[species]
  prob.solve()
  return prob
def getUnconservedMetabolites(milp_result):
  unconserved_metabolites = []
  for v in milp_result.variables():
    if v.varValue==0:
      if v.name[:7]=="species":
        unconserved_metabolites.append(v.name[8:])
  return unconserved_metabolites

In [9]:
# milp_start = time.time()
# s = StoichiometryMatrix(simple)
# milp_result = solveMILP(s.stoichiometry_matrix)
# unconserved_metabolites = getUnconservedMetabolites(milp_result)
# milp_end = time.time()
# milp_time = milp_end - milp_start
# print("MILP time: %f" % milp_time)

In [10]:
def analyzeBIGG(model_file):
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  xgames_start = time.time()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_end = time.time()
  xgames_time = xgames_end - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  lp_start = time.time()
  s = StoichiometryMatrix(simple)
  lp_res = s.isConsistent()
  lp_end = time.time()
  lp_time = lp_end - lp_start
  print("LP time: %f" % lp_time)
  if s.result.status!=0:
    print("LP found an error!")
  milp_start = time.time()
  milp_result = solveMILP(s.stoichiometry_matrix)
  unconserved_metabolites = getUnconservedMetabolites(milp_result)
  milp_end = time.time()
  milp_time = milp_end - milp_start
  print("MILP time: %f" % milp_time)
  return m, s, milp_result, unconserved_metabolites

In [11]:
def analyzeByGAMES(model_file):
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  xgames_start = time.time()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_end = time.time()
  xgames_time = xgames_end - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  return m, xgames_time

In [12]:
def getISS(m_games):
  result = []
  gr = GAMESReport(m_games)
  operation_df = gr.getOperationMatrix()
  if m_games.echelon_errors:
    for reaction in m_games.echelon_errors:
      operation_series = operation_df.T[reaction.label]
      result.append(len(operation_series.to_numpy().nonzero()[0]))
  if m_games.type_three_errors:
    for reaction in m_games.echelon_errors:
      operation_series = operation_df.T[reaction.label]
      result.append(len(operation_series.to_numpy().nonzero()[0]))
  print(result)
  return(result)

In [14]:
# m1_1000.echelon_errors

In [15]:
# er = m2_1000.echelon_errors[0]
# gr = GAMESReport(m2_1000)
# operation_df = gr.getOperationMatrix()
# sere = operation_df.T[er.label]
# len(sere.to_numpy().nonzero()[0])

In [16]:
m1_1000, time1_1000 = analyzeByGAMES(MODEL_1000)
m2_1000, time2_1000 = analyzeByGAMES(MODEL_1000)
m3_1000, time3_1000 = analyzeByGAMES(MODEL_1000)

Model: iCN718.xml
xGAMES time: 99.683036
xGAMES found an error!
Model: iCN718.xml
xGAMES time: 55.920234
xGAMES found an error!
Model: iCN718.xml
xGAMES time: 97.075783
xGAMES found an error!


In [17]:
ave_1000_time = (time1_1000 + time2_1000 + time3_1000)/3
"average games time for 1000-reaction model: %.02f" % ave_1000_time

'average games time for 1000-reaction model: 84.23'

In [18]:
m1_1000_ris = getISS(m1_1000)
m2_1000_ris = getISS(m2_1000)
m3_1000_ris = getISS(m3_1000)
#calculate mean of each run, and mean of all three

[160, 164, 176, 170, 189, 164, 294, 159, 294, 158, 164, 281, 221, 281, 197, 230, 219, 215, 215, 217, 194, 194, 191, 188, 181, 166, 167, 167, 167, 167, 172, 166, 163, 163, 160, 162, 162, 163, 162, 158, 141, 151, 153, 138, 150, 151, 145, 104, 110, 103, 160, 164, 176, 170, 189, 164, 294, 159, 294, 158, 164, 281, 221, 281, 197, 230, 219, 215, 215, 217, 194, 194, 191, 188, 181, 166, 167, 167, 167, 167, 172, 166, 163, 163, 160, 162, 162, 163, 162, 158, 141, 151, 153, 138, 150, 151, 145, 104, 110, 103]
[111, 113]
[113, 234, 125, 100, 238, 125, 138, 121, 106, 137, 124, 125, 138, 236, 125, 200, 238, 121, 235, 237, 233, 224, 121, 223, 199, 199, 201, 199, 138, 138, 138, 121, 136, 138, 126, 130, 125, 126, 124, 124, 124, 123, 123, 124, 125, 123, 114, 99, 116, 120, 106, 113, 113, 100, 99, 89, 93, 93, 92, 92, 85, 82, 113, 234, 125, 100, 238, 125, 138, 121, 106, 137, 124, 125, 138, 236, 125, 200, 238, 121, 235, 237, 233, 224, 121, 223, 199, 199, 201, 199, 138, 138, 138, 121, 136, 138, 126, 130, 125, 1

In [28]:
(np.mean(m1_1000_ris) + np.mean(m2_1000_ris) + np.mean(m3_1000_ris))/3

143.76602150537633

In [30]:
(len(m1_1000_ris) + len(m2_1000_ris) + len(m3_1000_ris)) / 3

75.33333333333333

In [65]:
m2_1000.error_summary

[ErrorSummary(type='echelon', errors=[R_ASPO1:  -> 3.07 {M_2obut_c=M_2obut_e} + 1.55 {M_2mcit_c=M_micit_c} + 2.32 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e} + 0.59 {M_2hyoxplac_c=M_34dhpha_c} + 7.31 {M_no2_c=M_no2_e}, R_AIRC1:  -> 0.01 {M_2obut_c=M_2obut_e} + 0.00 {M_2mcit_c=M_micit_c} + 0.01 {M_no2_c=M_no2_e}, R_FUMt2r:  -> 0.00 {M_2obut_c=M_2obut_e} + 0.00 {M_no2_c=M_no2_e}, R_ACLSb:  -> 0.00 {M_2obut_c=M_2obut_e} + 0.00 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e} + 0.00 {M_no2_c=M_no2_e}])]

In [20]:
m1_3000, time1_3000 = analyzeByGAMES(MODEL_3000)
m2_3000, time2_3000 = analyzeByGAMES(MODEL_3000)
m3_3000, time3_3000 = analyzeByGAMES(MODEL_3000)

Model: iECIAI1_1343.xml
xGAMES time: 864.426708
xGAMES found an error!
Model: iECIAI1_1343.xml
xGAMES time: 889.898211
xGAMES found an error!
Model: iECIAI1_1343.xml
xGAMES time: 838.926634
xGAMES found an error!


In [31]:
ave_3000_time = (time1_3000 + time2_3000 + time3_3000)/3
"average games time for 1000-reaction model: %.02f" % ave_3000_time

'average games time for 1000-reaction model: 864.42'

In [23]:
m1_3000_ris = getISS(m1_3000)
m2_3000_ris = getISS(m2_3000)
m3_3000_ris = getISS(m3_3000)
(np.mean(m1_3000_ris) + np.mean(m2_3000_ris) + np.mean(m2_3000_ris))/3

[505, 505, 517, 483, 505, 484, 507, 506, 503, 506, 443, 500, 501, 510, 501, 505, 484, 514, 370, 501, 506, 501, 483, 497, 500, 498, 502, 443, 502, 500, 502, 501, 501, 497, 499, 498, 485, 500, 501, 500, 499, 500, 501, 461, 499, 503, 503, 502, 501, 502, 499, 500, 500, 499, 504, 503, 502, 500, 501, 500, 502, 498, 501, 497, 497, 499, 499, 501, 500, 498, 498, 499, 500, 497, 497, 497, 501, 512, 500, 499, 497, 497, 490, 489, 483, 488, 483, 476, 484, 484, 488, 484, 461, 482, 478, 463, 464, 460, 461, 460, 460, 461, 461, 466, 465, 459, 463, 464, 462, 460, 461, 458, 458, 458, 458, 458, 458, 457, 454, 462, 457, 458, 454, 452, 448, 449, 450, 447, 450, 452, 449, 450, 451, 450, 449, 444, 444, 447, 442, 444, 445, 446, 447, 445, 443, 443, 442, 444, 447, 444, 445, 445, 431, 429, 428, 419, 409, 406, 402, 397, 397, 393, 391, 386, 385, 387, 387, 385, 386, 383, 377, 369, 371, 369, 366, 361, 356, 353, 341, 339, 505, 505, 517, 483, 505, 484, 507, 506, 503, 506, 443, 500, 501, 510, 501, 505, 484, 514, 370, 501,

487.16949891067543

In [32]:
(len(m1_3000_ris) + len(m2_3000_ris) + len(m3_3000_ris)) / 3

282.0

In [21]:
# m1_4000, time1_4000 = analyzeByGAMES(MODEL_4000)
# m2_4000, time2_4000 = analyzeByGAMES(MODEL_4000)
# m3_4000, time3_4000 = analyzeByGAMES(MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1646.834592
xGAMES didn't find an error!
Model: iLB1027_lipid.xml
xGAMES time: 1653.936392
xGAMES didn't find an error!
Model: iLB1027_lipid.xml
xGAMES time: 1661.339419
xGAMES didn't find an error!


In [24]:
m1_6000, time1_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10228.406311
xGAMES found an error!


In [25]:
m2_6000, time2_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10203.272835
xGAMES found an error!


In [26]:
m3_6000, time3_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10777.770320
xGAMES found an error!


In [27]:
# m4_6000, time4_6000 = analyzeByGAMES(MODEL_6000)

In [33]:
ave_6000_time = (time1_6000 + time2_6000 + time3_6000)/3
"average games time for 1000-reaction model: %.02f" % ave_6000_time

'average games time for 1000-reaction model: 10403.15'

In [86]:
# ave_6000_time = (10206.073371 + 7281.181085 + 10132.363248)/3
# "average games time for 1000-reaction model: %.02f" % ave_6000_time

'average games time for 1000-reaction model: 9206.54'

In [35]:
m1_6000_ris = getISS(m1_6000)
m2_6000_ris = getISS(m2_6000)
m3_6000_ris = getISS(m3_6000)
(np.mean(m1_6000_ris) + np.mean(m2_6000_ris) + np.mean(m3_6000_ris))/3

[476, 513, 512, 578, 514, 513, 510, 510, 485, 427, 401, 485, 644, 483, 620, 483, 437, 510, 487, 429, 557, 441, 551, 546, 535, 528, 521, 519, 520, 514, 514, 509, 510, 510, 509, 509, 509, 510, 508, 508, 508, 502, 489, 483, 483, 483, 476, 480, 480, 481, 477, 476, 476, 476, 468, 464, 457, 457, 452, 447, 444, 447, 441, 442, 447, 443, 441, 442, 441, 436, 430, 427, 426, 426, 416, 407, 406, 400, 400, 400, 400, 399]
[375, 482, 515, 535, 514, 507, 437, 496, 558, 439, 438, 380, 438, 561, 559, 559, 550, 537, 537, 534, 535, 534, 538, 533, 509, 509, 509, 509, 508, 507, 507, 508, 512, 512, 503, 498, 496, 496, 496, 498, 492, 492, 488, 488, 489, 488, 481, 481, 482, 483, 472, 474, 469, 469, 442, 443, 442, 438, 438, 439, 438, 437, 436, 435, 431, 435, 407, 385, 379, 379, 379]
[550, 517, 548, 544, 544, 513, 505, 529, 465, 542, 333, 407, 542, 545, 542, 503, 541, 467, 407, 542, 533, 542, 519, 515, 405, 545, 552, 523, 524, 515, 545, 405, 535, 406, 534, 514, 544, 522, 366, 516, 516, 547, 405, 546, 400, 405, 53

475.8008496806868

In [36]:
(len(m1_6000_ris) + len(m2_6000_ris) + len(m3_6000_ris)) / 3

153.0

In [13]:
# games_1000 = analyzeByGAMES(MODEL_1000)

Model: iCN718.xml
xGAMES time: 55.211241
xGAMES found an error!


In [37]:
# print(games_1000.error_summary)

[ErrorSummary(type='echelon', errors=[R_MTHFC_1: {M_h_c} -> , R_DNADDP:  -> 0.47 {M_2obut_c=M_2obut_e} + 0.06 {M_2mcit_c=M_micit_c}, R_ASPO6:  -> 0.35 {M_2mcit_c=M_micit_c}])]


In [97]:
# gr_1000 = GAMESReport(games_1000)

In [98]:
# print(gr_1000.reportEchelonError([games_1000.echelon_errors[0]], explain_details=True)[0])


We detected a mass imbalance
: M_h_c -> 

from the following reaction isolation set.

1. R_MTHFC: M_h2o_c + M_methf_c -> M_10fthf_c + M_h_c
2. R_MTHFC_1: M_h2o_c + M_methf_c -> M_10fthf_c

----------------------------------------------------------------------
----------------------------------------------------------------------

These uni-uni reactions created mass-equivalence.
(The chemical species within a curly bracket have the same atomic mass.)

{M_h2o_c=M_h2o_e} is inferred by:
3. R_H2Ot: M_h2o_e -> M_h2o_c

----------------------------------------------------------------------

Based on the uni-uni reactions above, we create mass-equivalent pseudo reactions.

(pseudo 1.) R_MTHFC: {M_methf_c} + {M_h2o_c=M_h2o_e} -> {M_10fthf_c} + {M_h_c}
(pseudo 2.) R_MTHFC_1: {M_methf_c} + {M_h2o_c=M_h2o_e} -> {M_10fthf_c}

----------------------------------------------------------------------

An operation between the pseudo reactions:
-1.00 * R_MTHFC + 1.00 * R_MTHFC_1

will result in empty 

In [17]:
# simple = SimpleSBML()
# simple.initialize(os.path.join(cn.BIGG_DIR, MODEL_1000))
# simple.getReaction("R_TRILIP")

R_TRILIP: M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c

In [19]:
# simple.getReaction("R_BIWLIP")

R_BIWLIP: M_coa_c + M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c

In [99]:
# games_10000, time_10000 = analyzeByGAMES(MODEL_10000)

Model: Recon3D.xml
xGAMES time: 23622.311754
xGAMES didn't find an error!


In [100]:
# m_10000_ris = getISS(games_10000)
# np.mean(m_10000_ris)

[]


nan

In [21]:
m_1000, s_1000, milp_result_1000, unconserved_metabolites_1000 = analyzeBIGG(MODEL_1000)

Model: iCN718.xml
xGAMES time: 108.660436
xGAMES found an error!




LP time: 2.030797
LP found an error!
MILP time: 23.239786


In [61]:
m_4000, s_4000, milp_result_4000, unconserved_metabolites_4000 = analyzeBIGG(MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1626.331340
xGAMES found an error!
LP time: 272.211592
LP found an error!
MILP time: 285.884376


In [62]:
m_6000, s_6000, milp_result_6000, unconserved_metabolites_6000 = analyzeBIGG(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 7200.678418
xGAMES found an error!
LP time: 759.267629
LP found an error!
MILP time: 796.065499


In [68]:
len(unconserved_metabolites_6000)

2370

In [71]:
print(s_4000.result.status)
print(s_6000.result.status)

2
2


In [184]:
pulp.cplex_dll_path

'/usr/ilog/cplex/bin/x86_rhel4.0_3.4/libcplex110.so'