In [1]:
# BiGG analysis - 
# still need to run the last time. from m_1000 to m_10000

import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import numpy as np
import pandas as pd
from pulp import *
import matplotlib.pyplot as plt
import time

from scipy.linalg import lu, inv
from scipy.optimize import linprog

In [2]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.simple_sbml import SimpleSBML
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix

from SBMLLint.games.som import SOM
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [3]:
os.getcwd()

'/Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks'

In [4]:
# Load relevant bigg models
# bigg constants
BIGG_RESULTS = "results"
BIGG_ID = "bigg_id"
BIGG_GENECOUNT = "gene_count"
BIGG_REACTION_COUNT = "reaction_count"
BIGG_ORGANISM = "organism"
BIGG_METABOLITE_COUNT = "metabolite_count"
import requests
res = requests.get('http://bigg.ucsd.edu/api/v2/models')
bigg_models = res.json()[BIGG_RESULTS]
bigg_ids = [bigg[BIGG_ID] for bigg in bigg_models]
print("number of BiGG models: %d" % len(bigg_models))
bigg_models[:1]

number of BiGG models: 108


[{'bigg_id': 'e_coli_core',
  'gene_count': 137,
  'reaction_count': 95,
  'organism': 'Escherichia coli str. K-12 substr. MG1655',
  'metabolite_count': 72}]

In [5]:
bigg_df = pd.DataFrame(bigg_models).set_index(BIGG_ID)
bigg_df.sort_values(by=BIGG_REACTION_COUNT, 
                    inplace=True)
bigg_df.head(30)

Unnamed: 0_level_0,gene_count,reaction_count,organism,metabolite_count
bigg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
e_coli_core,137,95,Escherichia coli str. K-12 substr. MG1655,72
iAB_RBC_283,346,469,Homo sapiens,342
iIS312,312,519,Trypanosoma cruzi Dm28c,606
iIS312_Amastigote,312,519,Trypanosoma cruzi Dm28c,606
iIS312_Epimastigote,312,519,Trypanosoma cruzi Dm28c,606
iIS312_Trypomastigote,312,520,Trypanosoma cruzi Dm28c,606
iIT341,339,554,Helicobacter pylori 26695,485
iLJ478,482,652,Thermotoga maritima MSB8,570
iAF692,692,690,Methanosarcina barkeri str. Fusaro,628
iSB619,619,743,Staphylococcus aureus subsp. aureus N315,655


In [6]:
# define four bigg models depending on size
MODEL_1000 = "iCN718.xml"
MODEL_3000 = "iECIAI1_1343.xml"
MODEL_4000 = "iLB1027_lipid.xml"
MODEL_6000 = "iCHOv1.xml"
MODEL_10000 = "Recon3D.xml"
FOUR_BIGG = [MODEL_1000, MODEL_3000, MODEL_4000, MODEL_6000, MODEL_10000]
print("------------------------------------------------------")
for bigg in FOUR_BIGG:
  print(bigg_df.loc[bigg[:-4]])
  print("------------------------------------------------------")

------------------------------------------------------
gene_count                                  709
reaction_count                             1015
organism            Acinetobacter baumannii AYE
metabolite_count                            888
Name: iCN718, dtype: object
------------------------------------------------------
gene_count                           1343
reaction_count                       2765
organism            Escherichia coli IAI1
metabolite_count                     1968
Name: iECIAI1_1343, dtype: object
------------------------------------------------------
gene_count                                           1027
reaction_count                                       4456
organism            Phaeodactylum tricornutum CCAP 1055/1
metabolite_count                                     2172
Name: iLB1027_lipid, dtype: object
------------------------------------------------------
gene_count                        1766
reaction_count                    6663
organism     

In [7]:
# use Recon3D
# SAMPLE_MODEL = FOUR_BIGG[3]
# fpath = os.path.join(cn.BIGG_DIR, SAMPLE_MODEL)
# simple = SimpleSBML()
# simple.initialize(fpath)

In [8]:
def solveMILP(mat):
  prob = LpProblem("Finding_Unconserved_Metabolites", LpMaximize)
  species = list(mat.index)
  species_inclusion = pulp.LpVariable.dicts("species", species, cat="Binary")
  species_mass = pulp.LpVariable.dicts("mass", species, cat="Continuous")
  # objective function (to maximize the number of species)
  prob += lpSum([species_inclusion[i] for i in species])
  # constraint 1 (for each reaction, the sum(stoichiometry[i]*mass[i])=0)
  for reaction in mat.columns:
    prob += lpSum(sum([mat[reaction][species]*species_mass[species] for species in species_inclusion])) == 0
  # constraint 2 (species_inclusion is less than or equal to mass of each species)
  for species in species_mass.keys():
    prob += species_inclusion[species] <= species_mass[species]
  prob.solve()
  return prob
def getUnconservedMetabolites(milp_result):
  unconserved_metabolites = []
  for v in milp_result.variables():
    if v.varValue==0:
      if v.name[:7]=="species":
        unconserved_metabolites.append(v.name[8:])
  return unconserved_metabolites

In [9]:
# milp_start = time.time()
# s = StoichiometryMatrix(simple)
# milp_result = solveMILP(s.stoichiometry_matrix)
# unconserved_metabolites = getUnconservedMetabolites(milp_result)
# milp_end = time.time()
# milp_time = milp_end - milp_start
# print("MILP time: %f" % milp_time)

In [10]:
def analyzeBIGG(model_file):
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  xgames_start = time.time()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_end = time.time()
  xgames_time = xgames_end - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  lp_start = time.time()
  s = StoichiometryMatrix(simple)
  lp_res = s.isConsistent()
  lp_end = time.time()
  lp_time = lp_end - lp_start
  print("LP time: %f" % lp_time)
  if s.result.status!=0:
    print("LP found an error!")
  milp_start = time.time()
  milp_result = solveMILP(s.stoichiometry_matrix)
  unconserved_metabolites = getUnconservedMetabolites(milp_result)
  milp_end = time.time()
  milp_time = milp_end - milp_start
  print("MILP time: %f" % milp_time)
  return m, s, milp_result, unconserved_metabolites

In [11]:
def analyzeByGAMES(model_file):
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  xgames_start = time.time()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_end = time.time()
  xgames_time = xgames_end - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  return m, xgames_time

In [12]:
def getRIS(m_games):
  result = []
  gr = GAMESReport(m_games)
  operation_df = gr.getOperationMatrix()
  if m_games.echelon_errors:
    for reaction in m_games.echelon_errors:
      operation_series = operation_df.T[reaction.label]
      result.append(len(operation_series.to_numpy().nonzero()[0]))
  if m_games.type_three_errors:
    for reaction in m_games.type_three_errors:
      operation_series = operation_df.T[reaction.label]
      result.append(len(operation_series.to_numpy().nonzero()[0]) + 1)
  print(result)
  return(result)

In [13]:
m1_1000, time1_1000 = analyzeByGAMES(MODEL_1000)
m2_1000, time2_1000 = analyzeByGAMES(MODEL_1000)
m3_1000, time3_1000 = analyzeByGAMES(MODEL_1000)

Model: iCN718.xml
xGAMES time: 55.397146
xGAMES found an error!
Model: iCN718.xml
xGAMES time: 54.934188
xGAMES found an error!
Model: iCN718.xml
xGAMES time: 58.113262
xGAMES found an error!


In [14]:
ave_1000_time = (time1_1000 + time2_1000 + time3_1000)/3
"average games time for 1000-reaction model: %.02f" % ave_1000_time

'average games time for 1000-reaction model: 56.15'

In [15]:
m1_1000_ris = getRIS(m1_1000)
m2_1000_ris = getRIS(m2_1000)
m3_1000_ris = getRIS(m3_1000)
#calculate mean of each run, and mean of all three

[63]
[106]
[2, 121]


In [16]:
(np.mean(m1_1000_ris) + np.mean(m2_1000_ris) + np.mean(m3_1000_ris))/3

76.83333333333333

In [17]:
(len(m1_1000_ris) + len(m2_1000_ris) + len(m3_1000_ris)) / 3

1.3333333333333333

In [18]:
m2_1000.error_summary

[ErrorSummary(type='echelon', errors=[R_PTAr:  -> 0.05 {M_ara5p_c=M_r1p_c=M_r5p_c=M_ru5p__D_c=M_xu5p__D_c=M_xyl__D_c=M_xyl__D_e}])]

In [19]:
m1_3000, time1_3000 = analyzeByGAMES(MODEL_3000)
m2_3000, time2_3000 = analyzeByGAMES(MODEL_3000)
m3_3000, time3_3000 = analyzeByGAMES(MODEL_3000)

Model: iECIAI1_1343.xml
xGAMES time: 885.113626
xGAMES found an error!
Model: iECIAI1_1343.xml
xGAMES time: 917.250528
xGAMES found an error!
Model: iECIAI1_1343.xml
xGAMES time: 872.143673
xGAMES found an error!


In [20]:
ave_3000_time = (time1_3000 + time2_3000 + time3_3000)/3
"average games time for 1000-reaction model: %.02f" % ave_3000_time

'average games time for 1000-reaction model: 891.50'

In [21]:
m1_3000_ris = getRIS(m1_3000)
m2_3000_ris = getRIS(m2_3000)
m3_3000_ris = getRIS(m3_3000)
(np.mean(m1_3000_ris) + np.mean(m2_3000_ris) + np.mean(m2_3000_ris))/3

[667, 560, 566, 568, 559, 641, 641, 567, 567, 578, 568, 568, 568, 667, 623, 568, 623, 667, 559, 566, 568, 678, 671, 621, 660, 577, 566, 567, 623, 623, 623, 558, 667, 670, 608, 626, 626, 625, 569, 568, 649, 566, 568, 664, 626, 627, 567, 625, 561, 569, 566, 568, 569, 388, 576, 568, 651, 621, 646, 566, 625, 576, 622, 568, 567, 686, 558, 623, 647, 566, 671, 671, 675, 671, 670, 670, 669, 663, 664, 670, 662, 665, 662, 662, 662, 656, 657, 658, 623, 647, 623, 628, 647, 566, 627, 623, 655, 645, 623, 623, 642, 644, 644, 645, 643, 642, 643, 643, 638, 638, 646, 638, 638, 625, 638, 650, 646, 631, 641, 644, 626, 626, 625, 625, 641, 626, 610, 627, 623, 628, 623, 622, 623, 623, 621, 623, 623, 623, 623, 623, 623, 626, 623, 623, 566, 624, 624, 566, 624, 623, 625, 623, 623, 625, 624, 624, 610, 562, 625, 627, 623, 609, 624, 624, 624, 623, 624, 625, 567, 576, 624, 567, 623, 630, 623, 561, 623, 623, 624, 624, 621, 622, 629, 622, 623, 622, 623, 622, 622, 621, 621, 620, 623, 612, 616, 611, 608, 607, 609, 607,

555.2770219718137

In [22]:
(len(m1_3000_ris) + len(m2_3000_ris) + len(m3_3000_ris)) / 3

213.0

In [21]:
# m1_4000, time1_4000 = analyzeByGAMES(MODEL_4000)
# m2_4000, time2_4000 = analyzeByGAMES(MODEL_4000)
# m3_4000, time3_4000 = analyzeByGAMES(MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1646.834592
xGAMES didn't find an error!
Model: iLB1027_lipid.xml
xGAMES time: 1653.936392
xGAMES didn't find an error!
Model: iLB1027_lipid.xml
xGAMES time: 1661.339419
xGAMES didn't find an error!


In [23]:
m1_6000, time1_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10967.770757
xGAMES found an error!


In [24]:
m2_6000, time2_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10224.564604
xGAMES found an error!


In [25]:
m3_6000, time3_6000 = analyzeByGAMES(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 10545.827261
xGAMES found an error!


In [26]:
# m4_6000, time4_6000 = analyzeByGAMES(MODEL_6000)

In [37]:
ave_6000_time = (time1_6000 + time2_6000 + time3_6000)/3
"average games time for 1000-reaction model: %.02f hours" % (ave_6000_time/3600)

'average games time for 1000-reaction model: 2.94 hours'

In [28]:
# ave_6000_time = (10206.073371 + 7281.181085 + 10132.363248)/3
# "average games time for 1000-reaction model: %.02f" % ave_6000_time

In [29]:
m1_6000_ris = getRIS(m1_6000)
m2_6000_ris = getRIS(m2_6000)
m3_6000_ris = getRIS(m3_6000)
(np.mean(m1_6000_ris) + np.mean(m2_6000_ris) + np.mean(m3_6000_ris))/3

[312, 520, 520, 511, 521, 540, 522, 346, 522, 499, 428, 544, 537, 576, 591, 358, 512, 537, 587, 548, 543, 525, 543, 520, 537, 537, 538, 546, 546, 549, 540, 428, 467, 474, 543, 478, 420, 473, 589, 586, 584, 583, 513, 581, 476, 475, 510, 476, 476, 544, 522, 484, 475, 537, 581, 581, 504, 427, 580, 428, 538, 358, 537, 583, 508, 576, 584, 576, 581, 505, 589, 534, 548, 548, 577, 480, 432, 475, 582, 581, 478, 542, 580, 582, 509, 583, 578, 470, 547, 540, 548, 581, 575, 578, 585, 581, 575, 427, 573, 586, 580, 580, 477, 522, 477, 477, 510, 579, 580, 538, 576, 577, 507, 579, 448, 428, 584, 583, 579, 581, 573, 576, 579, 579, 582, 531, 571, 573, 584, 574, 572, 571, 570, 570, 571, 572, 506, 570, 571, 569, 533, 535, 532, 537, 534, 532, 532, 530, 534, 536, 354, 527, 520, 515, 520, 505, 504, 504, 499, 503, 500, 503, 503, 501, 503, 501, 506, 497, 491, 491, 490, 484, 482, 483, 472, 474, 474, 479, 474, 474, 473, 479, 473, 471, 471, 471, 471, 473, 473, 471, 489, 470, 471, 469, 468, 468, 469, 469, 466, 467,

468.62434201166593

In [30]:
(len(m1_6000_ris) + len(m2_6000_ris) + len(m3_6000_ris)) / 3

235.66666666666666

In [2]:
# games_1000 = analyzeByGAMES(MODEL_1000)

In [3]:
# print(games_1000.error_summary)

In [4]:
# gr_1000 = GAMESReport(games_1000)

In [5]:
# print(gr_1000.reportEchelonError([games_1000.echelon_errors[0]], explain_details=True)[0])

In [6]:
# simple = SimpleSBML()
# simple.initialize(os.path.join(cn.BIGG_DIR, MODEL_1000))
# simple.getReaction("R_TRILIP")

In [31]:
# Finally, Recon3D
m1_10000, time_10000 = analyzeByGAMES(MODEL_10000)

Model: Recon3D.xml
xGAMES time: 24936.693614
xGAMES found an error!


In [38]:
"average games time for Recon3D model: %.02f hours" % (time_10000/3600)

'average games time for Recon3D model: 6.93 hours'

In [32]:
m_10000_ris = getRIS(m1_10000)

[842, 855, 859, 861, 860, 865, 860, 857, 861, 859, 859, 3, 3, 865, 864, 854, 858, 853, 864, 863, 860, 867, 861, 864, 860, 855, 840, 859, 818, 861, 865, 864, 861, 860, 863, 862, 861, 860, 860, 789, 863, 860, 857, 851, 857, 862, 860, 865, 802, 864, 703, 865, 807, 847, 851, 863, 860, 862, 861, 855, 866, 862, 845, 861, 858, 883, 860, 861, 865, 861, 861, 861, 859, 863, 866, 871, 870, 861, 863, 859, 858, 857, 860, 860, 865, 862, 853, 867, 851, 806, 858, 857, 876, 882, 861, 813, 859, 854, 865, 862, 864, 866, 858, 817, 682, 858, 847, 858, 879, 861, 853, 856, 863, 864, 861, 860, 864, 861, 860, 860, 861, 864, 860, 865, 866, 865, 860, 863, 862, 858, 857, 861, 866, 865, 867, 839, 866, 856, 863, 856, 865, 864, 862, 857, 862, 859, 855, 848, 789, 867, 863, 863, 878, 864, 633, 866, 860, 855, 860, 857, 855, 861, 860, 864, 865, 861, 864, 860, 860, 859, 861, 860, 861, 860, 842, 857, 860, 805, 870, 860, 858, 863, 854, 843, 875, 838, 865, 681, 860, 857, 783, 864, 865, 860, 866, 865, 864, 635, 637, 862, 863

In [35]:
print(len(m_10000_ris))
np.mean(m_10000_ris)

528


837.5757575757576

In [21]:
m_1000, s_1000, milp_result_1000, unconserved_metabolites_1000 = analyzeBIGG(MODEL_1000)

Model: iCN718.xml
xGAMES time: 108.660436
xGAMES found an error!




LP time: 2.030797
LP found an error!
MILP time: 23.239786


In [61]:
m_4000, s_4000, milp_result_4000, unconserved_metabolites_4000 = analyzeBIGG(MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1626.331340
xGAMES found an error!
LP time: 272.211592
LP found an error!
MILP time: 285.884376


In [62]:
m_6000, s_6000, milp_result_6000, unconserved_metabolites_6000 = analyzeBIGG(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 7200.678418
xGAMES found an error!
LP time: 759.267629
LP found an error!
MILP time: 796.065499


In [68]:
len(unconserved_metabolites_6000)

2370

In [71]:
print(s_4000.result.status)
print(s_6000.result.status)

2
2
