In [1]:
# BiGG analysis
# Before doing that, the following things need to be done:
# 1. finalize roundup
# 2. test coverage for biomodels
# 3. "Reaction Isolation Set"
# 4. check (text) reporting threshold
# 5. change Report

import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import numpy as np
import pandas as pd
from pulp import *
import matplotlib.pyplot as plt
import time

from scipy.linalg import lu, inv
from scipy.optimize import linprog

In [2]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.simple_sbml import SimpleSBML
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix

from SBMLLint.games.som import SOM
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [3]:
# Show the kernel's current working directory.
os.getcwd()

'/Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks'

In [4]:
# Load the catalogue of BiGG models from the BiGG REST API.
# Keys used in the JSON payload returned by the API
BIGG_RESULTS = "results"
BIGG_ID = "bigg_id"
BIGG_GENECOUNT = "gene_count"
BIGG_REACTION_COUNT = "reaction_count"
BIGG_ORGANISM = "organism"
BIGG_METABOLITE_COUNT = "metabolite_count"
import requests
# The timeout keeps this cell from hanging indefinitely when the server is
# unreachable; raise_for_status() reports an HTTP failure here instead of
# letting it surface later as a JSON decoding error or a KeyError.
res = requests.get('http://bigg.ucsd.edu/api/v2/models', timeout=60)
res.raise_for_status()
bigg_models = res.json()[BIGG_RESULTS]
bigg_ids = [bigg[BIGG_ID] for bigg in bigg_models]
print("number of BiGG models: %d" % len(bigg_models))
# Peek at the first record to show the available fields
bigg_models[:1]

number of BiGG models: 108


[{'bigg_id': 'e_coli_core',
  'gene_count': 137,
  'reaction_count': 95,
  'organism': 'Escherichia coli str. K-12 substr. MG1655',
  'metabolite_count': 72}]

In [5]:
# Tabulate the model catalogue, indexed by BiGG id and ordered by
# reaction count so that the largest models come last.
bigg_df = (
    pd.DataFrame(bigg_models)
    .set_index(BIGG_ID)
    .sort_values(by=BIGG_REACTION_COUNT)
)
bigg_df.tail(10)

Unnamed: 0_level_0,gene_count,reaction_count,organism,metabolite_count
bigg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
iECW_1372,1372,2782,Escherichia coli W,1973
iWFL_1372,1372,2782,Escherichia coli W,1973
iJN1463,1462,2927,Pseudomonas putida KT2440,2153
iYS1720,1707,3357,Salmonella pan-reactome,2436
iMM1415,1375,3726,Mus musculus,2775
RECON1,1905,3741,Homo sapiens,2766
iCHOv1_DG44,1184,3942,Cricetulus griseus,2751
iLB1027_lipid,1027,4456,Phaeodactylum tricornutum CCAP 1055/1,2172
iCHOv1,1766,6663,Cricetulus griseus,4456
Recon3D,2248,10600,Homo sapiens,5835


In [6]:
# Four BiGG models spanning a range of sizes; the suffix of each constant
# is the approximate number of reactions in the model.
MODEL_1000 = "iCN718.xml"
MODEL_4000 = "iLB1027_lipid.xml"
MODEL_6000 = "iCHOv1.xml"
MODEL_10000 = "Recon3D.xml"
FOUR_BIGG = [MODEL_1000, MODEL_4000, MODEL_6000, MODEL_10000]
# Print the catalogue entry of every selected model, framed by separators.
print("------------------------------------------------------")
for bigg in FOUR_BIGG:
  # bigg_df is indexed by BiGG id, i.e. the file name without ".xml"
  print(bigg_df.loc[os.path.splitext(bigg)[0]])
  print("------------------------------------------------------")

------------------------------------------------------
gene_count                                  709
reaction_count                             1015
organism            Acinetobacter baumannii AYE
metabolite_count                            888
Name: iCN718, dtype: object
------------------------------------------------------
gene_count                                           1027
reaction_count                                       4456
organism            Phaeodactylum tricornutum CCAP 1055/1
metabolite_count                                     2172
Name: iLB1027_lipid, dtype: object
------------------------------------------------------
gene_count                        1766
reaction_count                    6663
organism            Cricetulus griseus
metabolite_count                  4456
Name: iCHOv1, dtype: object
------------------------------------------------------
gene_count                  2248
reaction_count             10600
organism            Homo sapiens
metabolite

In [7]:
# use Recon3D
# SAMPLE_MODEL = FOUR_BIGG[3]
# fpath = os.path.join(cn.BIGG_DIR, SAMPLE_MODEL)
# simple = SimpleSBML()
# simple.initialize(fpath)

In [8]:
def solveMILP(mat):
  """
  Builds and solves a mixed-integer linear program that assigns a mass to
  every species so that each reaction is mass balanced, while maximizing
  the number of species whose binary inclusion variable is 1.

  Parameters
  ----------
  mat: pandas.DataFrame
      Stoichiometry matrix; the index holds the species and the
      columns hold the reactions.

  Returns
  -------
  LpProblem
      The problem after prob.solve() has been called. Variables are
      named "species_<id>" (binary) and "mass_<id>" (continuous).
  """
  prob = LpProblem("Finding_Unconserved_Metabolites", LpMaximize)
  species = list(mat.index)
  species_inclusion = pulp.LpVariable.dicts("species", species, cat="Binary")
  species_mass = pulp.LpVariable.dicts("mass", species, cat="Continuous")
  # objective function (to maximize the number of species)
  prob += lpSum([species_inclusion[i] for i in species])
  # constraint 1 (for each reaction, the sum(stoichiometry[i]*mass[i])=0)
  for reaction in mat.columns:
    # Look the column up once per reaction rather than once per species.
    stoichiometry = mat[reaction]
    # lpSum accumulates the terms directly; wrapping them in the built-in
    # sum() first copies the growing expression at every addition.
    prob += lpSum([stoichiometry[one_species]*species_mass[one_species]
                   for one_species in species_inclusion]) == 0
  # constraint 2 (species_inclusion is less than or equal to mass of each species)
  # A species can only be counted when its mass is at least 1.
  for one_species in species_mass.keys():
    prob += species_inclusion[one_species] <= species_mass[one_species]
  prob.solve()
  return prob
def getUnconservedMetabolites(milp_result, tolerance=1e-6):
  """
  Collects the species whose inclusion variable is zero in a solved MILP.

  Parameters
  ----------
  milp_result: object with a variables() method
      Solved problem, as returned by solveMILP. Each variable must
      expose .name and .varValue.
  tolerance: float
      Values with absolute value at or below this are treated as zero,
      because a solver may return a binary variable as e.g. 1e-9
      instead of exactly 0.

  Returns
  -------
  list-of-str
      Names of the selected variables with the leading "species_" removed.
  """
  prefix = "species"
  # Skip the prefix plus the separator character that follows it.
  name_start = len(prefix) + 1
  unconserved_metabolites = []
  for v in milp_result.variables():
    value = v.varValue
    # Variables without a value (None) are ignored.
    if value is None or abs(value) > tolerance:
      continue
    if v.name[:len(prefix)] == prefix:
      unconserved_metabolites.append(v.name[name_start:])
  return unconserved_metabolites

In [9]:
# milp_start = time.time()
# s = StoichiometryMatrix(simple)
# milp_result = solveMILP(s.stoichiometry_matrix)
# unconserved_metabolites = getUnconservedMetabolites(milp_result)
# milp_end = time.time()
# milp_time = milp_end - milp_start
# print("MILP time: %f" % milp_time)

In [10]:
def analyzeBIGG(model_file):
  """
  Runs xGAMES, the LP consistency check and the MILP on one BiGG model,
  printing the elapsed time and the outcome of each step.

  Parameters
  ----------
  model_file: str
      Name of the SBML file inside cn.BIGG_DIR

  Returns
  -------
  tuple
      (GAMES_PP object, StoichiometryMatrix object,
       solved MILP problem, list of unconserved metabolite names)
  """
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  # --- xGAMES ---
  # perf_counter is monotonic, so long runs are not distorted by
  # adjustments of the system clock.
  xgames_start = time.perf_counter()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_time = time.perf_counter() - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  # --- LP ---
  lp_start = time.perf_counter()
  s = StoichiometryMatrix(simple)
  # The return value is not used; the outcome is read from s.result below
  # (presumably populated by this call -- TODO confirm).
  s.isConsistent()
  lp_time = time.perf_counter() - lp_start
  print("LP time: %f" % lp_time)
  # Any non-zero solver status is treated as an error.
  if s.result.status!=0:
    print("LP found an error!")
  else:
    print("LP didn't find an error!")
  # --- MILP ---
  milp_start = time.perf_counter()
  milp_result = solveMILP(s.stoichiometry_matrix)
  unconserved_metabolites = getUnconservedMetabolites(milp_result)
  milp_time = time.perf_counter() - milp_start
  print("MILP time: %f" % milp_time)
  return m, s, milp_result, unconserved_metabolites

In [11]:
def analyzeByGAMES(model_file):
  """
  Runs xGAMES on one BiGG model and prints the elapsed time and whether
  an error was found.

  Parameters
  ----------
  model_file: str
      Name of the SBML file inside cn.BIGG_DIR

  Returns
  -------
  GAMES_PP
      The analyzer object after analyze() has been called.
  """
  simple = SimpleSBML()
  simple.initialize(os.path.join(cn.BIGG_DIR, model_file))
  print("Model: %s" % model_file)
  # perf_counter is monotonic, so long runs are not distorted by
  # adjustments of the system clock.
  xgames_start = time.perf_counter()
  m = GAMES_PP(simple)
  res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
  xgames_time = time.perf_counter() - xgames_start
  print("xGAMES time: %f" % xgames_time)
  if res:
    print("xGAMES found an error!")
  else:
    print("xGAMES didn't find an error!")
  return m

In [12]:
# Run xGAMES on the smallest of the four selected models (iCN718).
games_1000 = analyzeByGAMES(MODEL_1000)

Model: iCN718.xml
xGAMES time: 56.937363
xGAMES found an error!


In [13]:
# Summary of the errors xGAMES recorded for iCN718.
print(games_1000.error_summary)

[ErrorSummary(type='echelon', errors=[R_BIWLIP: {M_coa_c} -> , R_GLYO1:  -> 0.60 {M_no2_c=M_no2_e}, R_NADH10: 0.41 {M_no2_c=M_no2_e} -> ])]


In [14]:
# Reporter used below to render the xGAMES errors of iCN718 as text.
gr_1000 = GAMESReport(games_1000)

In [22]:
# Print the detailed report for the first echelon error only.
# NOTE(review): element [0] of the return value is taken to be the report
# text -- confirm against GAMESReport.reportEchelonError.
print(gr_1000.reportEchelonError([games_1000.echelon_errors[0]], explain_details=True)[0])


We detected a mass imbalance
: M_coa_c -> 

from the following isolation set.

1. R_TRILIP: M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c
2. R_BIWLIP: M_coa_c + M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c

----------------------------------------------------------------------
----------------------------------------------------------------------

These uni-uni reactions created mass-equivalence.
(The chemical species within a curly bracket have the same atomic mass.)

----------------------------------------------------------------------

Based on t

In [18]:
# Load iCN718 directly to inspect the reactions named in the echelon
# error report, starting with R_TRILIP.
simple = SimpleSBML()
simple.initialize(os.path.join(cn.BIGG_DIR, MODEL_1000))
simple.getReaction("R_TRILIP")

R_TRILIP: M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c

In [19]:
# R_BIWLIP is the second reaction of the reported isolation set; compare it
# with R_TRILIP to see where the two differ.
simple.getReaction("R_BIWLIP")

R_BIWLIP: M_coa_c + M_tg160_c -> M_12dgr160_c + 0.04 M_bhdodec_c + 0.01 M_decacid_c + 0.05 M_dodecacid_c + 0.02 M_hepadecacid_c + 0.02 M_hepedecacid_c + 0.28 M_hexadecacid_c + 0.19 M_hexedecacid_c + 0.01 M_octadecacid_c + 0.38 M_octedecacid_c + 0.00 M_pendecacid_c + 0.00 M_tetdecacid_c

In [61]:
# NOTE(review): nothing in the loop body updates found_error, so this loop
# always runs until limit_repeat reaches 20 and prints nothing. The saved
# output of this cell (ten "False" lines, limit_repeat == 10) therefore comes
# from an earlier version. The intended retry body appears to be the
# commented-out code below -- TODO restore it or delete this cell.
limit_repeat = 0
found_error = False
while (found_error is False) and (limit_repeat<20):
  limit_repeat += 1

#   games_1000 = analyzeByGAMES(MODEL_1000)
# for er in games_1000.echelon_errors:
#   x, y = gr_1000.reportEchelonError([er])
#   if y[0]<10:
#     print(x)

False
False
False
False
False
False
False
False
False
False


In [63]:
# Number of iterations performed by the retry loop.
limit_repeat

10

In [15]:
# Run xGAMES on the largest selected model (Recon3D). This is very slow:
# the recorded run took about 23,347 seconds (roughly 6.5 hours).
games_10000 = analyzeByGAMES(MODEL_10000)

Model: Recon3D.xml
xGAMES time: 23346.919296
xGAMES found an error!


[ErrorSummary(type='type3', errors=[R_ECOAH12: {M_2mp2coa_c} -> {M_3hibutcoa_c}, R_FCLPA: {M_fc1p_c} -> {M_lald__L_c}, R_PYDXNO: {M_pydxn_c} -> {M_pydx_c}, R_TDPGDH: {M_dtdpgal_c=M_dtdpglu_c} -> {M_dtdp4d6dg_c=M_dtdp4d6dm_c}]),
 ErrorSummary(type='echelon', errors=[R_ADCS:  -> {M_4adcho_c}, R_TKT1:  -> {M_s7p_c}, R_HPA3MO: {M_4hphac_c} + {M_etoh_c} -> , R_PALDH: {M_Pald_c} -> , R_DHQTi: {M_3dhq_c} -> , R_DRPA:  -> {M_acald_c}, R_GLUt4: {M_glu__L_e} -> , R_PROt4: {M_pro__L_e} -> , R_THRD_L: {M_thr__L_c} -> , R_HBCHLR:  -> {M_b2coa_c}, R_CHORS: {M_3psme_c} -> , R_ETHAAL: {M_etha_c} -> , R_GMNA: {M_gln__D_c} -> , R_ASNN: {M_asn__L_c} + 0.01 {M_2obut_c=M_2obut_e} + 0.27 {M_2mcit_c=M_micit_c} -> , R_HPOXR: 2.00 {M_h2o2_c} -> , R_PGLYCP: {M_2pglyc_c} -> , R_PENDECACID:  -> 0.77 {M_octedecacid_c}, R_ANS2:  -> {M_Lkynr_c}, R_TETDECACID:  -> 0.97 {M_pendecacid_c}, R_BIWLIP:  -> 0.13 {M_tetdecacid_c}, R_FLAVO2: {M_hh3phac_c} -> , R_ACOADH1: {M_2mbcoa_c} -> , R_DHPS:  -> 0.91 {M_itp_c} + 0.13 {M_

In [21]:
# Compare xGAMES, LP and MILP on iCN718.
m_1000, s_1000, milp_result_1000, unconserved_metabolites_1000 = analyzeBIGG(MODEL_1000)

Model: iCN718.xml
xGAMES time: 108.660436
xGAMES found an error!




LP time: 2.030797
LP found an error!
MILP time: 23.239786


In [61]:
# Compare xGAMES, LP and MILP on iLB1027_lipid.
m_4000, s_4000, milp_result_4000, unconserved_metabolites_4000 = analyzeBIGG(MODEL_4000)

Model: iLB1027_lipid.xml
xGAMES time: 1626.331340
xGAMES found an error!
LP time: 272.211592
LP found an error!
MILP time: 285.884376


In [62]:
# Compare xGAMES, LP and MILP on iCHOv1.
m_6000, s_6000, milp_result_6000, unconserved_metabolites_6000 = analyzeBIGG(MODEL_6000)

Model: iCHOv1.xml
xGAMES time: 7200.678418
xGAMES found an error!
LP time: 759.267629
LP found an error!
MILP time: 796.065499


In [68]:
# Number of species whose inclusion variable is 0 in the MILP solution
# for iCHOv1.
len(unconserved_metabolites_6000)

2370

In [71]:
# LP solver status for the two mid-sized models; analyzeBIGG reports any
# non-zero status as an error.
# NOTE(review): if s.result comes from scipy.optimize.linprog, status 2
# means the problem appears infeasible -- confirm in StoichiometryMatrix.
print(s_4000.result.status)
print(s_6000.result.status)

2
2


In [184]:
# Inspect the CPLEX library path configured in PuLP.
pulp.cplex_dll_path

'/usr/ilog/cplex/bin/x86_rhel4.0_3.4/libcplex110.so'