In [1]:
from games_setup import *
import SBMLLint.common.constants as cn
from SBMLLint.common.reaction import Reaction
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix
from SBMLLint.games.som import SOM
from SBMLLint.games.mesgraph import MESGraph
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import time
from scipy.linalg import lu, inv

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebook


In [2]:
# Models that simpleSBML cannot load; they are left out of every analysis below.
EXCEPTIONS = [
    "BIOMD0000000075_url.xml",
    "BIOMD0000000081_url.xml",
    "BIOMD0000000094_url.xml",
    "BIOMD0000000353_url.xml",
    "BIOMD0000000596_url.xml",
]
data_dir = cn.BIOMODELS_DIR
# Keep only directory entries whose name starts with "BIOMD00", then drop the
# unloadable models listed in EXCEPTIONS.
raw_files = [name for name in os.listdir(data_dir) if name.startswith("BIOMD00")]
files = [name for name in raw_files if name not in EXCEPTIONS]
# Full path of every model file that will be analyzed.
paths = [os.path.join(data_dir, name) for name in files]

In [3]:
# Show the directory the model files are read from.
data_dir

'/Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/data/biomodels'

In [4]:
# Number of model files kept for analysis (entries in EXCEPTIONS already removed).
len(files)

651

In [5]:
# statistics columns
# Names of the columns of the per-model `results` table. Each analysis cell
# below writes into its own column(s) with `results.at[file, <COLUMN>]`.
# Number of columns of the model's stoichiometry matrix
# (the label suggests boundary reactions are excluded — TODO confirm).
NUM_REACTIONS = "num_reactions(nonbdry)"
# Outcome of the LP-based check (StoichiometryMatrix.isConsistent).
LP_ERROR = "lp_error"
# Outcome of GAMES_PP.analyze(simple_games=True).
GAMES_ERROR = "games_error"
# Outcome of GAMES_PP.analyze(simple_games=False).
GAMESPP_ERROR = "gamespp_error"
# The columns below hold the number of errors of each kind, i.e. the length
# of the corresponding error list on the GAMES_PP object. The type I / type II
# columns are written by both the GAMES and the GAMES+ cells, so the cell that
# ran last determines their content.
TYPEI_ERROR = "type1_error"
TYPEII_ERROR = "type2_error"
CANCELING_ERROR = "canceling_error"
ECHELON_ERROR = "echelon_error"
TYPEIII_ERROR = "type3_error"
# Column order of the `results` table.
result_columns = [NUM_REACTIONS,
                  LP_ERROR,
                  GAMES_ERROR,
                  GAMESPP_ERROR,
                  TYPEI_ERROR,
                  TYPEII_ERROR,
                  CANCELING_ERROR,
                  ECHELON_ERROR,
                  TYPEIII_ERROR]
# TODO: decide whether to add a column recording if the L matrix is invertible, e.g.
# INVERTIBLE = "l_inverse"

In [6]:
# One row per model file, one column per statistic; every entry starts at 0.
results = pd.DataFrame(data=0, index=files, columns=result_columns)
# Preview the first five rows.
results.head()

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000199_url.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000189_url.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000387_url.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000397_url.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000413_url.xml,0,0,0,0,0,0,0,0,0


In [7]:
# Try the consistency check on one model before sweeping over all files.
single_model_path = os.path.join(data_dir, "BIOMD0000000244_url.xml")
simple = SimpleSBML()
simple.initialize(single_model_path)
s = StoichiometryMatrix(simple)
consistent = s.isConsistent()
print("consistent? ", consistent)

consistent?  False



A_eq does not appear to be of full row rank. To improve performance, check the problem formulation for redundant equality constraints.



In [8]:
# LP only
# Runs the LP-based consistency check on every model and times the whole sweep.
# LP_ERROR coding written by this cell:
#   0; the LP check reports the model as consistent
#   1; the LP check reports the model as inconsistent
#   2; the model has no reactions, so the LP check was skipped
#  -1; the model could not be loaded or the analysis raised an exception
simple = SimpleSBML()
count = 0  # stays 0 when `files` is empty
lp_start = time.time()
for count, file in enumerate(files, start=1):
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    s = StoichiometryMatrix(simple)
    num_reactions = s.stoichiometry_matrix.shape[1]
    results.at[file, NUM_REACTIONS] = num_reactions
    if num_reactions:
      consistent = s.isConsistent()
    else:
      # Sentinel for "nothing to check": 1 - (-1) stores the code 2 below.
      consistent = -1
    results.at[file, LP_ERROR] = 1 - int(consistent)
  except Exception:
    # Catch Exception rather than everything, so that KeyboardInterrupt and
    # SystemExit still stop the sweep instead of being recorded as -1.
    results.at[file, LP_ERROR] = -1
lp_end = time.time()
lp_time = lp_end - lp_start
print("Analysis finished!")
print("LP time:", lp_time)


Solving system with option 'cholesky':True failed. It is normal for this to happen occasionally, especially as the solution is approached. However, if you see this frequently, consider setting option 'cholesky' to False.


Solving system with option 'sym_pos':True failed. It is normal for this to happen occasionally, especially as the solution is approached. However, if you see this frequently, consider setting option 'sym_pos' to False.


Ill-conditioned matrix (rcond=5.45464e-39): result may not be accurate.


Ill-conditioned matrix (rcond=1.12904e-35): result may not be accurate.


Ill-conditioned matrix (rcond=3.1411e-35): result may not be accurate.


Ill-conditioned matrix (rcond=5.4561e-36): result may not be accurate.


Ill-conditioned matrix (rcond=4.63069e-36): result may not be accurate.


Ill-conditioned matrix (rcond=1.0886e-36): result may not be accurate.


Ill-conditioned matrix (rcond=1.32519e-38): result may not be accurate.


Ill-conditioned matrix (rcond=1.8947e-42

we are analyzing Model number: 100



Ill-conditioned matrix (rcond=1.04018e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.88655e-35): result may not be accurate.


Ill-conditioned matrix (rcond=1.05148e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.6748e-19): result may not be accurate.


Ill-conditioned matrix (rcond=5.83777e-20): result may not be accurate.


Ill-conditioned matrix (rcond=4.65813e-20): result may not be accurate.



we are analyzing Model number: 200



Ill-conditioned matrix (rcond=4.36544e-18): result may not be accurate.


Ill-conditioned matrix (rcond=8.60304e-19): result may not be accurate.


Solving system with option 'sym_pos':False failed. This may happen occasionally, especially as the solution is approached. However, if you see this frequently, your problem may be numerically challenging. If you cannot improve the formulation, consider setting 'lstsq' to True. Consider also setting `presolve` to True, if it is not already.



we are analyzing Model number: 300



Ill-conditioned matrix (rcond=2.5285e-18): result may not be accurate.


Ill-conditioned matrix (rcond=1.09031e-20): result may not be accurate.



we are analyzing Model number: 400



Ill-conditioned matrix (rcond=3.89357e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.77837e-19): result may not be accurate.


Ill-conditioned matrix (rcond=3.78425e-18): result may not be accurate.


Ill-conditioned matrix (rcond=3.75545e-20): result may not be accurate.



we are analyzing Model number: 500



Ill-conditioned matrix (rcond=1.88272e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.49388e-18): result may not be accurate.


Ill-conditioned matrix (rcond=3.67095e-36): result may not be accurate.


Ill-conditioned matrix (rcond=1.78232e-19): result may not be accurate.


Ill-conditioned matrix (rcond=1.10128e-35): result may not be accurate.


Ill-conditioned matrix (rcond=3.58365e-19): result may not be accurate.


Ill-conditioned matrix (rcond=2.76768e-18): result may not be accurate.



we are analyzing Model number: 600



Ill-conditioned matrix (rcond=1.31854e-21): result may not be accurate.


Ill-conditioned matrix (rcond=2.42617e-21): result may not be accurate.


Ill-conditioned matrix (rcond=1.94225e-39): result may not be accurate.


Ill-conditioned matrix (rcond=2.8262e-27): result may not be accurate.


Ill-conditioned matrix (rcond=2.34782e-24): result may not be accurate.


Ill-conditioned matrix (rcond=5.24629e-25): result may not be accurate.


Ill-conditioned matrix (rcond=1.36197e-24): result may not be accurate.


Ill-conditioned matrix (rcond=4.37713e-25): result may not be accurate.


Ill-conditioned matrix (rcond=4.41197e-26): result may not be accurate.


Ill-conditioned matrix (rcond=1.46257e-23): result may not be accurate.


Ill-conditioned matrix (rcond=1.31485e-23): result may not be accurate.


Ill-conditioned matrix (rcond=8.01852e-24): result may not be accurate.


Ill-conditioned matrix (rcond=1.10251e-23): result may not be accurate.


Ill-conditioned matrix (rcond=2.97798e

Analysis finished!
LP time: 44.068581104278564


In [9]:
# Models flagged as inconsistent by the LP check (LP_ERROR == 1) and the
# mean / standard deviation of their NUM_REACTIONS values.
# The former mid-cell `len(lp_results)` was removed: its value was discarded
# (only the last expression of a cell is displayed).
lp_results = results[results[LP_ERROR] == 1]
lp_reaction_counts = lp_results[NUM_REACTIONS]
print("(Mean) ISS for LP is:", np.mean(lp_reaction_counts))
print("(STD) ISS for LP is:", np.std(lp_reaction_counts))

(Mean) ISS for LP is: 55.51048951048951
(STD) ISS for LP is: 94.38982308045166


In [10]:
# Number of models whose LP_ERROR code is 1.
int((results[LP_ERROR] == 1).sum())

143

In [11]:
# Number of models whose LP_ERROR code is -1 (load or analysis failure).
int((results[LP_ERROR] == -1).sum())

0

In [1]:
# GAMES only
# Runs GAMES_PP.analyze(simple_games=True) on every model and times the sweep.
# GAMES_ERROR coding written by this cell:
#   0; no error found (also the initial value, left as-is for models without reactions)
#   1; error found
#  -1; the model could not be loaded or the analysis raised an exception
# NOTE: SimpleSBML is not imported by name in this notebook; it has to come
# from the imports cell, so run that cell first on a fresh kernel.
simple = SimpleSBML()
count = 0  # stays 0 when `files` is empty
games_start = time.time()
for count, file in enumerate(files, start=1):
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=True, error_details=False)
      results.at[file, GAMES_ERROR] = int(res)
      if res:
        gr = GAMESReport(m)
        summary = m.error_summary
        # The reports are generated inside the timed region; their text is
        # not stored in `results`, only the error counts are.
        if m.type_one_errors:
          results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
          report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        if m.type_two_errors:
          results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
          report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
  except Exception:
    # Catch Exception rather than everything, so that KeyboardInterrupt and
    # SystemExit still stop the sweep instead of being recorded as -1.
    results.at[file, GAMES_ERROR] = -1
games_end = time.time()
games_time = games_end - games_start
print("Analysis finished!")
print("GAMES time:", games_time)

NameError: name 'SimpleSBML' is not defined

In [13]:
# Compare the models flagged by GAMES with the ones flagged by the LP check.
games_flagged = results[GAMES_ERROR] == 1
lp_flagged = results[LP_ERROR] == 1
print("number of detected errors: ", int(games_flagged.sum()))
print("number of GAMES but not in LP", int((games_flagged & ~lp_flagged).sum()))

number of detected errors:  109
number of GAMES but not in LP 0


In [14]:
# Rows of the models for which the GAMES run was recorded as failed (-1).
results.loc[results[GAMES_ERROR] == -1]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error


In [15]:
# GAMES+
# Runs GAMES_PP.analyze(simple_games=False) on every model and times the sweep.
# file, GAMESPP_ERROR coding:
# 0; normal - no error found (also the initial value, left as-is for models without reactions)
# -1; the model could not be loaded or the analysis raised an exception
# 1; normal - error found
# 2; echelon error found, but it is not explainable
# 3; type III error found, but it is not explainable
#    (when both 2 and 3 apply, 3 is stored because it is written last)
# NOTE: an earlier draft of this cell also inspected invertibility and the
# condition number of `m.lower` (np.linalg.inv / np.linalg.cond) for models
# with echelon or type III errors; that diagnostic was disabled and is
# omitted here.
simple = SimpleSBML()
count = 0  # stays 0 when `files` is empty
gamespp_start = time.time()
for count, file in enumerate(files, start=1):
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=False, error_details=False)
      results.at[file, GAMESPP_ERROR] = int(res)
      if res:
        gr = GAMESReport(m)
        summary = m.error_summary
        # For each kind of error: store how many were found, then generate
        # the report inside the timed region.
        if m.type_one_errors:
          results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
          report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        if m.type_two_errors:
          results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
          report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
        if m.canceling_errors:
          results.at[file, CANCELING_ERROR] = len(m.canceling_errors)
          report, error_num = gr.reportCancelingError(m.canceling_errors, explain_details=True)
        if m.echelon_errors:
          results.at[file, ECHELON_ERROR] = len(m.echelon_errors)
          report, error_num = gr.reportEchelonError(m.echelon_errors, explain_details=True)
          # A report of False is treated as "error found but not explainable".
          if report is False:
            results.at[file, GAMESPP_ERROR] = 2
        if m.type_three_errors:
          results.at[file, TYPEIII_ERROR] = len(m.type_three_errors)
          report, error_num = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
          if report is False:
            results.at[file, GAMESPP_ERROR] = 3
  except Exception:
    # Record the failure in the GAMES+ column. The previous code wrote -1 to
    # GAMES_ERROR here, which overwrote the GAMES result and left GAMES+
    # failures invisible to the `GAMESPP_ERROR == -1` check.
    # Exception (not a bare except) lets KeyboardInterrupt stop the sweep.
    results.at[file, GAMESPP_ERROR] = -1
gamespp_end = time.time()
gamespp_time = gamespp_end - gamespp_start
print("\nAnalysis finished!")
print("GAMES++ time:", gamespp_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600

Analysis finished!
GAMES++ time: 192.53740882873535


In [16]:
# Compare the models flagged by GAMES+ (code 1) with the ones flagged by the LP check.
gamespp_flagged = results[GAMESPP_ERROR] == 1
not_lp_flagged = results[LP_ERROR] != 1
print("number of detected errors: ", int(gamespp_flagged.sum()))
print("number of GAMES errors not in LP", int((gamespp_flagged & not_lp_flagged).sum()))
# Number of models whose GAMES+ run was recorded as failed (-1).
int((results[GAMESPP_ERROR] == -1).sum())

number of detected errors:  135
number of GAMES errors not in LP 0


0

In [17]:
# Number of models with GAMES+ code 2 (echelon error that could not be explained).
int((results[GAMESPP_ERROR] == 2).sum())

0

In [18]:
# Number of models with GAMES+ code 3 (type III error that could not be explained).
int((results[GAMESPP_ERROR] == 3).sum())

3

In [19]:
# Rows of the models with GAMES+ code 3.
results.loc[results[GAMESPP_ERROR] == 3]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000014_url.xml,300,0,0,3,0,0,0,31,5
BIOMD0000000574_url.xml,576,0,0,3,0,0,0,15,10
BIOMD0000000243_url.xml,19,1,0,3,0,0,0,2,1


In [None]:
# Re-run GAMES+ on a single model, this time with error_details=True.
# 574 presumably selects BIOMD0000000574, one of the models with
# GAMES+ code 3 — confirm against load_file_from_games.
simple = load_file_from_games(574)
m = GAMES_PP(simple)
res = m.analyze(simple_games=False, error_details=True)

In [71]:
# Inspect the `lower` matrix of the analyzed model
# (presumably the L factor of an LU decomposition — TODO confirm).
m.lower

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [72]:
# Determinant of `m.lower`; a non-zero value means the matrix is invertible.
np.linalg.det(m.lower)

1.0