In [9]:
from games_setup import *
import SBMLLint.common.constants as cn
from SBMLLint.common.reaction import Reaction
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix
from SBMLLint.games.som import SOM
from SBMLLint.games.mesgraph import MESGraph
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import time
from scipy.linalg import lu, inv

In [10]:
# The following models are not loadable by simple SBML
EXCEPTIONS = ["BIOMD0000000094.xml",
              "BIOMD0000000596.xml",
              "BIOMD0000000786.xml",
              "BIOMD0000000794.xml",
              "BIOMD0000000830.xml",
             ]

# we can exclude EXCEPTIONS from files, as they are not loaded by simpleSBML
raw_files = [f for f in os.listdir(cn.BIOMODELS_DIR) if f[-4:] == ".xml"]
files = [f for f in raw_files if f not in EXCEPTIONS]
paths = [os.path.join(cn.BIOMODELS_DIR, filename) for filename in files]

In [11]:
len(files)

826

In [12]:
# statistics columns
NUM_REACTIONS = "num_reactions(nonbdry)"
LP_ERROR = "lp_error"
GAMES_ERROR = "games_error"
GAMESPP_ERROR = "gamespp_error"
TYPEI_ERROR = "type1_error"
TYPEII_ERROR = "type2_error"
CANCELING_ERROR = "canceling_error"
ECHELON_ERROR = "echelon_error"
TYPEIII_ERROR = "type3_error"
result_columns = [NUM_REACTIONS,
                  LP_ERROR,
                  GAMES_ERROR,
                  GAMESPP_ERROR,
                  TYPEI_ERROR,
                  TYPEII_ERROR,
                  CANCELING_ERROR,
                  ECHELON_ERROR,
                  TYPEIII_ERROR]

In [13]:
results = pd.DataFrame(0, index=files, columns=result_columns)
results[:5]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000634.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000152.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000146.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000620.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000608.xml,0,0,0,0,0,0,0,0,0


In [14]:
simple = SimpleSBML()
simple.initialize(os.path.join(cn.BIOMODELS_DIR, "BIOMD0000000244.xml"))
s = StoichiometryMatrix(simple)
consistent = s.isConsistent()
print("consistent? ", consistent)

consistent?  False


In [15]:
import warnings
warnings.filterwarnings('ignore')
# LP only
simple = SimpleSBML()
count = 0
lp_start = time.time()
for file in files:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    s = StoichiometryMatrix(simple)
    num_reactions = s.stoichiometry_matrix.shape[1]
    results.at[file, NUM_REACTIONS] = num_reactions
    if num_reactions:
      consistent = s.isConsistent()
    else:
      consistent = -1
    results.at[file, LP_ERROR] = 1 - int(consistent)
  except:
    results.at[file, LP_ERROR] = -1
lp_end = time.time()
lp_time = lp_end - lp_start
print("Analysis finished!")
print("LP time:", lp_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800
Analysis finished!
LP time: 52.44812822341919


In [16]:
lp_results = results[results[LP_ERROR] == 1]
len(lp_results)
print("(Mean) ISS for LP is:", np.mean(lp_results[NUM_REACTIONS]))
print("(STD) ISS for LP is:", np.std(lp_results[NUM_REACTIONS]))

(Mean) ISS for LP is: 53.11392405063291
(STD) ISS for LP is: 90.84825118201051


In [17]:
len(results[results[LP_ERROR]==1])

158

In [18]:
len(results[results[LP_ERROR]==-1])

0

In [19]:
models_test_by_games = results[results[LP_ERROR]==1].index
models_test_by_games[:3]

Index(['BIOMD0000000634.xml', 'BIOMD0000000608.xml', 'BIOMD0000000344.xml'], dtype='object')

In [20]:
# bGAMES 
simple = SimpleSBML()
count = 0
games_start = time.time()
# for file in files:
for file in models_test_by_games:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=True, error_details=False, suppress_message=True)
      results.at[file, GAMES_ERROR] = int(res)
#       if res:
#         gr = GAMESReport(m)
#         summary = m.error_summary
#         if m.type_one_errors:
#           results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
#           report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
#         if m.type_two_errors:
#           results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
#           report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
  except:
    results.at[file, GAMES_ERROR] = -1   
games_end = time.time()
games_time = games_end - games_start
print("Analysis finished with total %d models!" % count)
print("GAMES time:", games_time)

we are analyzing Model number: 100
Analysis finished with total 158 models!
GAMES time: 54.53418588638306


In [21]:
print("number of detected errors: ", len(results[results[GAMES_ERROR]==1]))
print("number of GAMES but not in LP", len(results[(results[GAMES_ERROR]==1) & (results[LP_ERROR]!=1)]))

number of detected errors:  123
number of GAMES but not in LP 0


In [24]:
results[results[GAMES_ERROR]==-1]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000634.xml,112,1,-1,0,0,0,0,0,0
BIOMD0000000608.xml,96,1,-1,0,0,0,0,0,0
BIOMD0000000344.xml,80,1,-1,0,0,0,0,0,0
BIOMD0000000542.xml,82,1,-1,0,0,0,0,0,0
BIOMD0000000230.xml,64,1,-1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
BIOMD0000000573.xml,3,1,-1,0,0,0,0,0,0
BIOMD0000000407.xml,34,1,-1,0,0,0,0,0,0
BIOMD0000000413.xml,5,1,-1,0,0,0,0,0,0
BIOMD0000000188.xml,19,1,-1,0,0,0,0,0,0


In [27]:
# GAMES+
# file, GAMES_ERROR coding:
# 0; normal - no error found
# -1; not loaded or error found
# 1; normal - error found
# 2; echelon error found, but it is not explainable
# 3; type III error found, but it is not explainable
simple = SimpleSBML()
count = 0
failed_to_find_error = list()
gamespp_start = time.time()
# for file in files:
for file in models_test_by_games:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
      results.at[file, GAMESPP_ERROR] = int(res)
      if not res:
        failed_to_find_error.append(file)
#       if res:
#         gr = GAMESReport(m)
#         summary = m.error_summary
#         if m.type_one_errors:
#           results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
#           report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
#         if m.type_two_errors:
#           results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
#           report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
#         if m.canceling_errors:
#           results.at[file, CANCELING_ERROR] = len(m.canceling_errors)
#           report, error_num = gr.reportCancelingError(m.canceling_errors, explain_details=True)
#         if m.echelon_errors:
#           #print("Model %s has an echelon error:" % file)
#           results.at[file, ECHELON_ERROR] = len(m.echelon_errors)
#           report, error_num = gr.reportEchelonError(m.echelon_errors, explain_details=True)
#           if report is False:
#             results.at[file, GAMESPP_ERROR] = 2
# #             print("Model %s has an inexplainable Echelon Error" % file)
# #             print("As the lower matrix has a condition number %f" % condition_number)
# #             print("Decide if the matrix is invertible")
#         if m.type_three_errors:
#           #print("Model %s has a type III error:" % file)
#           results.at[file, TYPEIII_ERROR] = len(m.type_three_errors)
#           report, error_num = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
#           if report is False:
#             results.at[file, GAMESPP_ERROR] = 3
# #             print("Model %s has an inexplainable Type III Error" % file)
# #             print("As the lower matrix has a condition number %f" % condition_number)
# #             print("Decide if the matrix is invertible")
  except:
    results.at[file, GAMES_ERROR] = -1   
gamespp_end = time.time()
initial_xgames_time = gamespp_end - gamespp_start
print("\nInitial analysis finished with total %d models!" % count)
print("number of (initially) detected errors: ", len(results[results[GAMESPP_ERROR]==1]))
print("(INITIAL) xGAMES time:", initial_xgames_time)
print("\n\nThe following have not been detected by games")
print(failed_to_find_error)
print("\n\nNow start secondary iteration")
MAX_ITR = 15
found_error = False
retest_begin = time.time()
NO_RETEST = ["BIOMD0000000070.xml",
             "BIOMD0000000247.xml",
             "BIOMD0000000473.xml",
             "BIOMD0000000503.xml"]
retest_models = [f for f in failed_to_find_error if f not in NO_RETEST]
for f in retest_models:
  print("We are working on model %s" % f)
  itr = 0
  found_error = False
  while (itr<MAX_ITR) and (found_error is False):
    itr += 1
    if itr%5==0:
      print("Current iteration is %d" % itr)
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, f))
    m = GAMES_PP(simple)
    res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
    if res:
      found_error = True
      results.at[f, GAMESPP_ERROR] = 2
      print("Found error at iteration %d!" %itr)
    if itr==MAX_ITR:
      print("max itr reached!")
retest_end = time.time()
retest_xgames_time = retest_end - retest_begin
print("Total analysis finished, with additional iteration for %d models!" % len(retest_models))
print("(excluding four errors) total xGAMES time: %f" % (initial_xgames_time + retest_xgames_time))
print("number of (finally) detected errors: ", len(results[results[GAMESPP_ERROR]>=1]))

we are analyzing Model number: 100

Initial analysis finished with total 158 models!
number of (initially) detected errors:  152
(INITIAL) xGAMES time: 87.6510899066925


The following have not been detected by games
['BIOMD0000000469.xml', 'BIOMD0000000247.xml', 'BIOMD0000000281.xml', 'BIOMD0000000070.xml', 'BIOMD0000000503.xml', 'BIOMD0000000473.xml']


Now start secondary iteration
We are working on model BIOMD0000000469.xml
Found error at iteration 2!
We are working on model BIOMD0000000281.xml
Found error at iteration 1!
Total analysis finished, with additional iteration for 2 models!
(excluding four errors) total xGAMES time: 125.809475
number of (finally) detected errors:  154


In [41]:
failed_to_find_error

['BIOMD0000000247.xml',
 'BIOMD0000000281.xml',
 'BIOMD0000000070.xml',
 'BIOMD0000000503.xml',
 'BIOMD0000000473.xml',
 'BIOMD0000000470.xml',
 'BIOMD0000000572.xml']

In [38]:
print("number of detected errors: ", len(results[results[GAMESPP_ERROR]==1]))
print("number of GAMES errors not in LP", len(results[(results[GAMESPP_ERROR]==1) & (results[LP_ERROR]!=1)]))
len(results[results[GAMESPP_ERROR]==-1])

number of detected errors:  151
number of GAMES errors not in LP 0


0

In [28]:
df = m.lower_inverse
df1 = m.lower
df[df > .000001].min(axis=1)

r_9     1.00
r_3     1.00
r_1     1.00
r_11    1.00
r_15    1.00
r_12    1.00
r_8     1.00
r_2     0.50
r_10    1.00
r_5     1.00
r_7     1.00
r_21    1.00
r_6     1.00
r_22    0.50
r_17    1.00
r_4     0.50
r_18    1.00
r_13    1.00
r_14    1.00
r_16    0.25
dtype: float64

In [29]:
df1[df1 > .01].min()

0.5

In [30]:
print("number of LP errors not in GAMES")
print(results[(results[GAMESPP_ERROR]!=1) & (results[LP_ERROR]==1)].index)

number of LP errors not in GAMES
Index(['BIOMD0000000469.xml', 'BIOMD0000000247.xml', 'BIOMD0000000281.xml',
       'BIOMD0000000070.xml', 'BIOMD0000000503.xml', 'BIOMD0000000473.xml'],
      dtype='object')


In [87]:
# The following is to make sure to not be able to found an error though GAMES
# re_retest_files = ["BIOMD0000000070.xml",
#                   "BIOMD0000000247.xml",
#                   "BIOMD0000000473.xml",
#                   "BIOMD0000000503.xml"]
# MAX_ITR = 25
# found_error = False
# for f in re_retest_files:
#   print("We are working on model %s" % f)
#   itr = 0
#   found_error = False
#   while (itr<MAX_ITR) and (found_error is False):
#     itr += 1
#     if itr%5==0:
#       print("Current iteration is %d" % itr)
#     simple = SimpleSBML()
#     simple.initialize(os.path.join(data_dir, f))
#     m = GAMES_PP(simple)
#     res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
#     if res:
#       found_error = True
#       print("Found error!")
#     if itr==MAX_ITR:
#       print("max itr reached!")

We are working on model BIOMD0000000070.xml
Current iteration is 5
Current iteration is 10
Current iteration is 15
Current iteration is 20
Current iteration is 25
max itr reached!
We are working on model BIOMD0000000247.xml
Current iteration is 5
Current iteration is 10
Current iteration is 15
Current iteration is 20
Current iteration is 25
max itr reached!
We are working on model BIOMD0000000473.xml
Current iteration is 5
Current iteration is 10
Current iteration is 15
Current iteration is 20
Current iteration is 25
max itr reached!
We are working on model BIOMD0000000503.xml
Current iteration is 5
Current iteration is 10
Current iteration is 15
Current iteration is 20
Current iteration is 25
max itr reached!


In [36]:
# to test if P-matrix is stable for the same error/LP, we test it
simple = SimpleSBML()
MAX_ITR = 25
found_same_error_count = 0
for i1 in range(MAX_ITR):
  simple.initialize(os.path.join(cn.BIOMODELS_DIR, "BIOMD0000000572.xml"))
  m = GAMES_PP(simple)
  res = m.analyze()
  print(m.error_summary)
  if res and m.type_three_errors[0].label=="re21":
    print("found re21 error!")
    break

Model analyzed...
At least one error found.

[ErrorSummary(type='type3', errors=[re13: {ATP} -> {ADP}]), ErrorSummary(type='echelon', errors=[re14: {Pext} -> , re25:  -> {Pint}])]
Model analyzed...
At least one error found.

[ErrorSummary(type='type3', errors=[re21: {F6P=G6P} -> {FBP}])]
found re21 error!


In [37]:
# test if p-matrix is applied (perm_df), resulting l/u are stable
# from scipy.linalg import lu
# p1, l1, u1 = lu(m.permuted_matrix)
# p2, l2, u2 = lu(m.permuted_matrix)
# p3, l3, u3 = lu(m.permuted_matrix)
# print((l1==l2).sum())
# print((l2==l3).sum())
# print((u1==u2).sum())
# print((u2==u3).sum())