In [1]:
from games_setup import *
import SBMLLint.common.constants as cn
from SBMLLint.common.reaction import Reaction
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix
from SBMLLint.games.som import SOM
from SBMLLint.games.mesgraph import MESGraph
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import time
from scipy.linalg import lu, inv

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [2]:
# The following models are not loadable by simple SBML
EXCEPTIONS = ["BIOMD0000000094.xml",
              "BIOMD0000000596.xml",
              "BIOMD0000000786.xml",
              "BIOMD0000000794.xml",
              "BIOMD0000000830.xml",
             ]

# we can exclude EXCEPTIONS from files, as they are not loaded by simpleSBML
raw_files = [f for f in os.listdir(cn.BIOMODELS_DIR) if f[-4:] == ".xml"]
files = [f for f in raw_files if f not in EXCEPTIONS]
paths = [os.path.join(cn.BIOMODELS_DIR, filename) for filename in files]

In [3]:
len(files)

826

In [4]:
# statistics columns
NUM_REACTIONS = "num_reactions(nonbdry)"
LP_ERROR = "lp_error"
BGAMES_ERROR = "bgames_error"
XGAMES_ERROR = "xgame_error"
TYPEI_ERROR = "type1_error"
TYPEII_ERROR = "type2_error"
CANCELING_ERROR = "canceling_error"
ECHELON_ERROR = "echelon_error"
TYPEIII_ERROR = "type3_error"
BGAMES_RIS = "bGAMES_ris_size"
XGAMES_RIS = "xGAMES_ris_size"
result_columns = [NUM_REACTIONS,
                  LP_ERROR,
                  BGAMES_ERROR,
                  XGAMES_ERROR,
                  BGAMES_RIS,
                  XGAMES_RIS,
                 ]
#                   TYPEI_ERROR,
#                   TYPEII_ERROR,
#                   CANCELING_ERROR,
#                   ECHELON_ERROR,
#                   TYPEIII_ERROR]

In [5]:
results = pd.DataFrame(0, index=files, columns=result_columns)
results[:5]

Unnamed: 0,num_reactions(nonbdry),lp_error,bgames_error,xgame_error,bGAMES_ris_size,xGAMES_ris_size
BIOMD0000000634.xml,0,0,0,0,0,0
BIOMD0000000152.xml,0,0,0,0,0,0
BIOMD0000000146.xml,0,0,0,0,0,0
BIOMD0000000620.xml,0,0,0,0,0,0
BIOMD0000000608.xml,0,0,0,0,0,0


In [6]:
simple = SimpleSBML()
simple.initialize(os.path.join(cn.BIOMODELS_DIR, "BIOMD0000000244.xml"))
s = StoichiometryMatrix(simple)
consistent = s.isConsistent()
print("consistent? ", consistent)

consistent?  False




In [7]:
import warnings
warnings.filterwarnings('ignore')
# LP only
simple = SimpleSBML()
count = 0
lp_start = time.time()
for file in files:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    s = StoichiometryMatrix(simple)
    num_reactions = s.stoichiometry_matrix.shape[1]
    results.at[file, NUM_REACTIONS] = num_reactions
    if num_reactions:
      consistent = s.isConsistent()
    else:
      consistent = -1
    results.at[file, LP_ERROR] = 1 - int(consistent)
  except:
    results.at[file, LP_ERROR] = -1
lp_end = time.time()
lp_time = lp_end - lp_start
print("Analysis finished!")
print("LP time:", lp_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800
Analysis finished!
LP time: 50.37086272239685


In [8]:
lp_results = results[results[LP_ERROR] == 1]
len(lp_results)
print("(Mean) ISS for LP is:", np.mean(lp_results[NUM_REACTIONS]))
print("(STD) ISS for LP is:", np.std(lp_results[NUM_REACTIONS]))

(Mean) ISS for LP is: 53.11392405063291
(STD) ISS for LP is: 90.84825118201051


In [9]:
len(results[results[LP_ERROR]==1])

158

In [10]:
len(results[results[LP_ERROR]==-1])

0

In [11]:
models_test_by_games = results[results[LP_ERROR]==1].index
models_test_by_games[:3]

Index(['BIOMD0000000634.xml', 'BIOMD0000000608.xml', 'BIOMD0000000344.xml'], dtype='object')

In [12]:
results.head()

Unnamed: 0,num_reactions(nonbdry),lp_error,bgames_error,xgame_error,bGAMES_ris_size,xGAMES_ris_size
BIOMD0000000634.xml,112,1,0,0,0,0
BIOMD0000000152.xml,120,0,0,0,0,0
BIOMD0000000146.xml,34,0,0,0,0,0
BIOMD0000000620.xml,0,2,0,0,0,0
BIOMD0000000608.xml,96,1,0,0,0,0


In [13]:
[1, 2] + [3, 4]

[1, 2, 3, 4]

In [14]:
detected_by_lp = results.loc[results[LP_ERROR]==1,].index
detected_by_lp

Index(['BIOMD0000000634.xml', 'BIOMD0000000608.xml', 'BIOMD0000000344.xml',
       'BIOMD0000000542.xml', 'BIOMD0000000230.xml', 'BIOMD0000000218.xml',
       'BIOMD0000000581.xml', 'BIOMD0000000219.xml', 'BIOMD0000000231.xml',
       'BIOMD0000000392.xml',
       ...
       'BIOMD0000000410.xml', 'BIOMD0000000612.xml', 'BIOMD0000000189.xml',
       'BIOMD0000000406.xml', 'BIOMD0000000572.xml', 'BIOMD0000000573.xml',
       'BIOMD0000000407.xml', 'BIOMD0000000413.xml', 'BIOMD0000000188.xml',
       'BIOMD0000000163.xml'],
      dtype='object', length=158)

In [15]:
# bGAMES 
simple = SimpleSBML()
count = 0
detected_by_lp = results.loc[results[LP_ERROR]==1,].index
bgames_ris = []
games_start = time.time()
# for file in files:
for file in detected_by_lp:
  count += 1
  if (count%50)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=True, error_details=False, suppress_message=True)
      results.at[file, BGAMES_ERROR] = int(res)
      if res:
        gr = GAMESReport(m)
        summary = m.error_summary
        if m.type_one_errors:
#           results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
          report, error_num1 = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        if m.type_two_errors:
#           results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
          report, error_num2 = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
        results.at[file, BGAMES_RIS] = np.mean(error_num1 + error_num2)
        bgames_ris.append(np.mean(error_num1 + error_num2))
  except:
    print("issue found when working on error_num!")
    results.at[file, BGAMES_ERROR] = -1   
games_end = time.time()
games_time = games_end - games_start
print("Analysis finished with total %d models!" % count)
print("GAMES time:", games_time)

we are analyzing Model number: 50
we are analyzing Model number: 100
we are analyzing Model number: 150
Analysis finished with total 158 models!
GAMES time: 54.16451406478882


In [16]:
np.mean(bgames_ris)

5.556743628463718

In [17]:
print("number of detected errors: ", len(results[results[BGAMES_ERROR]==1]))
print("number of GAMES but not in LP", len(results[(results[BGAMES_ERROR]==1) & (results[LP_ERROR]!=1)]))

number of detected errors:  123
number of GAMES but not in LP 0


In [18]:
games_time / 123

0.4403619029657628

In [19]:
results[results[BGAMES_ERROR]==-1]

Unnamed: 0,num_reactions(nonbdry),lp_error,bgames_error,xgame_error,bGAMES_ris_size,xGAMES_ris_size


In [20]:
results.head()

Unnamed: 0,num_reactions(nonbdry),lp_error,bgames_error,xgame_error,bGAMES_ris_size,xGAMES_ris_size
BIOMD0000000634.xml,112,1,1,0,3,0
BIOMD0000000152.xml,120,0,0,0,0,0
BIOMD0000000146.xml,34,0,0,0,0,0
BIOMD0000000620.xml,0,2,0,0,0,0
BIOMD0000000608.xml,96,1,1,0,2,0


In [21]:
# xGAMES for LP-detected models
simple = SimpleSBML()
count = 0
detected_by_lp = results.loc[results[LP_ERROR]==1,].index
xgames_ris = []
gamespp_start = time.time()
# for file in files:
for file in detected_by_lp:
  count += 1
  if (count%50)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    total_errornum = []
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
      results.at[file, XGAMES_ERROR] = int(res)
      if res:
        gr = GAMESReport(m)
        summary = m.error_summary
        if m.type_one_errors:
          report, error_num1 = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
          total_errornum = total_errornum + error_num1
        if m.type_two_errors:
          report, error_num2 = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
          total_errornum = total_errornum + error_num2
        if m.canceling_errors:
          report, error_num3 = gr.reportCancelingError(m.canceling_errors, explain_details=True)
          total_errornum = total_errornum + error_num3
        if m.echelon_errors:
          report, error_num4 = gr.reportEchelonError(m.echelon_errors, explain_details=True)
          total_errornum = total_errornum + error_num4
        if m.type_three_errors:
          report, error_num5 = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
          total_errornum = total_errornum + error_num5
        results.at[file, XGAMES_RIS] = np.mean(total_errornum)
        xgames_ris.append(np.mean(total_errornum))
  except:
    print("issue found with error_num!")
    results.at[file, XGAMES_ERROR] = -1   
gamespp_end = time.time()
initial_xgames_time = gamespp_end - gamespp_start
print("\nInitial analysis finished with total %d models!" % count)
print("number of (initially) detected errors: ", len(results[results[XGAMES_ERROR]==1]))
print("(INITIAL) xGAMES time:", initial_xgames_time)
print("Current length of xgames_ris", len(xgames_ris))

we are analyzing Model number: 50
we are analyzing Model number: 100
we are analyzing Model number: 150

Initial analysis finished with total 158 models!
number of (initially) detected errors:  150
(INITIAL) xGAMES time: 86.92453289031982
Current length of xgames_ris 150


In [22]:
len(results[(results[XGAMES_ERROR]!=1) & (results[LP_ERROR]==1)])

8

In [23]:
failed_to_find_error = results[(results[XGAMES_ERROR]!=1) & (results[LP_ERROR]==1)]
print("\n\nThe following have not been detected by games")
failed_to_find_error



The following have not been detected by games


Unnamed: 0,num_reactions(nonbdry),lp_error,bgames_error,xgame_error,bGAMES_ris_size,xGAMES_ris_size
BIOMD0000000353.xml,9,1,0,0,0,0
BIOMD0000000469.xml,399,1,0,0,0,0
BIOMD0000000247.xml,25,1,0,0,0,0
BIOMD0000000243.xml,19,1,0,0,0,0
BIOMD0000000070.xml,38,1,0,0,0,0
BIOMD0000000503.xml,32,1,0,0,0,0
BIOMD0000000473.xml,298,1,0,0,0,0
BIOMD0000000470.xml,399,1,0,0,0,0


In [24]:
np.mean(xgames_ris)

2.9431450820347673

In [25]:
results.T["BIOMD0000000353.xml"]

num_reactions(nonbdry)    9
lp_error                  1
bgames_error              0
xgame_error               0
bGAMES_ris_size           0
xGAMES_ris_size           0
Name: BIOMD0000000353.xml, dtype: int64

In [26]:
# deleted, but using MAX_ITR=15 and retest remaining models..
# re-run of xGAMES
simple = SimpleSBML()
MAX_ITR = 15
retest_xgames_ris = []
retest_model_start = time.time()
# for file in files:
for file in failed_to_find_error.index:
  found_error = False
  itr = 0
  print("we are analyzing model:", file)
  while (found_error is False) and (itr<MAX_ITR):
    itr += 1
    #
    if (itr%5)==0:
      print("we are analyzing with iteration: %d" % (itr))
    simple.initialize(os.path.join(cn.BIOMODELS_DIR, file))
    total_errornum = []
    m = GAMES_PP(simple)
    res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
    results.at[file, XGAMES_ERROR] = int(res)
    if res:
      print("Found error at iteration %d" % itr)
      found_error = True
      gr = GAMESReport(m)
      summary = m.error_summary
      if m.type_one_errors:
        report, error_num1 = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        total_errornum = total_errornum + error_num1
      if m.type_two_errors:
        report, error_num2 = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
        total_errornum = total_errornum + error_num2
      if m.canceling_errors:
        report, error_num3 = gr.reportCancelingError(m.canceling_errors, explain_details=True)
        total_errornum = total_errornum + error_num3
      if m.echelon_errors:
        report, error_num4 = gr.reportEchelonError(m.echelon_errors, explain_details=True)
        total_errornum = total_errornum + error_num4
      if m.type_three_errors:
        report, error_num5 = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
        total_errornum = total_errornum + error_num5
      results.at[file, XGAMES_RIS] = np.mean(total_errornum)  
      retest_xgames_ris.append(np.mean(total_errornum))
    if itr==MAX_ITR:
      print("max itr reached!")
retest_model_end = time.time()
retest_xgames_time = retest_model_end - retest_model_start
print("number of (finally) detected errors: ", len(results[results[XGAMES_ERROR]==1]))
print("(TOTAL) xGAMES time:", initial_xgames_time + retest_xgames_time)

we are analyzing model: BIOMD0000000353.xml
Found error at iteration 1
we are analyzing model: BIOMD0000000469.xml
Found error at iteration 3
we are analyzing model: BIOMD0000000247.xml
we are analyzing with iteration: 5
we are analyzing with iteration: 10
we are analyzing with iteration: 15
max itr reached!
we are analyzing model: BIOMD0000000243.xml
Found error at iteration 1
we are analyzing model: BIOMD0000000070.xml
we are analyzing with iteration: 5
we are analyzing with iteration: 10
we are analyzing with iteration: 15
max itr reached!
we are analyzing model: BIOMD0000000503.xml
we are analyzing with iteration: 5
we are analyzing with iteration: 10
we are analyzing with iteration: 15
max itr reached!
we are analyzing model: BIOMD0000000473.xml
we are analyzing with iteration: 5
Found error at iteration 7
we are analyzing model: BIOMD0000000470.xml
we are analyzing with iteration: 5
we are analyzing with iteration: 10
Found error at iteration 11
number of (finally) detected error

In [27]:
np.mean(xgames_ris + retest_xgames_ris)

3.3191726600336455

In [28]:
retest_xgames_time + initial_xgames_time

429.696861743927

In [29]:
print("number of (finally) detected errors by xGAMES: ", sum(results[XGAMES_ERROR]))

number of (finally) detected errors by xGAMES:  155


In [30]:
print("number of detected errors: ", len(results[results[XGAMES_ERROR]==1]))
print("number of GAMES errors not in LP", len(results[(results[XGAMES_ERROR]==1) & (results[LP_ERROR]!=1)]))
len(results[results[XGAMES_ERROR]==-1])

number of detected errors:  155
number of GAMES errors not in LP 0


0

In [31]:
# final results
bgames_results = results.loc[results[BGAMES_ERROR]==1,]
xgames_results = results.loc[results[XGAMES_ERROR]==1,]
print("mean ris for bGAMES: %f" % np.mean(bgames_results[BGAMES_RIS]))
print("mean ris for xGAMES: %f" % np.mean(xgames_results[XGAMES_RIS]))

mean ris for bGAMES: 5.195122
mean ris for xGAMES: 3.129032


In [32]:
# simple.initialize(os.path.join(cn.BIOMODELS_DIR, "BIOMD0000000175.xml"))
# total_errornum = []
# m = GAMES_PP(simple)
# res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
# results.at[file, XGAMES_ERROR] = int(res)
# if res:
#   print("Found error with this model")
# found_error = True
# gr = GAMESReport(m)
# summary = m.error_summary
# if m.type_one_errors:
#   report, error_num1 = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
#   total_errornum = total_errornum + error_num1
# if m.type_two_errors:
#   report, error_num2 = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
#   total_errornum = total_errornum + error_num2
# if m.canceling_errors:
#   report, error_num3 = gr.reportCancelingError(m.canceling_errors, explain_details=True)
#   total_errornum = total_errornum + error_num3
# if m.echelon_errors:
#   report, error_num4 = gr.reportEchelonError(m.echelon_errors, explain_details=True)
#   total_errornum = total_errornum + error_num4
# if m.type_three_errors:
#   report, error_num5 = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
#   total_errornum = total_errornum + error_num5
# results.at[file, XGAMES_RIS] = np.mean(total_errornum) 

In [33]:
# m.error_summary

In [34]:
# # The following is to make sure to not be able to found an error though GAMES
# re_retest_files = ["BIOMD0000000070.xml",
#                   "BIOMD0000000247.xml",
#                   "BIOMD0000000473.xml",
#                   "BIOMD0000000503.xml",
#                   "BIOMD0000000469.xml"]
# MAX_ITR = 25
# found_error = False
# for f in re_retest_files:
#   print("We are working on model %s" % f)
#   itr = 0
#   found_error = False
#   while (itr<MAX_ITR) and (found_error is False):
#     itr += 1
#     if itr%5==0:
#       print("Current iteration is %d" % itr)
#     simple = SimpleSBML()
#     simple.initialize(os.path.join(cn.BIOMODELS_DIR, f))
#     m = GAMES_PP(simple)
#     res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
#     if res:
#       found_error = True
#       print("Found error!")
#     if itr==MAX_ITR:
#       print("max itr reached!")