In [1]:
import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.simple_sbml import SimpleSBML
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix

from SBMLLint.games.som import SOM
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction
import matplotlib.pyplot as plt
%matplotlib inline
import time
from scipy.linalg import lu, inv

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebook


In [3]:
data_dir = '/Users/woosubs/Desktop/ModelEngineering/ext_biomodels'
# we can remove EXCEPTIONS from files, as they are not loaded by simpleSBML
files = [f for f in os.listdir(data_dir) if f[-4:] == ".xml"]
len(files)

831

In [4]:
# data frame structure
# statistics columns
NUM_REACTIONS = "num_reactions(nonbdry)"
LP_ERROR = "lp_error"
GAMES_ERROR = "games_error"
GAMESPP_ERROR = "gamespp_error"
TYPEI_ERROR = "type1_error"
TYPEII_ERROR = "type2_error"
CANCELING_ERROR = "canceling_error"
ECHELON_ERROR = "echelon_error"
TYPEIII_ERROR = "type3_error"
result_columns = [NUM_REACTIONS,
                  LP_ERROR,
                  GAMES_ERROR,
                  GAMESPP_ERROR,
                  TYPEI_ERROR,
                  TYPEII_ERROR,
                  CANCELING_ERROR,
                  ECHELON_ERROR,
                  TYPEIII_ERROR]
results = pd.DataFrame(0, index=files, columns=result_columns)
results[:5]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000634.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000152.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000146.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000620.xml,0,0,0,0,0,0,0,0,0
BIOMD0000000608.xml,0,0,0,0,0,0,0,0,0


In [5]:
# cannot be initialized by simpleSBML; bad antimony string
EXCEPTIONS = ["BIOMD0000000146_url.xml",
              "BIOMD0000000152_url.xml",
              "BIOMD0000000608_url.xml",
              "BIOMD0000000620_url.xml",
              "BIOMD0000000634_url.xml",
             ]

# simple.initialize(os.path.join(data_dir, EXCEPTIONS[4]))
# s = StoichiometryMatrix(simple)
# num_reactions = s.stoichiometry_matrix.shape[1]
# results.at[file, NUM_REACTIONS] = num_reactions
# if num_reactions:
#   consistent = s.isConsistent()

In [6]:
files[0]

'BIOMD0000000634.xml'

In [7]:
files[0][-7:-4]

'634'

In [8]:
simple = SimpleSBML()
model_147 = [x for x in files if x[-7:-4] == '147'][0]
simple.initialize(os.path.join(data_dir, model_147))
m = GAMES_PP(simple)
res = m.analyze(simple_games=False, error_details=True, suppress_message=False)

Model analyzed...
Type I error:  [PathComponents(node1='IkBa_mRNA', node2='IkBa_mRNA', reactions=['tsl_a']), PathComponents(node1='IkBb_mRNA', node2='IkBb_mRNA', reactions=['tsl_b']), PathComponents(node1='IkBe_mRNA', node2='IkBe_mRNA', reactions=['tsl_e']), PathComponents(node1='IKK', node2='IkBaIKK', reactions=['int_ai']), PathComponents(node1='NFkB_cytoplasm', node2='IkBaNFkB_cytoplasm', reactions=['int_an']), PathComponents(node1='NFkB_nucleus', node2='IkBaNFkB_nucleus', reactions=['int_an_n']), PathComponents(node1='NFkB_nucleus', node2='IkBbNFkB_nucleus', reactions=['int_bn_n']), PathComponents(node1='NFkB_cytoplasm', node2='IkBbNFkB_cytoplasm', reactions=['int_bn']), PathComponents(node1='IKK', node2='IkBbIKK', reactions=['int_bi']), PathComponents(node1='IKK', node2='IkBeIKK', reactions=['int_ei']), PathComponents(node1='NFkB_cytoplasm', node2='IkBeNFkB_cytoplasm', reactions=['int_en']), PathComponents(node1='NFkB_nucleus', node2='IkBeNFkB_nucleus', reactions=['int_en_n'])]
Typ

In [17]:
gr = GAMESReport(m)
for reaction in m.reactions:
  print(reaction)

tsl_a: IkBa_mRNA -> IkBa_mRNA + IkBa_cytoplasm; nucleus * tr1a * IkBa_mRNA
int_ai: IkBa_cytoplasm + IKK -> IkBaIKK; cytoplasm * (a1 * IkBa_cytoplasm * IKK - d1_1 * IkBaIKK)
int_an: IkBa_cytoplasm + NFkB_cytoplasm -> IkBaNFkB_cytoplasm; cytoplasm * (a4_1 * IkBa_cytoplasm * NFkB_cytoplasm - d4_1 * IkBaNFkB_cytoplasm)
int_an_n: IkBa_nucleus + NFkB_nucleus -> IkBaNFkB_nucleus; nucleus * (a4_2 * IkBa_nucleus * NFkB_nucleus - d4_2 * IkBaNFkB_nucleus)
int_2ani: IkBaNFkB_cytoplasm + IKK -> IkBaIKKNFkB; cytoplasm * (a7 * IkBaNFkB_cytoplasm * IKK - d1_2 * IkBaIKKNFkB)
int_2ain: IkBaIKK + NFkB_cytoplasm -> IkBaIKKNFkB; cytoplasm * (a4_3 * IkBaIKK * NFkB_cytoplasm - d4_3 * IkBaIKKNFkB)
deg_an_n: IkBaNFkB_nucleus -> NFkB_nucleus; nucleus * deg4_n * IkBaNFkB_nucleus
deg_an: IkBaNFkB_cytoplasm -> NFkB_cytoplasm; cytoplasm * deg4_c * IkBaNFkB_cytoplasm
deg_ai: IkBaIKK -> IKK; cytoplasm * r1 * IkBaIKK
deg_ain: IkBaIKKNFkB -> NFkB_cytoplasm + IKK; cytoplasm * r4 * IkBaIKKNFkB
loc_a: IkBa_cytoplasm -> Ik

In [13]:
for str_obj in gr.reportTypeOneError(m.type_one_errors):
  print(str_obj)

1. tsl_a: IkBa_mRNA -> IkBa_mRNA + IkBa_cytoplasm


----------------------------------------------------------------------

1. tsl_b: IkBb_mRNA -> IkBb_mRNA + IkBb_cytoplasm


----------------------------------------------------------------------

1. tsl_e: IkBe_mRNA -> IkBe_mRNA + IkBe_cytoplasm


----------------------------------------------------------------------

1. deg_ai: IkBaIKK -> IKK
2. int_ai: IkBa_cytoplasm + IKK -> IkBaIKK


----------------------------------------------------------------------

1. deg_an: IkBaNFkB_cytoplasm -> NFkB_cytoplasm
2. int_an: IkBa_cytoplasm + NFkB_cytoplasm -> IkBaNFkB_cytoplasm


----------------------------------------------------------------------

1. deg_an_n: IkBaNFkB_nucleus -> NFkB_nucleus
2. int_an_n: IkBa_nucleus + NFkB_nucleus -> IkBaNFkB_nucleus


----------------------------------------------------------------------

1. deg_an_n: IkBaNFkB_nucleus -> NFkB_nucleus
2. loc_an: IkBaNFkB_nucleus -> IkBaNFkB_cytoplasm
3. deg_an: IkBaNFkB_c

In [18]:
s = StoichiometryMatrix(simple)
s.isConsistent()

False

In [24]:
col_of_interest = s.stoichiometry_matrix.T['IkBa_cytoplasm']
nonzero_reactions = col_of_interest[col_of_interest.to_numpy().nonzero()[0]]

In [34]:
s.stoichiometry_matrix[nonzero_reactions.index[3]]

IkBaIKK               0.0
IkBbNFkB_nucleus      0.0
IkBe_nucleus          0.0
IKK                   0.0
IkBb_nucleus          0.0
IkBbIKK               0.0
IkBeIKK               0.0
IkBeIKKNFkB           0.0
IkBa_cytoplasm       -1.0
IkBeNFkB_nucleus      0.0
IkBaNFkB_cytoplasm    0.0
IkBbNFkB_cytoplasm    0.0
IkBb_mRNA             0.0
IkBa_mRNA             0.0
IkBe_cytoplasm        0.0
IkBaNFkB_nucleus      0.0
IkBe_mRNA             0.0
IkBeNFkB_cytoplasm    0.0
NFkB_nucleus          0.0
IkBbIKKNFkB           0.0
IkBa_nucleus          1.0
NFkB_cytoplasm        0.0
IkBb_cytoplasm        0.0
IkBaIKKNFkB           0.0
Name: loc_a, dtype: float64

In [6]:
# LP only
# suppress warnings; 
import warnings
warnings.filterwarnings('ignore')

simple = SimpleSBML()
count = 0
lp_start = time.time()
for file in files:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    s = StoichiometryMatrix(simple)
    num_reactions = s.stoichiometry_matrix.shape[1]
    results.at[file, NUM_REACTIONS] = num_reactions
    if num_reactions:
      consistent = s.isConsistent()
    else:
      consistent = -1
    results.at[file, LP_ERROR] = 1 - int(consistent)
  except:
    results.at[file, LP_ERROR] = -1
lp_end = time.time()
lp_time = lp_end - lp_start
print("Analysis finished!")
print("LP time:", lp_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800
Analysis finished!
LP time: 53.225467920303345


In [7]:
lp_results = results[results[LP_ERROR] == 1]
len(lp_results)
print("(Mean) ISS for LP is:", np.mean(lp_results[NUM_REACTIONS]))
print("(STD) ISS for LP is:", np.std(lp_results[NUM_REACTIONS]))

(Mean) ISS for LP is: 53.11392405063291
(STD) ISS for LP is: 90.84825118201053


In [8]:
len(results[results[LP_ERROR]==1])

158

In [20]:
results[results[LP_ERROR]==-1]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000596.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000794.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000786.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000094.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000830.xml,0,-1,-1,0,0,0,0,0,0


In [14]:
# simple bGAMES only
simple = SimpleSBML()
count = 0
games_start = time.time()
for file in files:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=True, error_details=False, suppress_message=True)
      results.at[file, GAMES_ERROR] = int(res)
      if res:
        gr = GAMESReport(m)
        summary = m.error_summary
        if m.type_one_errors:
          results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
          report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        if m.type_two_errors:
          results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
          report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
  except:
    results.at[file, GAMES_ERROR] = -1   
games_end = time.time()
games_time = games_end - games_start
print("Analysis finished!")
print("GAMES time:", games_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800
Analysis finished!
GAMES time: 124.9515151977539


In [42]:
print("number of detected errors: ", len(results[results[GAMES_ERROR]==1]))
print("number of simple GAMES but not in LP", len(results[(results[GAMES_ERROR]==1) & (results[LP_ERROR]!=1)]))

number of detected errors:  123
number of simple GAMES but not in LP 0


In [16]:
123/158

0.7784810126582279

In [17]:
# GAMES+
# file, GAMES_ERROR coding:
# 0; normal - no error found
# -1; not loaded or error found
# 1; normal - error found
# 2; echelon error found, but it is not explainable
# 3; type III error found, but it is not explainable
simple = SimpleSBML()
count = 0
gamespp_start = time.time()
for file in files:
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  try:
    simple.initialize(os.path.join(data_dir, file))
    m = GAMES_PP(simple)
    if simple.reactions:
      res = m.analyze(simple_games=False, error_details=False, suppress_message=True)
      results.at[file, GAMESPP_ERROR] = int(res)
      if res:
#         if m.echelon_errors or m.type_three_errors:
#           try:
#             #k = inv(m.lower)
#             k = np.linalg.inv(m.lower)
#           except:
#             print("model %s has as a singular L matrix:" % file)
#         condition_number = np.linalg.cond(m.lower)
#         if condition_number > 300:
#           print("*****The L matrix of the model %s has a condition number %f*****" % (file, condition_number))
        gr = GAMESReport(m)
        summary = m.error_summary
        if m.type_one_errors:
          results.at[file, TYPEI_ERROR] = len(m.type_one_errors)
          report, error_num = gr.reportTypeOneError(m.type_one_errors, explain_details=True)
        if m.type_two_errors:
          results.at[file, TYPEII_ERROR] = len(m.type_two_errors)
          report, error_num = gr.reportTypeTwoError(m.type_two_errors, explain_details=True)
        if m.canceling_errors:
          results.at[file, CANCELING_ERROR] = len(m.canceling_errors)
          report, error_num = gr.reportCancelingError(m.canceling_errors, explain_details=True)
        if m.echelon_errors:
          #print("Model %s has an echelon error:" % file)
          results.at[file, ECHELON_ERROR] = len(m.echelon_errors)
          report, error_num = gr.reportEchelonError(m.echelon_errors, explain_details=True)
          if report is False:
            results.at[file, GAMESPP_ERROR] = 2
#             print("Model %s has an unexplainable Echelon Error" % file)
#             print("As the lower matrix has a condition number %f" % condition_number)
#             print("Decide if the matrix is invertible")
        if m.type_three_errors:
          #print("Model %s has a type III error:" % file)
          results.at[file, TYPEIII_ERROR] = len(m.type_three_errors)
          report, error_num = gr.reportTypeThreeError(m.type_three_errors, explain_details=True)
          if report is False:
            results.at[file, GAMESPP_ERROR] = 3
#             print("Model %s has an unexplainable Type III Error" % file)
#             print("As the lower matrix has a condition number %f" % condition_number)
#             print("Decide if the matrix is invertible")
  except:
    results.at[file, GAMES_ERROR] = -1   
gamespp_end = time.time()
gamespp_time = gamespp_end - gamespp_start
print("\nAnalysis finished!")
print("GAMES++ time:", gamespp_time)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800

Analysis finished!
GAMES++ time: 216.51195526123047


In [43]:
print("number of detected errors: ", len(results[results[GAMESPP_ERROR]==1]))
print("number of extended GAMES errors not in LP", len(results[(results[GAMESPP_ERROR]==1) & (results[LP_ERROR]!=1)]))
len(results[results[GAMESPP_ERROR]==-1])

number of detected errors:  150
number of extended GAMES errors not in LP 0


0

In [44]:
len(results[results[GAMESPP_ERROR]==3])

3

In [45]:
results[results[GAMES_ERROR]==-1]

Unnamed: 0,num_reactions(nonbdry),lp_error,games_error,gamespp_error,type1_error,type2_error,canceling_error,echelon_error,type3_error
BIOMD0000000596.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000794.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000786.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000094.xml,0,-1,-1,0,0,0,0,0,0
BIOMD0000000830.xml,0,-1,-1,0,0,0,0,0,0


In [41]:
150/158

0.9493670886075949

In [17]:
# Finally, model statistics
MODEL_ID = "model_id"
NUM_TOTAL_REACTIONS = "num_total_reactions"
NUM_BDRY_REACTIONS = "num_bdry_reactions"
NUM_UNIUNI_REACTIONS = "num_uniuni_reactions"
NUM_UMMU_REACTIONS = "num_ummu_reactions"
NUM_MULTIMULTI_REACTIONS = "num_multimulti_reactions"
# exceptions for model
EXCEPTIONS = ["BIOMD0000000094.xml",
              "BIOMD0000000596.xml",
             "BIOMD0000000786.xml",
             "BIOMD0000000794.xml",
             "BIOMD0000000830.xml"]
# Checking all models to calculate the number of reactions per type 
count = 0
simple = SimpleSBML()
res_list = []
false_errors = set()
for file in files:
  if file in EXCEPTIONS:
    continue
  simple.initialize(os.path.join(data_dir, file))
  num_uniuni = 0
  num_ummu = 0
  num_multimulti = 0
  num_bdry = 0
  count += 1
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  simple.initialize(os.path.join(data_dir, file))
  num_dic = {cn.REACTION_1_1: 0,
             cn.REACTION_1_n: 0,
             cn.REACTION_n_1: 0,
             cn.REACTION_n_n: 0,
             cn.REACTION_BOUNDARY: 0
            }  
  for reaction in simple.reactions:
    for category in num_dic.keys():
      if reaction.category == category:
        num_dic[category] += 1
  res_dic = dict({MODEL_ID: file[:-8],
                  NUM_TOTAL_REACTIONS: len(simple.reactions),
                  NUM_BDRY_REACTIONS: num_dic[cn.REACTION_BOUNDARY],
                  NUM_UNIUNI_REACTIONS: num_dic[cn.REACTION_1_1],
                  NUM_UMMU_REACTIONS: num_dic[cn.REACTION_1_n] + num_dic[cn.REACTION_n_1],
                  NUM_MULTIMULTI_REACTIONS: num_dic[cn.REACTION_n_n]})
  sum = res_dic[NUM_BDRY_REACTIONS] + res_dic[NUM_UNIUNI_REACTIONS] + res_dic[NUM_UMMU_REACTIONS] + res_dic[NUM_MULTIMULTI_REACTIONS]
  if sum != res_dic[NUM_TOTAL_REACTIONS]:
    print("Error! The sum doens't match with model %s" % file)
  res_list.append(res_dic)

# summarize the results
stats_df = pd.DataFrame(res_list)
reaction_num_cat = [NUM_UNIUNI_REACTIONS,
                    NUM_UMMU_REACTIONS,
                    NUM_MULTIMULTI_REACTIONS
]
reaction_prop = [(stats_df[x]/stats_df[NUM_TOTAL_REACTIONS]).mean()*100
                 for x in
                 reaction_num_cat]
ext_biomodels_reaction_prop = pd.Series(reaction_prop, index=reaction_num_cat)
ext_biomodels_reaction_prop

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800


num_uniuni_reactions        32.582602
num_ummu_reactions          21.681572
num_multimulti_reactions     7.942943
dtype: float64

In [16]:
EXCEPTIONS

['BIOMD0000000094.xml',
 'BIOMD0000000596.xml',
 'BIOMD0000000786.xml',
 'BIOMD0000000794.xmlBIOMD0000000830.xml']