In [1]:
from games_setup import *
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule, MoleculeStoichiometry
from SBMLLint.common.reaction import Reaction
from SBMLLint.games.som import SOM
from SBMLLint.common.simple_sbml import SimpleSBML

import collections
import copy
import itertools
import networkx as nx
import numpy as np
import pandas as pd
#
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix
from SBMLLint.games.mesgraph import MESGraph
from SBMLLint.games.games_pp import GAMES_PP, SOMStoichiometry, SOMReaction, TOLERANCE
from SBMLLint.games.games_report import GAMESReport, SimplifiedReaction

Current Directory: /Users/woosubs/Desktop/ModelEngineering/SBMLLint/SBMLLint/notebooks


In [2]:
# Model files that were not compatible with libsbml; they are excluded below.
EXCEPTIONS = [
    "BIOMD0000000094.xml",
    "BIOMD0000000596.xml",
    "BIOMD0000000786.xml",
    "BIOMD0000000794.xml",
    "BIOMD0000000830.xml",
]
data_dir = cn.BIOMODELS_DIR
# Every ".xml" entry in the BioModels directory is a candidate model file.
files = [name for name in os.listdir(data_dir) if name.endswith(".xml")]
# Drop the files listed in EXCEPTIONS and report how many remain.
net_files = [name for name in files if name not in EXCEPTIONS]
print(len(net_files))

826


In [3]:
# Model statistics columns: keys of the per-model result dicts, which become
# the column names of the statistics DataFrame.
MODEL_ID = "model_id"  # model file name without its ".xml" suffix
NUM_TOTAL_REACTIONS = "num_total_reactions"  # number of reactions in the model
NUM_BDRY_REACTIONS = "num_bdry_reactions"  # reactions in category cn.REACTION_BOUNDARY
NUM_UNIUNI_REACTIONS = "num_uniuni_reactions"  # reactions in category cn.REACTION_1_1
NUM_UMMU_REACTIONS = "num_ummu_reactions"  # cn.REACTION_1_n plus cn.REACTION_n_1 reactions
NUM_MULTIMULTI_REACTIONS = "num_multimulti_reactions"  # reactions in category cn.REACTION_n_n
#

In [4]:
# Checking all graphs: tally the reactions of every model by category and
# collect one result dict per model in res_list.
count = 0  # stays 0 when net_files is empty; otherwise ends as the number of models processed
simple = SimpleSBML()
res_list = []
false_errors = set()  # not populated in this cell; kept in case other cells read it
for count, file in enumerate(net_files, start=1):
  # Progress message every 100 models.
  if (count%100)==0:
    print("we are analyzing Model number:", count)
  # The same SimpleSBML object is re-initialized for each model file.
  simple.initialize(os.path.join(data_dir, file))
  # Per-category reaction counts for the current model.
  num_dic = {cn.REACTION_1_1: 0,
             cn.REACTION_1_n: 0,
             cn.REACTION_n_1: 0,
             cn.REACTION_n_n: 0,
             cn.REACTION_BOUNDARY: 0
            }
  for reaction in simple.reactions:
    # Reactions whose category is not one of the five keys stay uncounted,
    # which the consistency check below will then report.
    if reaction.category in num_dic:
      num_dic[reaction.category] += 1
  res_dic = {MODEL_ID: file[:-4],  # strip the ".xml" suffix
             NUM_TOTAL_REACTIONS: len(simple.reactions),
             NUM_BDRY_REACTIONS: num_dic[cn.REACTION_BOUNDARY],
             NUM_UNIUNI_REACTIONS: num_dic[cn.REACTION_1_1],
             NUM_UMMU_REACTIONS: num_dic[cn.REACTION_1_n] + num_dic[cn.REACTION_n_1],
             NUM_MULTIMULTI_REACTIONS: num_dic[cn.REACTION_n_n]}
  # Consistency check: the category counts must add up to the total.
  total_reactions = sum(num_dic.values())
  if total_reactions != res_dic[NUM_TOTAL_REACTIONS]:
    print("Error! The sum doesn't match with model %s" % file)
  res_list.append(res_dic)

we are analyzing Model number: 100
we are analyzing Model number: 200
we are analyzing Model number: 300
we are analyzing Model number: 400
we are analyzing Model number: 500
we are analyzing Model number: 600
we are analyzing Model number: 700
we are analyzing Model number: 800


In [5]:
# One row per model, one column per statistic collected in res_list.
res_df = pd.DataFrame(data=res_list)
res_df.head(5)

Unnamed: 0,model_id,num_total_reactions,num_bdry_reactions,num_uniuni_reactions,num_ummu_reactions,num_multimulti_reactions
0,BIOMD0000000634,112,0,24,37,51
1,BIOMD0000000152,120,0,5,115,0
2,BIOMD0000000146,34,0,25,9,0
3,BIOMD0000000620,20,20,0,0,0
4,BIOMD0000000608,140,44,15,45,36


In [6]:
# Number of entries in the total-reactions column (one per analyzed model).
res_df[NUM_TOTAL_REACTIONS].shape[0]

826

In [7]:
def printNumStatistics(category=NUM_UNIUNI_REACTIONS, df=res_df):
  """
  Print summary statistics for one reaction-count column.

  Four lines are printed: the mean of the column, its standard error
  (np.std with default arguments divided by sqrt of the number of rows),
  the mean of the column divided row-wise by NUM_TOTAL_REACTIONS, and the
  standard error of that fraction.

  :param str category: name of the column to summarize
  :param pd.DataFrame df: frame holding `category` and NUM_TOTAL_REACTIONS
      columns. NOTE: the default is bound to the `res_df` object that exists
      when this function is defined; re-run this definition (or pass `df`
      explicitly) after rebuilding `res_df`.
  """
  # Read from the frame that was passed in, not from the global res_df.
  category_num = df[category]
  denom = df[NUM_TOTAL_REACTIONS]
  n_models = len(denom)
  # Shared by the two "fraction" lines and the two standard-error lines.
  fraction = category_num / denom
  root_n = np.sqrt(n_models)
  print("Average number of %s: %.01f" % (category, np.mean(category_num)))
  print("se Number of %s: %.01f" % (category, np.std(category_num) / root_n))
  print("Average fraction of %s: %.03f" % (category, np.mean(fraction)))
  print("se of fraction, %s: %.02f" % (category, np.std(fraction) / root_n))

In [8]:
# Statistics for the uni-uni (cn.REACTION_1_1) reaction counts.
printNumStatistics(category=NUM_UNIUNI_REACTIONS)

Average number of num_uniuni_reactions: 10.6
se Number of num_uniuni_reactions: 1.4
Average fraction of num_uniuni_reactions: 0.326
se of fraction, num_uniuni_reactions: 0.01


In [9]:
# Statistics for the combined cn.REACTION_1_n and cn.REACTION_n_1 reaction counts.
printNumStatistics(category=NUM_UMMU_REACTIONS)

Average number of num_ummu_reactions: 11.6
se Number of num_ummu_reactions: 1.7
Average fraction of num_ummu_reactions: 0.217
se of fraction, num_ummu_reactions: 0.01


In [10]:
# Statistics for the multi-multi (cn.REACTION_n_n) reaction counts.
printNumStatistics(category=NUM_MULTIMULTI_REACTIONS)

Average number of num_multimulti_reactions: 4.0
se Number of num_multimulti_reactions: 0.8
Average fraction of num_multimulti_reactions: 0.079
se of fraction, num_multimulti_reactions: 0.01


In [11]:
# Statistics for the boundary (cn.REACTION_BOUNDARY) reaction counts.
printNumStatistics(category=NUM_BDRY_REACTIONS)

Average number of num_bdry_reactions: 6.4
se Number of num_bdry_reactions: 0.4
Average fraction of num_bdry_reactions: 0.378
se of fraction, num_bdry_reactions: 0.01


In [12]:
# Statistics for the total reaction counts; the "fraction" lines divide the
# total column by itself.
printNumStatistics(category=NUM_TOTAL_REACTIONS)

Average number of num_total_reactions: 32.6
se Number of num_total_reactions: 2.9
Average fraction of num_total_reactions: 1.000
se of fraction, num_total_reactions: 0.00


In [13]:
# Sanity check: the average fractions printed for the uniuni, ummu, multimulti
# and bdry categories are hand-copied here; together they should add up to 1.
0.326 + 0.217 + 0.079 + 0.378

1.0

In [14]:
# Count the models whose boundary-reaction tally is zero.
no_bdry_mask = res_df[NUM_BDRY_REACTIONS] == 0
print("Number of models with no boundary reactions", int(no_bdry_mask.sum()))

Number of models with no boundary reactions 383


In [15]:
# Count the models whose uni-uni reaction tally is zero.
no_uniuni_mask = res_df[NUM_UNIUNI_REACTIONS] == 0
print("Number of models with no uniuni reactions", int(no_uniuni_mask.sum()))

Number of models with no uniuni reactions 280


In [16]:
# Count the models whose multi-multi reaction tally is zero.
no_multimulti_mask = res_df[NUM_MULTIMULTI_REACTIONS] == 0
print("Number of models with no multimulti reactions", int(no_multimulti_mask.sum()))

Number of models with no multimulti reactions 655


In [17]:
# Count the models whose UMMU reaction tally is zero.
no_ummu_mask = res_df[NUM_UMMU_REACTIONS] == 0
print("Number of models with no UMMU reactions", int(no_ummu_mask.sum()))

Number of models with no UMMU reactions 417
