In [3]:
from games_setup import *
import SBMLLint.common.constants as cn
from SBMLLint.common.reaction import Reaction
from SBMLLint.common.stoichiometry_matrix import StoichiometryMatrix
from SBMLLint.games.som import SOM
from SBMLLint.games.mesgraph import MESGraph
import collections
import tesbml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Current Directory: /Users/woosubshin/Desktop/ModelEngineering/SBMLLint/notebook


In [4]:
# Collect every SBML (.xml) file found in the project's data directory,
# both as bare file names (`files`) and as full paths (`paths`).
data_dir = cn.DATA_DIR
files = [f for f in os.listdir(data_dir) if f.endswith(".xml")]
paths = [os.path.join(data_dir, f) for f in files]

In [5]:
# Column names of the per-model statistics table.
NUM_REACTIONS = "non_bdry_reactions"
LP_ERROR = "lp_error"
MESGRAPH_ERROR = "mesgraph_error"
TYPE_I = "type1"
TYPE_II = "type2"
# Column order used when the results DataFrame is created.
result_columns = [
    NUM_REACTIONS,
    LP_ERROR,
    MESGRAPH_ERROR,
    TYPE_I,
    TYPE_II,
]

In [6]:
# One row per model file, one column per statistic, every cell starting at 0.
results = pd.DataFrame(data=0, index=files, columns=result_columns)
results.head(5)

Unnamed: 0,non_bdry_reactions,lp_error,mesgraph_error,type1,type2
BIOMD0000000199_url.xml,0,0,0,0,0
BIOMD0000000189_url.xml,0,0,0,0,0
BIOMD0000000387_url.xml,0,0,0,0,0
BIOMD0000000397_url.xml,0,0,0,0,0
BIOMD0000000413_url.xml,0,0,0,0,0


In [7]:
# Run the stoichiometry-matrix (LP) check and the MESGraph check on every
# model file and store the outcome in `results`.
simple = SimpleSBML()
for file in files:
  try:
    simple.initialize(os.path.join(data_dir, file))
    m = MESGraph(simple)
    s = StoichiometryMatrix(simple)
    # number of columns of the stoichiometry matrix
    num_reactions = s.stoichiometry_matrix.shape[1]
    results.at[file, NUM_REACTIONS] = num_reactions
    if num_reactions:
      consistent = s.isConsistent()
    else:
      # nothing to test: -1 makes LP_ERROR come out as 2 on the next line
      consistent = -1
    # 0 when `consistent` is truthy, 1 when it is falsy, 2 for the -1 sentinel
    results.at[file, LP_ERROR] = 1 - int(consistent)
    if simple.reactions:
      m.analyze(simple.reactions, error_details=False)
      results.at[file, TYPE_I] = len(m.type_one_errors)
      results.at[file, TYPE_II] = len(m.type_two_errors)
      results.at[file, MESGRAPH_ERROR] = len(m.type_one_errors) + len(m.type_two_errors)
  except Exception:
    # Catch Exception rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit can still stop this long loop.
    # -1 flags a model that could not be processed; TYPE_I and TYPE_II are
    # deliberately left as they were.
    results.at[file, NUM_REACTIONS] = -1
    results.at[file, LP_ERROR] = -1
    results.at[file, MESGRAPH_ERROR] = -1

In [8]:
# Preview the first rows of the per-model results table.
results.head()

Unnamed: 0,non_bdry_reactions,lp_error,mesgraph_error,type1,type2
BIOMD0000000199_url.xml,10,0,0,0,0
BIOMD0000000189_url.xml,13,1,4,4,0
BIOMD0000000387_url.xml,4,0,0,0,0
BIOMD0000000397_url.xml,29,0,0,0,0
BIOMD0000000413_url.xml,5,1,1,1,0


In [9]:
# Summary counts taken from `results`; a value of -1 in LP_ERROR marks a
# model that could not be processed, so those rows are excluded from the total.
print("Total models: ", int((results[LP_ERROR] != -1).sum()))
print("Number of Errors by LP:", int((results[LP_ERROR] == 1).sum()))
print("Number of Errors by MESGraph:", int((results[MESGRAPH_ERROR] > 0).sum()))
print("All MESGraph errors included in LP Errors...")
# The statement printed above is not computed here. It can be checked with:
#   len(results[(results[LP_ERROR]==0) & (results[MESGRAPH_ERROR]>0)])
#   len(results[(results[LP_ERROR]==1) & (results[MESGRAPH_ERROR]==0)])

Total models:  724
Number of Errors by LP: 150
Number of Errors by MESGraph: 110
All MESGraph errors included in LP Errors...


In [10]:
# TODO 1: logistic regression of the LP error on the number of non-boundary reactions
# TODO 2: scatter plot (followed by a linear fit) of the MESGraph errors against the number of non-boundary reactions

In [15]:
# Models flagged by the LP check (LP_ERROR == 1) for which MESGraph
# recorded no error at all (MESGRAPH_ERROR == 0).
type_two_errors = results.loc[(results[LP_ERROR] == 1) & (results[MESGRAPH_ERROR] == 0)]
print(type_two_errors.shape[0])
type_two_errors.index

40


Index(['BIOMD0000000471_url.xml', 'BIOMD0000000468_url.xml',
       'BIOMD0000000190_url.xml', 'BIOMD0000000572_url.xml',
       'BIOMD0000000051_url.xml', 'BIOMD0000000479_url.xml',
       'BIOMD0000000469_url.xml', 'BIOMD0000000175_url.xml',
       'BIOMD0000000247_url.xml', 'BIOMD0000000049_url.xml',
       'BIOMD0000000143_url.xml', 'BIOMD0000000470_url.xml',
       'BIOMD0000000235_url.xml', 'BIOMD0000000052_url.xml',
       'BIOMD0000000503_url.xml', 'BIOMD0000000426_url.xml',
       'BIOMD0000000105_url.xml', 'BIOMD0000000167_url.xml',
       'BIOMD0000000472_url.xml', 'BIOMD0000000245_url.xml',
       'BIOMD0000000496_url.xml', 'BIOMD0000000473_url.xml',
       'BIOMD0000000497_url.xml', 'BIOMD0000000248_url.xml',
       'BIOMD0000000565_url.xml', 'BIOMD0000000364_url.xml',
       'BIOMD0000000070_url.xml', 'BIOMD0000000232_url.xml',
       'BIOMD0000000467_url.xml', 'BIOMD0000000383_url.xml',
       'BIOMD0000000453_url.xml', 'BIOMD0000000112_url.xml',
       'BIOMD0000000217_

In [35]:
import networkx as nx
# Take the last node of the MESGraph `m` as the example used below.
# NOTE(review): `m` is assigned inside the model-checking loop, so this is
# presumably the graph of whichever model was processed last - confirm.
som_example = list(m.nodes)[-1]
# Record for one step of a path: the two node names and the labels of the
# reactions on the edge between them.
PathNodesReactions = collections.namedtuple('PathNodesReactions',
    'node1 node2 reactions')
def getSOMPath(som, mole1, mole2):
  """
  Find the shortest chain of reactions linking two molecules of a SOM.

  An undirected graph is built whose nodes are molecule names and whose
  edges carry, under the key cn.REACTION, the labels of the reactions
  joining them. The shortest path from mole1 to mole2 in that graph is
  then printed and returned step by step.

  Every reaction in som.reactions is treated as a 1-1 reaction: only
  its first reactant and its first product are used.

  :param SOM som: its `reactions` attribute supplies the edges
  :param str mole1: name of the starting molecule
  :param str mole2: name of the target molecule
  :return list-of-PathNodesReactions: one entry per step of the path
  :raises: whatever nx.shortest_path raises; per the networkx
      documentation that is NodeNotFound when mole1 is not a node of
      the graph and NetworkXNoPath when no path exists
  """
  # construct undirected graph
  subg = nx.Graph()
  for reaction in list(som.reactions):
    print(reaction.makeIdentifier(is_include_kinetics=False))
    node1 = reaction.reactants[0].molecule.name
    node2 = reaction.products[0].molecule.name
    if subg.has_edge(node1, node2):
      reaction_label = subg.get_edge_data(node1, node2)[cn.REACTION]
      # extend the edge's label list only with labels it does not hold yet
      if reaction.label not in reaction_label:
        reaction_label = reaction_label + [reaction.label]
    else:
      reaction_label = [reaction.label]
    # store the labels under the same key that is used to read them back
    subg.add_edge(node1, node2, **{cn.REACTION: reaction_label})
  print(list(subg.edges))
  path = nx.shortest_path(subg, source=mole1, target=mole2)
  print("We found the shortest path from " + mole1 + " to " + mole2)
  result_data = []
  # walk the path one edge at a time
  for start, end in zip(path, path[1:]):
    print(start + "=" + end, end=" ")
    edge_reactions = subg.get_edge_data(start, end)[cn.REACTION]
    print("by reaction(s)", edge_reactions)
    result_data.append(PathNodesReactions(node1=start,
                                          node2=end,
                                          reactions=edge_reactions))
  return result_data

In [131]:
# Find the path between molecules 's260' and 's270' inside the example SOM,
# then display the type of the value that getSOMPath returned.
som_path = getSOMPath(som_example, 's260', 's270')
type(som_path)

r58: s36 -> s232
re65: s260 -> s232
re64: s270 -> s232
[('s36', 's232'), ('s232', 's260'), ('s232', 's270')]
We found the shortest path from s260 to s270
s260=s232 by reaction(s) ['re65']
s232=s270 by reaction(s) ['re64']


list

In [134]:
# Replay every step of the path, spelling out each reaction on the step
# through its identifier (kinetics left out).
for pat in som_path:
  print("".join([pat.node1, " = ", pat.node2, " by"]), end=" ")
  for r in pat.reactions:
    reaction = simple.getReaction(r)
    print(reaction.makeIdentifier(is_include_kinetics=False))

s260 = s232 by re65: s260 -> s232
s232 = s270 by re64: s270 -> s232


In [124]:
# Inspect the notebook-level graph `subg`: list its edges and query the same
# edge with its end points given in both orders.
# NOTE(review): this needs the module-level `subg` (the `subg` inside
# getSOMPath is local), so the cell that builds it has to run first.
print(subg.edges)
print(subg.has_edge('s260', 's232'))
print(subg.has_edge('s232', 's260'))

[('s36', 's232'), ('s232', 's260'), ('s232', 's270')]
True
True


In [87]:
# Scratch, notebook-level version of the graph construction performed inside
# getSOMPath, kept so that `subg` can be inspected directly.
som = som_example
mole1 = 's260'
mole2 = 's270'
# NOTE(review): neither lookup result is used below; `moelcule2` is a
# misspelling that is kept because it is a notebook-global name.
molecule1 = simple.getMolecule(mole1).name
moelcule2 = simple.getMolecule(mole2).name
# construct undirected graph
subg = nx.Graph()
# subg.add_nodes_from(som.molecules)
# here, every reaction is 1-1 reaction
for reaction in list(som.reactions):
  print(reaction.makeIdentifier(is_include_kinetics=False))
  node1 = reaction.reactants[0].molecule.name
  node2 = reaction.products[0].molecule.name
  if subg.has_edge(node1, node2):
    reaction_label = subg.get_edge_data(node1, node2)[cn.REACTION]
    # extend the edge's label list only with labels it does not hold yet
    if reaction.label not in reaction_label:
      reaction_label = reaction_label + [reaction.label]
  else:
    reaction_label = [reaction.label]
  # store the labels under the same key that is used to read them back
  subg.add_edge(node1, node2, **{cn.REACTION: reaction_label})


r58: s36 -> s232
re65: s260 -> s232
re64: s270 -> s232


In [11]:
import tesbml
# Re-read every file listed in `error_paths` and report the ones that
# SimpleSBML fails to initialize.
# NOTE(review): `error_paths` is not defined in any cell shown here - confirm
# where it comes from. `document` and `model` stay bound to the last file.
for error_path in error_paths:
  document = tesbml.readSBML(error_path)
  model = document.getModel()
  #pm.print_model(model)
  try:
    simple.initialize(error_path)
  except Exception:
    # Exception (not a bare `except:`) so that an interrupt still stops the loop
    print(error_path)
    print("showed error")

/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000596_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/MODEL0568648427_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000410_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000081_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000075_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000094_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000353_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/BIOMD0000000627_url.xml
showed error
/Users/woosubshin/Desktop/ModelEngineering/SBMLLint/data/MODEL0072364382_url.xml
showed error


In [7]:
#pm.print_model(model)
# Print species and stoichiometry of every reactant of reaction 0, once
# through the getter method and once through the attribute.
for rct in model.getReaction(0).getListOfReactants():
  print(rct.getSpecies())
  print("similar, ", rct.species)
  print(rct.stoichiometry)
  print("similar, ", rct.getStoichiometry())
print("rct done! now pdt")
# Same dump for the products of reaction 0.
for pdt in model.getReaction(0).getListOfProducts():
  print(pdt.getSpecies())
  print(pdt.stoichiometry)
  print("similar, ", pdt.getStoichiometry())
pm.print_model(model)

model.getReaction(3).getId()
# Stoichiometries of the reactants of reaction 3. The comprehension reads its
# own loop variable `r`; it used to read `rct`, left over from the loop above.
[r.stoichiometry for r in model.getReaction(3).getListOfReactants() \
if r.species != "haha"]
#r = Reaction(model.getReaction(3))
#r.category

rct done! now pdt
s51
1.0
similar,  1.0
<Model MODEL1508180000 "Philipson2015 - Innate immune response modulated by NLRX1">
MyD88_a_HP:  -> s51;
MyD88_d: s51 -> ;
TRAF_d: s7 -> ;
NFkB_a1:  -> s4;
NFkB_d: s4 -> ;
CytoL_a:  -> s34;
CytoL_d: s34 -> ;
NLRX1_d: s49 -> ;
MyD88_a:  -> s51;
HP_a:  -> s18;
HP_CytoL: s18 -> ;
NLRX1_TF:  -> s49;
NFkB_a2:  -> s4;
RIG_a_HP:  -> RIG;
RIG_d: RIG -> ;
MAVS_a:  -> MAVS;
MAVS_d: MAVS -> ;
IRF_a1:  -> IRF;
IRF_a2:  -> IRF;
IRF_a3:  -> IRF;
IRF_d: IRF -> ;
IFN_a:  -> IFN;
IFN_d: IFN -> ;
NOD_a:  -> NOD1;
NOD_d: NOD1 -> ;
HP_IFN: s18 -> ;
TRAF_a:  -> s7;
TFa_a:  -> TFa;
TFa_d: TFa -> ;
TFi_a:  -> TFi;
TFi_d: TFi -> ;
CytoE_a:  -> CytoE;
CytoE_d: CytoE -> ;
X_a:  -> X;
X_d: X -> ;
HP_CytoE: s18 -> ;


[]

In [10]:
# NOTE(review): `getLgetListOfReactants` looks like a typing slip and the
# recorded output of this cell is an AttributeError. The cells above call
# getListOfReactants() on `model.getReaction(i)`, which is presumably what
# was intended here - confirm, or delete this scratch cell.
model.getLgetListOfReactants()

AttributeError: getListOfReactants

In [56]:
# Labels of the reaction categories.
REACTION_1_1 = "reaction_1_1"
REACTION_n_1 = "reaction_n_1"
REACTION_1_n = "reaction_1_n"
REACTION_n_n = "reaction_n_n"
REACTION_BOUNDARY = "reaction_boundary"

# A category label paired with the predicate deciding membership.
ReactionCategory = collections.namedtuple('ReactionCategory',
    'category predicate')

# All predicates take the same four positional arguments, as passed by the
# categorisation loop in this notebook:
#   x - number of reactants        y - number of products
#   z - summed reactant stoichiometry
#   w - summed product stoichiometry


def _is_reaction_1_1(x, y, z, w):
  """One reactant, one product, equal summed stoichiometry."""
  return x == 1 and y == 1 and z == w


def _is_reaction_1_n(x, y, z, w):
  """One reactant with either several products (z of 1) or z below w."""
  return x == 1 and ((y > 1 and z == 1.00) or (y == 1 and z < w))


def _is_reaction_n_1(x, y, z, w):
  """One product with either several reactants (w of 1) or z above w."""
  return (x > 1 and y == 1 and w == 1.00) or (x == 1 and y == 1 and z > w)


def _is_reaction_n_n(x, y, z, w):
  """Several species on both sides, or several on one side whose single
  counterpart has a summed stoichiometry different from 1."""
  return ((x > 1 and y > 1)
          or (x == 1 and y > 1 and z != 1.00)
          or (x > 1 and y == 1 and w != 1.00))


def _is_reaction_boundary(x, y, z, w):
  """Any of the four quantities equals zero."""
  return 0 in (x, y, z, w)


REACTION_CATEGORIES = [
    ReactionCategory(category=REACTION_1_1, predicate=_is_reaction_1_1),
    ReactionCategory(category=REACTION_1_n, predicate=_is_reaction_1_n),
    ReactionCategory(category=REACTION_n_1, predicate=_is_reaction_n_1),
    ReactionCategory(category=REACTION_n_n, predicate=_is_reaction_n_n),
    ReactionCategory(category=REACTION_BOUNDARY, predicate=_is_reaction_boundary),
    ]

In [57]:
# For every reaction of `model`, count the species and add up the
# stoichiometries on each side, then print each category whose predicate
# accepts those four numbers.
for reaction in model.getListOfReactions():
  num_rct = sum(1 for r in reaction.getListOfReactants())
  num_pdt = sum(1 for p in reaction.getListOfProducts())
  stoi_rct = sum(r.stoichiometry for r in reaction.getListOfReactants())
  stoi_pdt = sum(p.stoichiometry for p in reaction.getListOfProducts())
  print("x =", num_rct, ";y =", num_pdt, ";z =", stoi_rct, ";w =", stoi_pdt)
  for reaction_category in REACTION_CATEGORIES:
    matched = reaction_category.predicate(num_rct, num_pdt, stoi_rct, stoi_pdt)
    if matched:
      print(reaction_category.category)

x = 3 ;y = 1 ;z = 4.0 ;w = 2.0
reaction_n_n
x = 3 ;y = 1 ;z = 4.0 ;w = 1.5
reaction_n_n
x = 1 ;y = 1 ;z = 1.0 ;w = 1.0
reaction_1_1
x = 1 ;y = 1 ;z = 1.0 ;w = 1.0
reaction_1_1


In [49]:
# Print the reactions of the model currently bound to `model`.
# NOTE(review): `pm` is not imported in any cell shown here; presumably it
# arrives through the `games_setup` star import - confirm.
pm.print_model(model)

<Model "Arnold2011_Damour2007_RuBisCO-CalvinCycle">
PGA_prod_Vc: RuBP + CO2 + NADPH -> PGA;
PGA_prod_Vo: RuBP + O2 + NADPH -> PGA;
PGA_cons: PGA -> RuBP;
NADPH_prod: NADP -> NADPH;


In [51]:
# Hand check of the last clause of the n-n predicate, using the values
# printed for the first reaction of the model.
x, y, z, w = 3, 1, 4.0, 2.0
(x > 1) and (y == 1) and (w != 1.00)

True

In [1]:
def make_autopct(values):
    """Build a label formatter showing a percentage and the matching count.

    :param values: the numbers the percentages refer to; they are summed
        every time the returned function is called
    :return: a function taking a percentage and returning a string made of
        the percentage with one decimal, followed by the rounded share of
        the total in parentheses
    """
    def format_slice(pct):
        grand_total = sum(values)
        # turn the percentage back into an absolute, rounded count
        count = int(round(pct * grand_total / 100.0))
        return '{p:.1f}%  ({v:d})'.format(p=pct, v=count)
    return format_slice

# Slice labels, in the same order as `sizes` and `colors`.
labels = [
    'Mass Balance Issue',
    'No Mass Balance Issue',
    'Only Boundary Reactions',
    'Not Loaded',
]
# Hard-coded number of models in each slice.
sizes = [146, 365, 209, 9]
# Every offset is zero, so no slice is pulled out of the pie.
explode = (0, 0.0, 0, 0)
colors = ['orangered', 'green', 'lightgrey', 'black']

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct=make_autopct(sizes),
    startangle=50,
    shadow=True,
)
# Equal aspect ratio so that the pie is drawn as a circle.
ax1.axis('equal')

plt.show()

NameError: name 'plt' is not defined

In [2]:
# Same pie drawn through the pyplot interface, with a caption centred
# below the figure.
txt = "[Analysis of Mass Balance for BioModels]"
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct=make_autopct(sizes),
    startangle=70,
    radius=1.3,
)
plt.figtext(0.5, 0.01, txt, wrap=True, horizontalalignment='center', fontsize=12)

NameError: name 'plt' is not defined