# Structured Names
This notebook describes how to use structured names to check SBML models for moiety balance.

In [1]:
import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import pandas as pd
import tellurium as te

In [2]:
from SBMLLint.common.simple_sbml import modelIterator

In [7]:
# Print the file name of every model that modelIterator produces when called
# with initial=19 and final=25 (a quick look at what the iterator yields).
iterator = modelIterator(initial=19, final=25)
for item in iterator:
    # Each item exposes the name of the file it was read from.
    print(item.filename)

BIOMD0000000241_url.xml
BIOMD0000000350_url.xml
MODEL0847712949_url.xml
BIOMD0000000346_url.xml
BIOMD0000000256_url.xml
BIOMD0000000402_url.xml


In [4]:
import init
from SBMLLint.tools import sbmllint

# The first step in glycolysis, expressed
# in the Antimony language.
# NOTE(review): the reaction names one reactant `A__P_3`, whereas the
# initializations below name `A_P_P_P`. Presumably both are meant to denote
# the same species (one A moiety with three P moieties) -- confirm, because
# the two identifiers differ as written.
model = """
Glu + A__P_3 -> Glu_P + A_P_P; 1
Glu = 0
A_P_P_P = 0
Glu_P = 0
A_P_P = 0
"""
# Bind the return value to `_` so that the cell shows only the printed report.
_ = sbmllint.lint(model)


0 of 1 reactions have imbalances.



In [5]:
# The first step in glycolysis, with a deliberate error: the product is
# wrongly written as A_P instead of A_P_P, so one P moiety goes missing.
model = """
Glu + A_P_P_P -> Glu_P + A_P; 1
"""
# Bind the return value to `_` so that the cell shows only the printed report.
_ = sbmllint.lint(model)


1 of 1 reactions have imbalances.

***_J0: Glu + A_P_P_P -> Glu_P + A_P
Excess moieties in reactants
  P: 1.00




## Structured Names in BioModels

Find models that already use "\_" as a name separator

In [6]:
# Walk through the models selected by modelIterator(final=200). For each
# model, look for the first reaction in which some reactant or product name
# contains the moiety separator; when one is found, pretty-print all of the
# model's reactions (without kinetics) and move on to the next model.
sbmliter = simple_sbml.modelIterator(final=200)
for item in sbmliter:
    simple = simple_sbml.SimpleSBML()
    simple.initialize(item.model)
    for reaction in simple.reactions:
        participants = set(reaction.reactants).union(reaction.products)
        names = [m.molecule.name for m in participants]
        if not any([cn.MOIETY_SEPARATOR in name for name in names]):
            continue
        path = os.path.join(cn.DATA_DIR, item.filename)
        print("\n\n***%s\n" % item.filename)
        print_reactions.prettyPrint(path, is_include_kinetics=False)
        # One matching reaction is enough: report each model only once.
        break
            



***BIOMD0000000198_url.xml

r1fast: NO + sGCfast -> NO_sGCfast
r2fast: NO_sGCfast -> NO_sGCfast_6coord
r3fast: NO_sGCfast_6coord -> NO_sGCfast_5coord
r1slow: NO + sGCslow -> NO_sGCslow
r2slow: NO_sGCslow -> NO_sGCslow_6coord
r3slow: NO + NO_sGCslow_6coord -> NO_sGCslow_6coord_NO_int
r4slow: NO_sGCslow_6coord_NO_int -> NO_sGCslow_5coord


***BIOMD0000000352_url.xml

reac_DIA: A + I -> D_IA
reac_DII: I + I -> D_II
reac_degrI: I -> 
reac_degrA: A -> 
reac_degrDIA: D_IA -> 
reac_degrDII: D_II -> 
reac_DIAxA: D_IA -> A
reac_DIIxI: D_II -> I
reac_degrR: R -> 
reac_prodI:  -> I
reac_prodA:  -> A
reac_prodR:  -> R


***BIOMD0000000570_url.xml

reaction_1:  -> species_1
reaction_2: 3.00 species_1 + species_2 -> species_3
reaction_3: 100.00 species_17 -> species_4
reaction_4: species_4 + species_2 -> 2.00 species_5 + species_3
reaction_5: species_5 + species_6 -> species_7 + species_9
reaction_6: species_3 + species_9 -> species_2 + species_8
reaction_7: species_8 + species_7 -> species_10 + s

Mdm2Synthesis: Mdm2_mRNA -> Mdm2_mRNA + Mdm2 + mdm2syn
Mdm2mRNASynthesis: p53 -> p53 + Mdm2_mRNA + Mdm2mRNAsyn
Mdm2mRNADegradation: Mdm2_mRNA -> Sink + Mdm2mRNAdeg
Mdm2Degradation: Mdm2 -> Sink + mdm2deg
p53Synthesis: Source -> p53 + p53syn
p53Degradation: Mdm2_p53 -> Mdm2 + p53deg
P53_Mdm2Binding: p53 + Mdm2 -> Mdm2_p53
P53_Mdm2Release: Mdm2_p53 -> p53 + Mdm2
DNAdamage:  -> damDNA + totdamDNA
DNArepair: damDNA -> Sink
ARFactivation: damDNA -> damDNA + ARF
ARF_Mdm2Binding: ARF + Mdm2 -> ARF_Mdm2
ARF_Mdm2Degradation: ARF_Mdm2 -> ARF + mdm2deg
ARFDegradation: ARF -> Sink


***BIOMD0000000140_url.xml

v1: NFkB + IkBalpha -> IkBalpha_NFkB
v2: NFkB + IkBbeta -> IkBbeta_NFkB
v3: NFkB + IkBeps -> IkBeps_NFkB
v4: NFkB + IKK_IkBalpha -> IKK_IkBalpha_NFkB
v5: IKK_IkBalpha_NFkB -> NFkB + IKK
v6: NFkB + IKK_IkBbeta -> IKK_IkBbeta_NFkB
v7: IKK_IkBbeta_NFkB -> NFkB + IKK
v8: NFkB + IKK_IkBeps -> IKK_IkBeps_NFkB
v9: IKK_IkBeps_NFkB -> NFkB + IKK
v10: IkBalpha_NFkB -> NFkB
v11: IkBbeta_NFkB -> NFkB
v1

R47: Grb2 + pSOS -> Grb2_pSOS
R48: Grb2_SOS + pShc -> Grb2_SOS_pShc
R49: pShc_pTrkA + Grb2_SOS -> Grb2_SOS_pShc_pTrkA
R50: pShc_pTrkA_endo + Grb2_SOS -> Grb2_SOS_pShc_pTrkA_endo
R51: pShc -> Shc
R52: Grb2_SOS_pShc -> Shc + Grb2_SOS
R53: pSOS -> SOS
R54: Grb2_pSOS -> Grb2_SOS
R55: pDok + RasGAP -> pDok_RasGAP
R56: pDok -> Dok
R57: Ras_GTP -> Ras_GDP
R58: Ras_GDP + Grb2_SOS_pShc_pTrkA -> Ras_GTP + Grb2_SOS_pShc_pTrkA
R59: SOS + dppERKcyt -> pSOS + dppERKcyt
R60: Grb2_SOS_pShc + dppERKcyt -> Grb2_pSOS + pShc + dppERKcyt
R61: Grb2_SOS + dppERKcyt -> Grb2_pSOS + dppERKcyt
R62: Ras_GTP + pDok_RasGAP -> Ras_GDP + pDok_RasGAP
R63: B_Raf_Ras_GTP + pDok_RasGAP -> Ras_GDP + B_Raf + pDok_RasGAP
R64: c_Raf_Ras_GTP + pDok_RasGAP -> Ras_GDP + c_Raf + pDok_RasGAP
R65: Crk + C3G -> Crk_C3G
R66: pFRS2_pTrkA + Crk_C3G -> Crk_C3G_pFRS2_pTrkA
R67: pFRS2_pTrkA_endo + Crk_C3G -> Crk_C3G_pFRS2_pTrkA_endo
R68: pFRS2 -> FRS2
R69: Rap1_GTP -> Rap1_GDP
R70: Rap1_GDP + Crk_C3G_pFRS2_pTrkA_endo -> Rap1_GTP + Crk_C3



***BIOMD0000000362_url.xml

R1: TF + VII -> TF_VII
R2: TF + VIIa -> TF_VIIa
R3: TF_VIIa + VII -> TF_VIIa + VIIa
R4: Xa + VII -> Xa + VIIa
R5: IIa + VII -> IIa + VIIa
R6: TF_VIIa + X -> TF_VIIa_X
R7: TF_VIIa + Xa -> TF_VIIa_Xa
R8: TF_VIIa + IX -> TF_VIIa_IX
R9: Xa + II -> Xa + IIa
R10: IIa + VIII -> IIa + VIIIa
R11: IXa + VIIIa -> IXa_VIIIa
R12: IXa_VIIIa + X -> IXa_VIIIa_X
R13: VIIIa -> VIIIa1_L + VIIIa2
R14: IXa_VIIIa_X -> VIIIa1_L + VIIIa2 + X + IXa
R15: IXa_VIIIa -> VIIIa1_L + VIIIa2 + IXa
R16: IIa + V -> IIa + Va
R17: Xa + Va -> Xa_Va
R18: Xa_Va + II -> Xa_Va_II
R19: mIIa + Xa_Va -> IIa + Xa_Va
R20: Xa + TFPI -> Xa_TFPI
R21: TF_VIIa_Xa + TFPI -> TF_VIIa_Xa_TFPI
R22: TF_VIIa + Xa_TFPI -> TF_VIIa_Xa_TFPI
R23: Xa + ATIII -> Xa_ATIII
R24: mIIa + ATIII -> mIIa_ATIII
R25: IXa + ATIII -> IXa_ATIII
R26: IIa + ATIII -> IIa_ATIII
R27: TF_VIIa + ATIII -> TF_VIIa_ATIII
R6b: TF_VIIa_X -> TF_VIIa_Xa
R8b: TF_VIIa_IX -> TF_VIIa + IXa
R12b: IXa_VIIIa_X -> IXa_VIIIa + Xa
R18b: Xa_Va_II -> Xa_Va + 

r1:  -> S
r2: S -> 
r3: I_1 -> 
r4: I_2 -> 
r5: R_1 -> 
r6: R_2 -> 
r7: I_1p -> 
r8: I_2p -> 
r9: R_p -> 
r10: S -> I_1
r11: S -> I_2
r12: R_2 -> I_1p
r13: R_1 -> I_2p
r14: I_1 -> R_1
r15: I_2 -> R_2
r16: I_1p -> R_p
r17: I_2p -> R_p
r18: R_1 -> S
r19: R_2 -> S
r20: R_p -> S


***BIOMD0000000639_url.xml

FolK: ATP + H2_HMPt -> AMP + H2_HMPterinPP
FolP: p_ABA + H2_HMPterinPP -> PPi + H2_Pteroate
FolC: L_Glutamate + ATP + H2_Pteroate -> DHF + ADP + Phosphate
GlyA: THF + L_serine -> CH2_THF + Glycine
FolA: DHF + NADPH -> NADP + THF
PanB: CH2_THF -> H2_HMPt + p_ABA
p_ABA_production:  -> p_ABA
H2_HMPt_production:  -> H2_HMPt
THF_drain: THF -> 
CH2_THF_drain: CH2_THF -> 


***BIOMD0000000144_url.xml

R_1:  -> MPFc
R_2: preMPFc -> MPFc
R_3: MPFc -> preMPFc
R_6: preMPFc -> 
R_7: MPFc -> 
R_8: Wee1c -> Wee1Pc
R_9: Wee1Pc -> Wee1c
R_10: Stgm -> 
R_12:  -> Stgc
R_13: Stgc -> StgPc
R_14: StgPc -> Stgc
R_15: Stgc -> 
R_16: StgPc -> 
R_19: MPFc -> 
importofMPFintocytoplasm_1:  -> MPFc
_16:  -> MPFn


ValueError: Reaction category not found.

In [None]:
# Analysis of BIOMD0000000611_url.xml
filename = "BIOMD0000000611_url.xml"

# Build the path of the model file inside the data directory (cn.DATA_DIR)
# and run the moiety-balance lint on it.
path = os.path.join(cn.DATA_DIR, filename)
sbmllint.lint(path)

**To do:**
1. Build a table of counts per model: total reactions and number of imbalanced reactions (excluding boundary reactions). Present it as a bar plot.

In [None]:
def isStructuredName(name):
    """
    Heuristically decides whether a species name is a structured name,
    i.e. a name built from components joined by cn.MOIETY_SEPARATOR.

    A name is NOT considered structured when any of the following holds:
      - it does not contain the separator at all;
      - the component right after the first separator parses as a number
        (e.g. "x_3"), so the separator introduces a numeric suffix;
      - its first component is the literal "species" (e.g. "species_b").

    :param str name: species name to classify
    :return bool: True if the name looks like a structured name
    """
    moietys = name.split(cn.MOIETY_SEPARATOR)
    if len(moietys) == 1:
        return False
    # See if it has a numeric suffix. Only the second component is inspected.
    # float() also accepts spellings such as "1e5", "inf" and "nan".
    try:
        float(moietys[1])
    except ValueError:
        # Not a number. Catch only ValueError so that unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer silently swallowed.
        pass
    else:
        return False
    return moietys[0] != "species"

# Quick manual checks of isStructuredName. Each printed line has the form
# "<expected>: <actual>", where 1 means structured and 0 means not structured.
for label, candidate in (
        ("1: %d", "a_b"),
        ("0: %d", "species_b"),
        ("0: %d", "speces_3"),
        ):
    print(label % isStructuredName(candidate))

In [None]:
# Construct dataframe of statistics on names and imbalances

# Column names of the statistics dataframe
FILENAME = "filename"
HAS_DASH = "has_dash"
NUM_REACTIONS = "num_reactions"
NUM_BOUNDARY_REACTIONS = "num_boundary_reactions"
NUM_BAD = "num_imbalance_reactions"
NUM_BALANCED_REACTIONS = "num_balanced_reactions"
FRC_BALANCED = "frc_balanced"
def calcStats(initial=1, final=20):
    """
    Collects per-model statistics on structured names and moiety balance.

    For every model produced by simple_sbml.modelIterator(initial, final)
    one row is created with:
      - FILENAME: file name of the model
      - HAS_DASH: True if any reactant or product name in any reaction
        satisfies isStructuredName
      - NUM_BOUNDARY_REACTIONS: reactions with no reactants or no products
      - NUM_REACTIONS, NUM_BAD: the two values returned by
        sbmllint.lint(model, is_report=False)
      - NUM_BALANCED_REACTIONS: NUM_REACTIONS - NUM_BAD
      - FRC_BALANCED: NUM_BALANCED_REACTIONS divided by the number of
        non-boundary reactions

    :param int initial: passed through to modelIterator
    :param int final: passed through to modelIterator
    :return pd.DataFrame: rows with FRC_BALANCED > 0, sorted in ascending
        order of FRC_BALANCED
    """
    records = []
    sbmliter = simple_sbml.modelIterator(initial=initial, final=final)
    for item in sbmliter:
        # NOTE(review): the BioModels scan cell earlier in this notebook uses
        # SimpleSBML() followed by initialize(), iterates simple.reactions and
        # reads m.molecule.name. The calls below follow a different
        # convention -- confirm which one matches the installed SBMLLint.
        simple = simple_sbml.SimpleSBML(item.model)
        Reaction.initialize(simple)
        num_boundary = 0
        has_structured_name = False
        for reaction in Reaction.reactions:
            if (len(reaction.reactants) == 0) or (len(reaction.products) == 0):
                num_boundary += 1
            molecules = set(reaction.reactants).union(reaction.products)
            if any(isStructuredName(m.name) for m in molecules):
                has_structured_name = True
        num_reactions, num_bad = sbmllint.lint(item.model, is_report=False)
        records.append({
            FILENAME: item.filename,
            HAS_DASH: has_structured_name,
            NUM_BOUNDARY_REACTIONS: num_boundary,
            NUM_REACTIONS: num_reactions,
            NUM_BAD: num_bad,
            })
    # Build the frame once from all records; the explicit column list keeps
    # the columns present even when no model was processed.
    df_count = pd.DataFrame(records, columns=[
        FILENAME, HAS_DASH, NUM_BOUNDARY_REACTIONS, NUM_REACTIONS, NUM_BAD])
    df_count[NUM_BALANCED_REACTIONS] = df_count[NUM_REACTIONS] - df_count[NUM_BAD]
    # NOTE(review): the denominator is zero for a model whose reactions are
    # all boundary reactions, which yields inf or NaN here; NaN rows are
    # dropped by the filter below, inf rows are kept.
    df_count[FRC_BALANCED] = 1.0*df_count[NUM_BALANCED_REACTIONS] / (
        df_count[NUM_REACTIONS] - df_count[NUM_BOUNDARY_REACTIONS])
    df = df_count[df_count[FRC_BALANCED] > 0.0]
    # sort_values returns a new frame, so its result must be returned
    # (previously it was discarded and the unsorted frame was returned).
    return df.sort_values(FRC_BALANCED)

In [None]:
calcStats(initial=1, final=200)

In [None]:
calcStats(initial=201, final=300)