# Structured Names
This notebook describes how to use structured names to check SBML models for moiety balance.

In [1]:
import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions

import os
import pandas as pd
import tellurium as te

In [2]:
# The first step in glycolysis
# Species names are structured: their parts are joined with "_"
# (e.g. A_P_P_P), presumably one part per moiety -- TODO confirm against
# cn.MOIETY_SEPARATOR.
model = """
2Glu + 2A_P_P_P -> 2Glu_P + 2A_P_P; 1
Glu = 0
A_P_P_P = 0
Glu_P = 0
A_P_P = 0
"""
# lint is unpacked here (and in calcStats) as a pair:
# (number of reactions, number of imbalanced reactions).
# NOTE(review): the recorded output of this cell is a TypeError from a path
# check that received a Model object -- confirm which inputs lint accepts.
num_react, num_bad = sbmllint.lint(model)
# NOTE(review): with the two lines below commented out, this cell does not
# define `simple`.
#simple = simple_sbml.SimpleSBML()
#simple.initialize(model)

TypeError: stat: path should be string, bytes, os.PathLike or integer, not Model

In [None]:
# NOTE(review): no earlier cell assigns `simple` (its construction in the
# previous cell is commented out), so on a fresh kernel run in order this
# lookup is expected to fail with NameError -- confirm the intended setup.
simple.reactions

In [None]:
# The first step in glycolysis, with an error: the product is A_P instead of
# A_P_P, so under the "_" naming convention one P is lost across the reaction.
model = """
Glu + A_P_P_P -> Glu_P + A_P; 1
"""
# Assign to _ so the value returned by lint is not echoed as the cell output.
_ = sbmllint.lint(model)

## Structured Names in BioModels

Find models that already use "\_" as a name separator

In [None]:
# For every model yielded by modelIterator(final=200), pretty-print its
# reactions if at least one reaction involves a species whose name contains
# the moiety separator.
sbmliter = simple_sbml.modelIterator(final=200)
for item in sbmliter:
    simple = simple_sbml.SimpleSBML(item.model)
    Reaction.initialize(simple)
    for reaction in Reaction.reactions:
        molecules = set(reaction.reactants).union(reaction.products)
        # A generator (rather than a list) lets any() stop at the first match.
        if any(cn.MOIETY_SEPARATOR in m.name for m in molecules):
            path = os.path.join(cn.DATA_DIR, item.filename)
            print("\n\n***%s\n" % item.filename)
            print_reactions.prettyPrint(path, is_include_kinetics=False)
            # One report per model is enough; continue with the next model.
            break
            

In [None]:
# Lint a single model file from cn.DATA_DIR.
# Files examined previously (assign one of them to `filename` to re-run it):
#   BIOMD0000000611_url.xml
#   BIOMD0000000198_url.xml
#   BIOMD0000000139_url.xml
#   BIOMD0000000362_url.xml  (16/33 don't comply)
#   BIOMD0000000571_url.xml
filename = "BIOMD0000000140_url.xml"

path = os.path.join(cn.DATA_DIR, filename)
sbmllint.lint(path)

**To do:**
1. Build a table with the total number of reactions and the number of imbalanced reactions (excluding boundary reactions), and show it as a bar plot.

In [None]:
def isStructuredName(name, separator=None):
    """
    Decides whether a species name looks like a structured name.

    A name is treated as structured when splitting it on the separator gives
    more than one component, the second component is not a number, and the
    first component is not the literal "species".

    :param str name: species name to classify
    :param str separator: string that joins the components of a structured
        name; defaults to cn.MOIETY_SEPARATOR when omitted
    :return bool: True if the name is considered structured
    """
    if separator is None:
        separator = cn.MOIETY_SEPARATOR
    moietys = name.split(separator)
    if len(moietys) == 1:
        return False
    # Names such as "x_3" use the separator only to attach a number.
    # Only the second component is examined, as in the original check.
    try:
        float(moietys[1])
    except ValueError:
        # Not numeric: keep evaluating the remaining rule.
        pass
    else:
        return False
    # Auto-generated names of the form "species_<something>" are not structured.
    return moietys[0] != "species"

# Sanity checks for isStructuredName. Assertions make a wrong result stop
# "Restart & Run All" instead of relying on reading printed flags.
assert isStructuredName("a_b"), "a_b should be a structured name"
assert not isStructuredName("species_b"), "species_b should not be structured"
assert not isStructuredName("speces_3"), "a numeric second component is not structured"

In [None]:
# Construct dataframe of statistics on names and imbalances

# Column names of the statistics dataframe
FILENAME = "filename"
HAS_DASH = "has_dash"  # True if some species in the model has a structured name
NUM_REACTIONS = "num_reactions"
NUM_BOUNDARY_REACTIONS = "num_boundary_reactions"
NUM_BAD = "num_imbalance_reactions"
NUM_BALANCED_REACTIONS = "num_balanced_reactions"
FRC_BALANCED = "frc_balanced"


def calcStats(initial=1, final=20):
    """
    Builds per-model statistics on structured names and reaction balance.

    For each model produced by simple_sbml.modelIterator(initial, final) the
    function counts boundary reactions (no reactants or no products), records
    whether any species has a structured name, and runs the linter to obtain
    the number of reactions and the number of imbalanced reactions.

    :param int initial: passed to simple_sbml.modelIterator
    :param int final: passed to simple_sbml.modelIterator
    :return pd.DataFrame: one row per model whose fraction of balanced
        reactions is greater than zero, sorted by that fraction (ascending).
        Columns: FILENAME, HAS_DASH, NUM_BOUNDARY_REACTIONS, NUM_REACTIONS,
        NUM_BAD, NUM_BALANCED_REACTIONS, FRC_BALANCED.
    """
    columns = [FILENAME, HAS_DASH, NUM_BOUNDARY_REACTIONS, NUM_REACTIONS, NUM_BAD]
    records = []
    sbmliter = simple_sbml.modelIterator(initial=initial, final=final)
    for item in sbmliter:
        simple = simple_sbml.SimpleSBML(item.model)
        Reaction.initialize(simple)
        num_boundary = 0
        has_structured_name = False
        for reaction in Reaction.reactions:
            if (len(reaction.reactants) == 0) or (len(reaction.products) == 0):
                num_boundary += 1
            molecules = set(reaction.reactants).union(reaction.products)
            if any(isStructuredName(m.name) for m in molecules):
                has_structured_name = True
        num_reactions, num_bad = sbmllint.lint(item.model, is_report=False)
        records.append({
            FILENAME: item.filename,
            HAS_DASH: has_structured_name,
            NUM_BOUNDARY_REACTIONS: num_boundary,
            NUM_REACTIONS: num_reactions,
            NUM_BAD: num_bad,
        })
    # Build the frame once from all records instead of concatenating one
    # single-row frame per model; the index is then the model's position.
    df_count = pd.DataFrame(records, columns=columns)
    df_count[NUM_BALANCED_REACTIONS] = df_count[NUM_REACTIONS] - df_count[NUM_BAD]
    # NOTE(review): if every reaction of a model is a boundary reaction the
    # denominator is zero -- confirm how such models should be reported.
    df_count[FRC_BALANCED] = 1.0*df_count[NUM_BALANCED_REACTIONS] / (
        df_count[NUM_REACTIONS] - df_count[NUM_BOUNDARY_REACTIONS])
    df = df_count[df_count[FRC_BALANCED] > 0.0]
    # sort_values returns a new frame; return it so the ordering takes effect.
    return df.sort_values(FRC_BALANCED)

In [None]:
# Statistics for the models selected by modelIterator(initial=1, final=200).
calcStats(initial=1, final=200)

In [None]:
# Statistics for the models selected by modelIterator(initial=201, final=300).
calcStats(initial=201, final=300)