# Structured Names
This notebook describes how to use structured names to check SBML models for moiety balance.

In [1]:
import init
from SBMLLint.common import constants as cn
from SBMLLint.common.molecule import Molecule
from SBMLLint.common import simple_sbml
from SBMLLint.common.reaction import Reaction
from SBMLLint.tools import sbmllint
from SBMLLint.tools import print_reactions
from SBMLLint.common.simple_sbml import SimpleSBML

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tellurium as te

In [None]:
from SBMLLint.common.simple_sbml import modelIterator

In [None]:
# Print the filename of each item that modelIterator yields for
# initial=19 through final=25.
iterator = modelIterator(initial=19, final=25)
for model_item in iterator:
    filename = model_item.filename
    print(filename)

In [None]:
import init
from SBMLLint.tools import sbmllint

# The first step in glycolysis, expressed in the Antimony language.
# NOTE(review): `A__P_3` is presumably the repetition form of A_P_P_P
# (A followed by three P moieties) -- confirm against the SBMLLint docs.
# The initializations below name exactly the four species that appear in the
# reaction; previously `A_P_P_P` was initialized although the reaction uses
# `A__P_3`.
model = """
Glu + A__P_3 -> Glu_P + A_P_P; 1
Glu = 0
A__P_3 = 0
Glu_P = 0
A_P_P = 0
"""
_ = sbmllint.lint(model)

In [None]:
# The first step in glycolysis, with a deliberate error: the product is
# wrongly A_P instead of A_P_P, so one P is missing on the right-hand side.
model = """
Glu + A_P_P_P -> Glu_P + A_P; 1
"""
_ = sbmllint.lint(model)

## Structured Names in BioModels

In [None]:
# Read the structured-names analysis CSV into a DataFrame and preview the
# first rows.
csv_name = "analyze_structured_names.csv"
path = os.path.join(cn.ANALYSIS_STRUCTURED_NAMES_DIR, csv_name)
df_data = pd.read_csv(path)
df_data.head()

In [None]:
# Basic statistics: count the rows flagged as structured versus the rest
# and show the split as a pie chart.
structured_mask = df_data[cn.IS_STRUCTURED]
num_structured = len(df_data[structured_mask])
num_not_structured = len(df_data) - num_structured
pie_sizes = [num_structured, num_not_structured]
pie_labels = [
    "Structured (%d)" % num_structured,
    "Not Structured (%d)" % num_not_structured,
]
_ = plt.pie(pie_sizes, labels=pie_labels)

In [None]:
# Cumulative plot of the fraction of balanced reactions over the rows
# flagged as structured.
df_sort = df_data[df_data[cn.IS_STRUCTURED]]
df_sort = df_sort.sort_values(cn.FRAC_BALANCED_REACTIONS)
df_sort = df_sort.reset_index()
# After the reset the index is each row's position in sorted order, so
# position / row count is the fraction of rows that precede it.
num_rows = len(df_sort)
yv = [1.0*i/num_rows for i in df_sort.index]
_ = plt.plot(df_sort[cn.FRAC_BALANCED_REACTIONS], yv)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel("Fraction Balanced Reactions")
plt.ylabel("Fraction of Models")
# Finish with plt.show(), as the plotting functions in this notebook do, so
# the cell does not echo the Text object returned by plt.ylabel.
plt.show()

In [None]:
def plotIsStructured(df, val):
    """
    Plots a histogram of cn.TOTAL_REACTIONS for the rows whose
    cn.IS_STRUCTURED value equals val.

    :param pd.DataFrame df: has columns cn.IS_STRUCTURED, cn.TOTAL_REACTIONS
    :param bool val: value of cn.IS_STRUCTURED selecting the rows to plot
    """
    selected = df[df[cn.IS_STRUCTURED] == val]
    title = "Structured Names" if val else "Not Structured Names"
    plt.hist(selected[cn.TOTAL_REACTIONS], bins=100, range=(0, 500))
    plt.title(title)
    plt.xlabel("Number Reactions")
    plt.show()


# One histogram per group: structured first, then not structured.
for structured_flag in (True, False):
    plotIsStructured(df_data, structured_flag)

In [None]:
def plotFrcBoundary(df, val):
    """
    Scatter plot of cn.FRAC_BOUNDARY_REACTIONS against cn.TOTAL_REACTIONS
    for the rows whose cn.IS_STRUCTURED value equals val.

    :param pd.DataFrame df: has columns cn.IS_STRUCTURED,
        cn.TOTAL_REACTIONS, cn.FRAC_BOUNDARY_REACTIONS
    :param bool val: value of cn.IS_STRUCTURED selecting the rows to plot
    """
    selected = df[df[cn.IS_STRUCTURED] == val]
    title = "Structured Names" if val else "Not Structured Names"
    x_values = selected[cn.TOTAL_REACTIONS]
    y_values = selected[cn.FRAC_BOUNDARY_REACTIONS]
    plt.scatter(x_values, y_values)
    plt.title(title)
    plt.xlabel("Number Reactions")
    plt.ylabel("Fraction Boundary")
    plt.show()


# One scatter plot per group: structured first, then not structured.
for boundary_flag in (True, False):
    plotFrcBoundary(df_data, boundary_flag)

In [None]:
def plotFracBalanced(df):
    """
    Scatter plot, for the rows selected by cn.IS_STRUCTURED, of the fraction
    of non-boundary reactions (x) against cn.FRAC_BALANCED_REACTIONS (y).

    :param pd.DataFrame df: has columns cn.IS_STRUCTURED,
        cn.TOTAL_REACTIONS, cn.NUM_BOUNDARY_REACTIONS,
        cn.FRAC_BALANCED_REACTIONS
    """
    structured = df[df[cn.IS_STRUCTURED]]
    total = structured[cn.TOTAL_REACTIONS]
    # (total - boundary) / total, computed in floating point.
    num_non_boundary = 1.0*total - structured[cn.NUM_BOUNDARY_REACTIONS]
    frac_non_boundary = num_non_boundary / total
    plt.scatter(frac_non_boundary, structured[cn.FRAC_BALANCED_REACTIONS])
    plt.title("Structured Names")
    plt.xlabel("Fraction Non-Boundary Reactions")
    plt.ylabel("Fraction Balanced")
    plt.xlim([0, 1.1])
    plt.show()


plotFracBalanced(df_data)

In [None]:
def plotFracBalancedHist(df):
    """
    Plots a 50-bin histogram of cn.FRAC_BALANCED_REACTIONS for the rows
    selected by cn.IS_STRUCTURED.

    :param pd.DataFrame df: has columns cn.IS_STRUCTURED,
        cn.FRAC_BALANCED_REACTIONS
    """
    structured = df[df[cn.IS_STRUCTURED]]
    frac_balanced = structured[cn.FRAC_BALANCED_REACTIONS]
    plt.hist(frac_balanced, bins=50)
    plt.title("Models With Structured Names")
    plt.xlabel("Fraction Balanced Reactions")
    plt.show()


# Histogram of fraction balanced for structured names
plotFracBalancedHist(df_data)

## Detailed Look at Putative Structured Names

**Observations**
1. Some models use ad hoc structured names that would balance with only small changes. For example:   
   1. BIOMD0000000192_url.xml: RCC1_RanGDP -> RCC1_Ran + GDP, RCC1_Ran + GTP -> RCC1_RanGTP
   1. BIOMD0000000010_url.xml: MKKK -> MKKK_P, MKKK_P -> MKKK

In [None]:
# Pretty-print the reactions (without kinetics or labels) of every file
# listed in a row flagged as structured.
df = df_data[df_data[cn.IS_STRUCTURED]]
for filename in df[cn.FILENAME]:
    path = os.path.join(cn.DATA_DIR, filename)
    print("\n%s" % filename)
    print_reactions.prettyPrint(
        path,
        is_include_kinetics=False,
        is_include_label=False,
    )

In [2]:
# Load a single model file through SimpleSBML.
# NOTE(review): the output recorded for this cell is
# "NameError: name 'model_reference' is not defined". The failing line is not
# visible in this notebook -- TODO confirm how SimpleSBML expects to be
# initialized and re-run the cell.
simple = SimpleSBML()
path = os.path.join(cn.DATA_DIR, "BIOMD0000000353_url.xml")
simple.initialize(path)

NameError: name 'model_reference' is not defined

**To Do**
1. Identify the missing files. Likely a problem with the iterator?