# Reading ModelSEED Biochemistry from ModelSEED Database

This notebook provides examples to read data from the biochemistry database

The biochemistry database is represented by the `modelseedpy.biochem.modelseed_biochem.ModelSEEDDatabase` class

The database can be instantiated from either a local github copy of the database repository or loaded via web url from github.
* `from_local` - from_local(*path to repository*)
    * Example:
        1. first we obtain a copy of the repository: `git clone https://github.com/ModelSEED/ModelSEEDDatabase.git`
        2. then we can load the repository with `from_local`: `from_local('/home/user/ModelSEEDDatabase')`
* `from_github` - from_github(*commit version*)
    * Example:
        * load the dev branch from github (default: https://github.com/ModelSEED/ModelSEEDDatabase): `from_github('dev')`
        * load a specific commit version from github: `from_github('194ac8afe48f8a606c0dd07ba3c7af10c02ba2fd')`
        * load from another fork/repository: `from_github('master', 'https://raw.githubusercontent.com/Fxe/ModelSEEDDatabase')`

In [1]:
import logging
import pandas as pd
from modelseedpy.biochem.modelseed_biochem import get_structures_from_df, get_aliases_from_df, get_names_from_df, process_aliases, load_metabolites_from_df
from modelseedpy.biochem.modelseed_biochem import ALIAS_RXN_IDENTIFIERS_ORG, ALIAS_MODELS
from modelseedpy.biochem.modelseed_biochem import from_local2, from_local, from_github
from modelseedpy.biochem.modelseed_reaction import ModelSEEDReaction, ModelSEEDReaction2
logger = logging.getLogger(__name__)

In [2]:
%%time
modelseed_local2 = from_local2('/home/fliu/workspace/python/ModelSEEDDatabase/')

CPU times: user 16.8 s, sys: 668 ms, total: 17.5 s
Wall time: 17.5 s


In [3]:
%%time
modelseed_local1 = from_local('/home/fliu/workspace/python/ModelSEEDDatabase/')

CPU times: user 42.2 s, sys: 503 ms, total: 42.7 s
Wall time: 42.7 s


In [4]:
%%time
modelseed_git = from_github('dev')

CPU times: user 18.8 s, sys: 1.12 s, total: 19.9 s
Wall time: 24.3 s


In [5]:
inchi_key_lookup = {}
for cpd in modelseed_git.compounds:
    inchi_key = cpd.inchi_key
    inchi_key = None if pd.isna(inchi_key) or len(inchi_key) == 0 else inchi_key
    cpd.inchi_key = inchi_key
    if cpd.inchi_key:
        a, b, p = cpd.inchi_key.split('-')
        if a not in inchi_key_lookup:
            inchi_key_lookup[a] = {}
        if b not in inchi_key_lookup[a]:
            inchi_key_lookup[a][b] = set()
        inchi_key_lookup[a][b].add((cpd.id, p))

In [6]:
metabolite_reactions = {}
for rxn in modelseed_git.reactions:
    if not rxn.is_obsolete:
        for m in rxn.metabolites:
            if m.seed_id not in metabolite_reactions:
                metabolite_reactions[m.seed_id] = set()
            metabolite_reactions[m.seed_id].add(rxn.id)

In [7]:
import cobra
model = cobra.io.load_json_model('/home/fliu/workspace/data/sbml/iAnC892/iAnC892_single.json')

In [8]:
model.reactions.rxn08335_p_p

0,1
Reaction identifier,rxn08335_p_p
Name,rxn08335_p
Memory address,0x07fcbbaba06d0
Stoichiometry,cpd00282_c + cpd07274_p --> cpd00247_c + cpd16503_p  S-Dihydroorotate + Plastoquinone A --> Orotate + Plastoquinol-9
GPR,A3776_RS12690
Lower bound,0.0
Upper bound,1000.0


In [9]:
model

0,1
Name,M_iAnC892
Memory address,0x07fcc386bc3a0
Number of metabolites,934
Number of reactions,909
Number of groups,0
Objective expression,0
Compartments,"Cytoplasm, Lumen, periplasm, Extracellular space, Carboxysome, Pseudo compartment"


In [10]:
#rxn37623: cpd00005_0 + 3.0 cpd00067_0 + cpd07274_0 <=> cpd00006_0 + cpd29046_0
#rxn37623 NADPH + 3.0 H+ + Plastoquinone A <=> NADP + plastoquinol A
#rxn36824 NADPH + H+ + Plastoquinone A <=> NADP + Plastoquinol-9
#rxn36824: cpd00005_0 + cpd00067_0 + cpd07274_0 <=> cpd00006_0 + cpd16503_0
super_c = {
    'NDH1_1u_k': ('NDH1_1u_k', {'0': 'c', '1': 'k'}),
    'NDH1_1p_p': ('NDH1_1u_k', {'0': 'c', '1': 'p'}),
    'NDH1_4pp_p': ('NDH1_3u_k', {'0': 'c', '1': 'k'}),
    'NDH1_3u_k': ('NDH1_3u_k', {'0': 'c', '1': 'p'}),
    'NDH1_2p_p': ('NDH1_2u_k', {'0': 'c', '1': 'p'}),
    'NDH1_2u_k': ('NDH1_2u_k', {'0': 'c', '1': 'k'}),
    
    'rxn08335_p_p': ('DHORD_PQ9_k', {'0': 'c', '1': 'p'}),
    'DHORD_PQ9_k': ('DHORD_PQ9_k', {'0': 'c', '1': 'k'}),
}
#           cpd00005_c + 4.0 cpd00067_c + cpd07274_k --> cpd00006_c + 3.0 cpd00067_k + cpd16503_k
#NDH1_1p_p: cpd00005_c + 4.0 cpd00067_c + cpd07274_p --> cpd00006_c + 3.0 cpd00067_p + cpd16503_p
#NDH1_1u_k: cpd00005_c + 4.0 cpd00067_c + cpd07274_k --> cpd00006_c + 3.0 cpd00067_k + cpd16503_k
super_s = {
    'NDH1_1u_k': {
        ('cpd00005', '0'): -1, 
        ('cpd00067', '0'): -4, 
        ('cpd07274', '1'): -1,
        ('cpd00006', '0'): 1, 
        ('cpd00067', '1'): 3, 
        ('cpd16503', '1'): 1
    },

#NDH1_4pp_p: cpd00001_c + cpd00005_c + cpd00011_p + 3.0 cpd00067_c + cpd07274_p --> cpd00006_c + 3.0 cpd00067_p + cpd00242_c + cpd16503_p
#NDH1_3u_k : cpd00001_c + cpd00005_c + cpd00011_p + 3.0 cpd00067_c + cpd07274_k --> cpd00006_c + 3.0 cpd00067_k + cpd00242_c + cpd16503_k
    'NDH1_3u_k': {
        ('cpd00001', '0'): -1, 
        ('cpd00005', '0'): -1, 
        ('cpd00011', '0'): -1,
        ('cpd00067', '0'): -3,
        ('cpd07274', '1'): -1,
        ('cpd00006', '0'): 1, 
        ('cpd00067', '1'): 3, 
        ('cpd00242', '0'): 1,
        ('cpd16503', '1'): 1
    },
#NDH1_2p_p: cpd00004_c + 4.0 cpd00067_c + cpd07274_p --> cpd00003_c + 3.0 cpd00067_p + cpd16503_p
#NDH1_2u_k: cpd00004_c + 4.0 cpd00067_c + cpd07274_k --> cpd00003_c + 3.0 cpd00067_k + cpd16503_k
    'NDH1_2u_k': {
        ('cpd00004', '0'): -1, 
        ('cpd00067', '0'): -4, 
        ('cpd07274', '1'): -1,
        ('cpd00003', '0'): 1,
        ('cpd00067', '1'): 3, 
        ('cpd16503', '1'): 1
    },
#NDH2_syn_k: cpd00004_c + cpd00067_c + cpd07274_k --> cpd00003_c + cpd16503_k
    'NDH2_syn_k': {
        ('cpd00004', 'c'): -1, 
        ('cpd00067', 'c'): -1, 
        ('cpd07274', 'k'): -1,
        ('cpd00003', 'c'): 1,
        ('cpd16503', 'k'): 1
    },
#rxn08335_p_p: cpd00282_c + cpd07274_p --> cpd00247_c + cpd16503_p
#DHORD_PQ9_k : cpd00282_c + cpd07274_k --> cpd00247_c + cpd16503_k
    'DHORD_PQ9_k': {
        ('cpd00282', '0'): -1, 
        ('cpd07274', '1'): -1, 
        ('cpd00247', '0'): 1,
        ('cpd16503', '1'): 1
    }
}

In [11]:
for rxn_id in super_c:
    if rxn_id in model.reactions:
        stoich_key, compartment_setup = super_c[rxn_id]
        rxn_model = model.reactions.get_by_id(rxn_id)
        #print(rxn_id, rxn_model)
        #print(compartment_setup)
        rxn_s_model = dict(map(lambda x: (x[0].id, x[1]), rxn_model.metabolites.items()))
        rxn_s_match = {}
        for (cpd_id, cmp_token), v in super_s[stoich_key].items():
            _cpd_id = f'{cpd_id}_{compartment_setup[cmp_token]}'
            rxn_s_match[_cpd_id] = v

        valid = rxn_s_model == rxn_s_match
        print(rxn_id, valid)
        if not valid:
            print('rxn_s_match', rxn_s_match)
            print('rxn_s_model', rxn_s_model)

NDH1_1u_k True
NDH1_1p_p True
NDH1_4pp_p False
rxn_s_match {'cpd00001_c': -1, 'cpd00005_c': -1, 'cpd00011_c': -1, 'cpd00067_c': -3, 'cpd07274_k': -1, 'cpd00006_c': 1, 'cpd00067_k': 3, 'cpd00242_c': 1, 'cpd16503_k': 1}
rxn_s_model {'cpd16503_p': 1.0, 'cpd00067_c': -3.0, 'cpd00067_p': 3.0, 'cpd00242_c': 1.0, 'cpd00011_p': -1.0, 'cpd00006_c': 1.0, 'cpd00005_c': -1.0, 'cpd07274_p': -1.0, 'cpd00001_c': -1.0}
NDH1_3u_k False
rxn_s_match {'cpd00001_c': -1, 'cpd00005_c': -1, 'cpd00011_c': -1, 'cpd00067_c': -3, 'cpd07274_p': -1, 'cpd00006_c': 1, 'cpd00067_p': 3, 'cpd00242_c': 1, 'cpd16503_p': 1}
rxn_s_model {'cpd00067_c': -3.0, 'cpd00067_k': 3.0, 'cpd00242_c': 1.0, 'cpd00011_p': -1.0, 'cpd00006_c': 1.0, 'cpd07274_k': -1.0, 'cpd00005_c': -1.0, 'cpd16503_k': 1.0, 'cpd00001_c': -1.0}
NDH1_2p_p True
NDH1_2u_k True
rxn08335_p_p True
DHORD_PQ9_k True


In [12]:
def finddd(a, b):
    c = set(a) | set(b)
    super_match = None
    for i in c:
        if i in metabolite_reactions:
            if super_match is None:
                super_match = set(metabolite_reactions[i])
            super_match &= set(metabolite_reactions[i])
    return super_match

#finddd(['cpd00003'], ['cpd00004'])
for r in finddd(['cpd11421', 'cpd11621'], ['cpd11420', 'cpd11620']):
    rxn = modelseed_git.reactions.get_by_id(r)
    print(rxn)
    print(rxn.id, rxn.build_reaction_string(True))
    print()

rxn16344: 2.0 cpd00067_0 + cpd11420_0 + 2.0 cpd11620_0 <=> cpd11421_0 + 2.0 cpd11621_0
rxn16344 2.0 H+ + trdox + 2.0 Reducedferredoxin <=> trdrd + 2.0 Oxidizedferredoxin

