
# Build a basic ME model

We will try to build an ME model from the NC_000913.2 Genbank file, the iJO1366 M model, and the complex reconstruction from iJL1650-ME

In [1]:
# python imports
import re
import json
from os.path import join

# third party imports
import pandas
import escher
import cobra.test

# ecoli me
from ecolime.flat_files import *
from ecolime.ecoli_k12 import *

from minime import *
from minime.util.building import add_ecoli_M_model_content, add_ribosomes, add_RNA_polymerase
from minime.util.building import add_TUs_and_translation, add_dummy_reactions
from minime.util.building import add_complex_stoichiometry_data, add_transcription_reaction
from minime.util.building import add_complex_modification_data
from minime.util.building import add_metabolic_reactions
from minime.solve.algorithms import binary_search, fva, solve_at_growth_rate
from minime.solve.symbolic import compile_expressions

In [2]:
met_info = pandas.read_csv(join(ecoli_files_dir, "metabolites.txt"),
                           delimiter="\t", header=None, index_col=0,
                           names=["id", "name", "formula", "compartment",
                                  "data_source"])
rxn_info = get_reaction_info_frame()
rxn_dict = get_reaction_matrix_dict()
sources_sinks_info = pandas.read_csv(join(ecoli_files_dir, "reaction_matrix_sources_and_sinks.txt"), delimiter="\t",
                                     header=None, names=["rxn_id", "met_id", "compartment", "stoic"], index_col=1)

source_amounts = pandas.read_csv(join(ecoli_files_dir, "exchange_bounds.txt"),
                                 delimiter="\t", index_col=0, names=["met_id", "amount"])

## Begin by loading metabolites and build Metabolic reactions

In [3]:
me = MEmodel('iJO1366-ME')
add_ecoli_M_model_content(me, 'iJO1366', met_info, rxn_info, rxn_dict, 
                          sources_sinks_info, source_amounts,generic_ions=True)

# if the bounds of this metabolite aren't open, model uses wrong reactions
me.reactions.EX_pqq_e.lower_bound = -1000

Assigned tl to e
Assigned hemed to e
Assigned pqq to e
Assigned dpm to e
Assigned 23bpg to e
Assigned tqn to e


## Build ribosome and RNA Polymerase

In [4]:
gb_filename = join(ecoli_files_dir,'NC_000913.2.gb')                                                                                    
TU_df = pandas.read_csv(join(ecoli_files_dir,'TUs_from_ecocyc.txt'), delimiter="\t")            

TU_pieces = add_TUs_and_translation(me, gb_filename, TU_frame=TU_df, generic_flag=True)

add_ribosomes(me)

add_RNA_polymerase(me)



TODO deal with selenocystine
unknown metabolite 'RNase_T_dim_mod_4:generic_divalent' created
unknown metabolite 'generic_RNase' created
unknown metabolite 'RNase_BN_dim_mod_2:generic_divalent' created
unknown metabolite 'Rnd_mono_mod_5:generic_divalent' created
unknown metabolite 'Rnb_mono_mod_1:generic_divalent' created
unknown metabolite 'Rph_mono_mod_generic_divalent' created
Created <Metabolite InfA_mono at 0x7f212dfe7910> in <ComplexFormation formation_ribosome at 0x7f212e519910>
Created <Metabolite RimM_mono at 0x7f212dfe7a10> in <ComplexFormation formation_ribosome at 0x7f212e519910>
Created <Metabolite Tig_mono at 0x7f212dfe7ad0> in <ComplexFormation formation_ribosome at 0x7f212e519910>
Created <Metabolite InfC_mono at 0x7f212dfe7a90> in <ComplexFormation formation_ribosome at 0x7f212e519910>
Created <Metabolite InfB_mono at 0x7f212dfe7b10> in <ComplexFormation formation_ribosome at 0x7f212e519910>
Created <Metabolite RbfA_mono at 0x7f212dfe7b90> in <ComplexFormation formation

In [5]:
# There genes have no TU information so need transcribed seperately
no_TU_info_list = ['b3247', 'b3166', 'b3167', 'b3705', 'b3980']
for TU in no_TU_info_list:
    RNA = me.metabolites.get_by_id('RNA_' + TU)
    add_transcription_reaction(me, 'mRNA_' + TU, {TU}, RNA.seq)

Make a dummy reactions. Add a dummy protein in as well.

In [6]:
dna_sequence = "ATG" + "TTT" * 12 + "TAT"*12+ "ACG"*12 + "GAT" *12 + "AGT"*12+ "TAA"
add_dummy_reactions(me, dna_sequence)

## Associate the tRNA synthetases

The tRNA charging reactions were automatically added when loading the genome from the genbank file. However, the charging reactions still need to be made aware of the tRNA synthetases which are responsible.

In [7]:
with open(join(ecoli_files_dir, "amino_acid_tRNA_synthetase.json"), "rb") as infile:
    aa_synthetase_dict = json.load(infile)
for data in me.tRNA_data:
    data.synthetase = str(aa_synthetase_dict[data.amino_acid])

### Add in complex Formation with modifications

In [8]:
# ME_complex_dict is a dict of {'complex_id': [{'bnum' : count}]}
rna_components = {"b3123"} # component id should have RNA_ instead of protein_
ME_complex_dict = get_complex_to_bnum_dict(rna_components)
# some entries in the file need to be renamed.
# Colton 7/8/15 made changes directly to flat file
#renames = {"MnmE_": "b3706", "MnmG_": "b3741", "YheM_": "b3344", "YheL_": "b3343", "YheN_": "b3345"}
add_complex_stoichiometry_data(me, ME_complex_dict)


In [9]:
met_list = []
for met_stoich in rxn_dict.values():
    for met in met_stoich:
        met_list.append(met.replace('_c',''))
modification_dict = get_protein_modification_dict('protein_modification.txt', met_list, 
                                                  generic=True)
add_complex_modification_data(me, modification_dict)

In [10]:
# two different reactiond can add a lipoate modification.
# We create a separate ModificationData for each one
lipo = me.modification_data.get_by_id("mod_lipo_c")
alt_lipo = ModificationData("mod_lipo_c_alt", me)
#alt_lipo.stoichiometry = lipo.stoichiometry

lipo.stoichiometry = {"lipoamp_c": -1, "amp_c": 1}
lipo.enzyme = 'EG11796-MONOMER'
lipo.keff = 65.

alt_lipo.stoichiometry = {'EG50003-MONOMER_mod_pan4p_mod_lipo':-1,
                          'EG50003-MONOMER_mod_pan4p':1,
                          'h_c':-1,}
alt_lipo.enzyme = 'EG11591-MONOMER'
alt_lipo.keff = 65.

for cplx_data in lipo.get_complex_data():
    alt_cplx_data = ComplexData(cplx_data.id + "alt", me)
    alt_cplx_data.complex_id = cplx_data.complex_id
    alt_cplx_data.translocation = cplx_data.translocation
    alt_cplx_data.chaperones = cplx_data.chaperones
    alt_cplx_data.modifications = cplx_data.modifications
    alt_cplx_data.modifications[alt_lipo.id] = \
        alt_cplx_data.modifications.pop(lipo.id)

In [11]:
# todo bmocogdp_c mods
# todo FE-S modifications
mod_catalysts = {'CPLX0-1762':'G6712-MONOMER', # FE-S modification
                 'TMAOREDUCTI-CPLX':'EG12195-MONOMER',
                 'DIMESULFREDUCT-CPLX':'G6849-MONOMER',
                 'NITRATREDUCTA-CPLX':'NARJ-MONOMER', 
                 'NITRATREDUCTZ-CPLX':'NARW-MONOMER',
                 'NAP-CPLX':'NAPD-MONOMER',
                 'NAPAB-CPLX_NAPC-MONOMER':'NAPD-MONOMER'}

In [12]:
target_list = ['4fe4s_c', 'LI_c', '2fe2s_c', '3fe4s_c',
               'NiFeCoCN2_c', 'acetyl_c',
               'RNase_m5','RNase_m16','RNase_m23'] # RNAses are gaps in model

for met_id in target_list:
    r = cobra.Reaction("EX_" + met_id)
    me.add_reaction(r)
    r.reaction = met_id + " <=> "

unknown metabolite '4fe4s_c' created
unknown metabolite 'LI_c' created
unknown metabolite '2fe2s_c' created
unknown metabolite '3fe4s_c' created
unknown metabolite 'NiFeCoCN2_c' created
unknown metabolite 'acetyl_c' created


Build all complex formation reactions

In [13]:
for cplx_data in me.complex_data:
    formation = cplx_data.formation
    if formation:
        formation.update()
    else:
        cplx_data.create_complex_formation()

Created <Metabolite cdec3e_c at 0x7f212de42a10> in <ComplexFormation formation_EG50003-MONOMER_mod_pan4p_mod_cdec3e at 0x7f212de42990>
Created <Metabolite pqq_c at 0x7f212dbaad90> in <ComplexFormation formation_GLUCDEHYDROG-MONOMER_mod_pqq at 0x7f212dbaad10>
Created <Metabolite cddec5e_c at 0x7f212dbd2c90> in <ComplexFormation formation_EG50003-MONOMER_mod_pan4p_mod_cddec5e at 0x7f212dbd2c10>
Created <Metabolite hemed_c at 0x7f212dbd2dd0> in <ComplexFormation formation_CYT-D-UBIOX-CPLX_mod_pheme_mod_hemed at 0x7f212dbd2d50>
Created <Metabolite cosh_c at 0x7f212dab8410> in <ComplexFormation formation_EG11597-MONOMER_mod_cosh at 0x7f212dab8390>
Created <Metabolite EG50003-MONOMER_mod_pan4p at 0x7f212dab8cd0> in <ComplexFormation formation_GCVMULTI-CPLX_mod_lipo at 0x7f212dab8c50>
Created <Metabolite EG50003-MONOMER_mod_pan4p_mod_lipo at 0x7f212dab8d10> in <ComplexFormation formation_GCVMULTI-CPLX_mod_lipo at 0x7f212dab8c50>
Created <Metabolite 23bpg_c at 0x7f212d21db50> in <ComplexFormat

Crutch reactions for mets that are blocked. TODO remove

## Associate Complexes with Reactions

In [14]:
# associate reaction id with the old ME complex id (including modifications)
rxnToModCplxDict = get_reaction_to_modified_complex(generic=True)

Fixed _DASH:  D__LACtex
Fixed _DASH:  L__LACD2
Fixed _DASH:  L__LACD3
Fixed _DASH:  D__LACt2pp
Fixed _DASH:  L__LACtex
Fixed _DASH:  L__LACt2rpp


In [15]:
for reaction_data in me.stoichiometric_data:
    add_metabolic_reactions(me, reaction_data, rxnToModCplxDict, rxn_info, update=True,
                            create_new=True)

Created <Metabolite CPLX0-7817_mod_Oxidized at 0x7f212e096990> in <MetabolicReaction RNDR1b3_FWD_RIBONUCLEOSIDE-DIP-REDUCTII-CPLX at 0x7f212e096950>
Created <Metabolite GLUTAREDOXIN-MONOMER_mod_Oxidized at 0x7f212e096a10> in <MetabolicReaction RNDR1b1_FWD_RIBONUCLEOSIDE-DIP-REDUCTII-CPLX at 0x7f212e0969d0>
Created <Metabolite GRXC-MONOMER_mod_Oxidized at 0x7f212e096a90> in <MetabolicReaction RNDR1b4_FWD_RIBONUCLEOSIDE-DIP-REDUCTII-CPLX at 0x7f212e096a50>
Created <Metabolite GRXB-MONOMER_mod_Oxidized at 0x7f212e096c10> in <MetabolicReaction RNDR1b2_FWD_RIBONUCLEOSIDE-DIP-REDUCTII-CPLX at 0x7f212e096bd0>
Created <Metabolite EG50003-MONOMER_mod_pan4p_mod_3ha at 0x7f212dffa250> in <MetabolicReaction 3OAR401_REV_3-OXOACYL-ACP-REDUCT-MONOMER at 0x7f212dffa210>
Created <Metabolite EG50003-MONOMER_mod_pan4p_mod_act at 0x7f212dffa290> in <MetabolicReaction 3OAR401_REV_3-OXOACYL-ACP-REDUCT-MONOMER at 0x7f212dffa210>
Created <Metabolite EG50003-MONOMER_mod_pan4p_mod_ddca at 0x7f212dffa410> in <Me

In [16]:
# This reaction is weird
me.reactions.get_by_id('CITLY-CPLX_2tpr3dpcoa_FWD_G6340-MONOMER').reaction

'2tpr3dpcoa_c + 4.27350427350427e-6*mu G6340-MONOMER --> '

Rebuild transcription and translation to use tRNA (now that tRNA synthetase complexes are in the model

In [17]:
for r in me.reactions:
    if isinstance(r, tRNAChargingReaction):
        r.update()
for r in me.reactions:
    if isinstance(r, TranslationReaction):
        r.update()
    if isinstance(r, TranscriptionReaction):
        r.update()
    if isinstance(r, MetabolicReaction):
        r.update()

In [18]:
me.reactions.transcription_TU0_1191_from_RPOH_MONOMER_slice_69.reaction

'7 h2o_c + 0.0229401207374776*mu + 0.00896958720835373 RNA_Polymerase + 1703 ctp_c + 1488 utp_c + 4.27350427350427e-6*mu rRNA_containing_excision_set + 1252 gtp_c + 1181 atp_c --> RNA_b3275 + excised_TU_M_3424806_3424978_False + excised_TU_M_3425055_3427069_True + RNA_b3274 + RNA_b3276 + excised_TU_M_3421810_3421900_False + RNA_b3272 + 7 h_c + 1790.80534926 biomass + 5623 ppi_c + excised_TU_M_3421565_3421688_False'

## Add in translocation

In [19]:
translocPath = pandas.read_csv(join(ecoli_files_dir, "translocation_pathways.txt"),sep='\t')

for index, row in translocPath.iterrows():
    translocRxn = ProteinTranslocationData(row.Reaction_name, me)
    translocRxn.keff = float(row.Keff)
    translocRxn.costs_complexes = row.Complexes.split(' AND ')

In [20]:
proteins_sa_coeff_inner={}

transloc = pandas.read_csv(join(ecoli_files_dir, "peptide_compartment_and_pathways2.txt"), sep='\t', comment="#")
for index, row in transloc.iterrows():
    me.metabolites.get_by_id(row.Complex).compartment = row.Complex_compartment
    me.metabolites.get_by_id('protein_'+row.Protein.split('(')[0]).compartment = row.Protein_compartment
    #if index > 316:
    #    continue
    if row.translocase_pathway=='s':
        me.translocation_pathways.srp_translocation.add_translocation_cost(me,row.Complex,row.Protein)
        
        ## This is in preparation for membrane constraint
        mass = me.translation_data.get_by_id(row.Protein.split('(')[0]).mass
        if row.Complex in proteins_sa_coeff_inner.keys():
            proteins_sa_coeff_inner[row.Complex]+=mass*1.21/42.*2
        else:
            proteins_sa_coeff_inner[row.Complex]=mass*1.21/42.*2
    elif row.translocase_pathway == 'r':
        me.translocation_pathways.srp_translocation.add_translocation_cost(me,row.Complex,row.Protein)       
    elif row.translocase_pathway == 'p':
        me.translocation_pathways.srp_yidC_translocation.add_translocation_cost(me,row.Complex,row.Protein)         
    elif row.translocase_pathway =='t':
        me.translocation_pathways.tat_translocation.add_translocation_cost(me,row.Complex,row.Protein)
    elif row.translocase_pathway=='a':
        me.translocation_pathways.srp_translocation.add_translocation_cost(me,row.Complex,row.Protein)
        me.translocation_pathways.secA_translocation.add_translocation_cost(me,row.Complex,row.Protein)
    elif row.translocase_pathway =='l':
        me.translocation_pathways.lol_translocation.add_translocation_cost(me,row.Complex,row.Protein)
    elif row.translocase_pathway =='b':
        me.translocation_pathways.bam_translocation.add_translocation_cost(me,row.Complex,row.Protein)
    elif row.translocase_pathway =='y':
        me.translocation_pathways.yidC_translocation.add_translocation_cost(me,row.Complex,row.Protein)        
    elif row.translocase_pathway!='n':
        print row.translocase_pathway        

sb
pla
sb
sb
sb
pa
sb
sb
pla
pla
sb
sb
sb
pla
pa
sb
sb
sb
pa
sb
sb
sb
sb
sb
sb
sb
pla
pa
sb
sb
sb
pla
sb
sb
pla
sb
pla
pla
pla
pla
sb
pla
ra


In [21]:
me.complex_data.get_by_id('TatBC_octa').stoichiometry

defaultdict(<type 'float'>, {'gdp_c': -3.0, 'SRP-CPLX': 0.0476666666666667*mu, 'protein_b3839': 8.0, 'protein_b3838': 8.0, 'FtsY_MONOMER': 3.41880341880342e-5*mu, 'gtp_c': 3.0, 'pi_c': -3.0, 'Sec-CPLX': 0.0476666666666667*mu, 'YidC_MONOMER': 0.0286666666666667*mu})

Remove unused protein and mRNA to make the model solve faster (TODO remove unused complexes too)

In [22]:
for c_d in me.complex_data:
    c = c_d.complex
    if len(c.reactions) == 1:
        list(c.reactions)[0].delete(remove_orphans=True)
for p in me.metabolites.query("protein"):
    if len(p._reaction) == 1:
        list(p._reaction)[0].delete(remove_orphans=True)
for m in me.metabolites.query("RNA"):
    if len(m._reaction) == 1:
        list(m._reaction)[0].delete(remove_orphans=True)


This gives the total number of genes included

In [23]:
len(me.reactions.query("transcription"))

5276

In [24]:
len(me.reactions)

18249

In [25]:
len(me.metabolites)

9187

In [26]:
len(me.metabolites.query('RNA_'))

4274

In [27]:
len(me.metabolites.query('protein_'))

1444

## Attempt to set keffs

In [28]:
divalent_list = divalent_list
monovalent_list = ['_mod_k','_mod_na1']
from pickle import load
with open("test_keffs.pickle", "rb") as infile:
    old_keffs = load(infile)
keffs = {}

for keff, value in old_keffs.items():
    for i in divalent_list: 
        keff = keff.replace(i, 'generic_divalent')
    for i in monovalent_list: 
        keff = keff.replace(i, '_mod_generic_monovalent')
    keffs[keff] = value
    
for r in me.reactions:
    if isinstance(r, MetabolicReaction) and r.complex_data is None:
        continue
    if isinstance(r, MetabolicReaction) and r.complex_data.id != "CPLX_dummy":
        met_rxn = r
        key = met_rxn.id.replace("-", "_DASH_").replace("__", "_DASH_").replace(":","_COLON_")
        #key = met_rxn.id
        key = "keff_" + key.replace("_FWD_", "_").replace("_REV_", "_")

        matches = [i for i in keffs if key in i]
        # get the direction
        if met_rxn.reverse:
            matches = [i for i in matches if i.endswith("_reverse_priming_keff")]
        else:
            matches = [i for i in matches if i.endswith("_forward_priming_keff")]
        if len(matches) == 1:
            met_rxn.keff = keffs[matches[0]]
        elif len(matches) > 0:
            if len(matches) == len([i for i in matches if key + "_mod_"]):
                met_rxn.keff = keffs[matches[0]]
            else:
                print key, len(matches)
        else:  # len(matches) == 0
            print "no keff found for", key

no keff found for keff_METAT_S_DASH_ADENMETSYN_DASH_CPLX_mod_4_COLON_generic_monovalent_mod_1_COLON_generic_divalent
no keff found for keff_GLUTRR_CPLX0_DASH_3741


## Solve

In [29]:
me.reactions.dummy_reaction_FWD_CPLX_dummy.objective_coefficient = 1.

In [30]:
# Turn off reactions that throw off results as in iOL
KO_list = ['DHPTDNR','DHPTDNRN', 'SUCASPtpp','SUCFUMtpp', 'SUCMALtpp', 'SUCTARTtpp', 
           'CAT', 'FHL', 'SPODM', 'SPODMp']
for reaction in KO_list:
    a = me.reactions.query(reaction + '_')
    for rxn in a:
        rxn.upper_bound = 0
        rxn.lower_bound = 0

In [31]:
me.reactions.EX_glc__D_e.lower_bound = -100

In [32]:
expressions = compile_expressions(me)

In [33]:
solve_at_growth_rate(me, 0.1, compiled_expressions=expressions)

<Solution 0.10 at 0x7f2129db9c90>

In [34]:
binary_search(me, min_mu=0, max_mu=2, mu_accuracy=1e-6,
              compiled_expressions=expressions)

KeyboardInterrupt: 

In [None]:
for cplx in generic_RNase_list:
    print me.metabolites.get_by_id(cplx).reactions

In [None]:
import escher
view = escher.Builder("iJO1366.Central metabolism")
view.reaction_data = me.get_metabolic_flux()
view.display_in_notebook()