
# Build a basic ME model

We will try to build an ME model from the NC_000913.2 Genbank file, the iJO1366 M model, and the complex reconstruction from iJL1650-ME

In [None]:
# python imports
import re
import json
from os.path import join

# third party imports
import pandas
import escher
import cobra.test

# ecoli me
from ecolime.flat_files import *
from ecolime.ecoli_k12 import *

from minime import *
from minime.util.building import add_ecoli_M_model_content, add_ribosomes, add_RNA_polymerase
from minime.util.building import add_TUs_and_translation, add_dummy_reactions
from minime.util.building import add_complex_stoichiometry_data, add_transcription_reaction
from minime.util.building import add_complex_modification_data
from minime.solve.algorithms import binary_search, fva, solve_at_growth_rate
from minime.solve.symbolic import compile_expressions

In [None]:
# Metabolite annotation table; the first column ("id") becomes the index.
met_info = pandas.read_csv(
    join(ecoli_files_dir, "metabolites.txt"),
    names=["id", "name", "formula", "compartment", "data_source"],
    header=None,
    index_col=0,
    delimiter="\t")

# Reaction annotations and stoichiometries come from ecolime helpers.
rxn_info = get_reaction_info_frame()
rxn_dict = get_reaction_matrix_dict()

# Source/sink reaction entries, indexed by the second column ("met_id").
sources_sinks_info = pandas.read_csv(
    join(ecoli_files_dir, "reaction_matrix_sources_and_sinks.txt"),
    names=["rxn_id", "met_id", "compartment", "stoic"],
    header=None,
    index_col=1,
    delimiter="\t")

# Exchange amounts, indexed by the first column ("met_id").
source_amounts = pandas.read_csv(
    join(ecoli_files_dir, "exchange_bounds.txt"),
    names=["met_id", "amount"],
    index_col=0,
    delimiter="\t")

## Begin by loading metabolites and building metabolic reactions

In [None]:
# Create the empty ME model and populate it with the M-model content
# (metabolites, reactions, sources/sinks and exchange amounts).
me = MEmodel("iJO1366-ME")
add_ecoli_M_model_content(
    me, "iJO1366",
    met_info, rxn_info, rxn_dict,
    sources_sinks_info, source_amounts,
    generic_ions=True)

# If the bounds of this exchange are not open, the model uses the wrong
# reactions.
me.reactions.EX_pqq_e.lower_bound = -1000

## Build ribosome and RNA Polymerase

In [None]:
# Inputs: the genome annotation (GenBank) and the transcription-unit table.
gb_filename = join(ecoli_files_dir, "NC_000913.2.gb")
TU_df = pandas.read_csv(join(ecoli_files_dir, "TUs_from_ecocyc.txt"),
                        sep="\t")

# Add transcription units and translation from the genome, then the
# ribosome and RNA polymerase.
TU_pieces = add_TUs_and_translation(
    me, gb_filename, TU_frame=TU_df, generic_flag=True)
add_ribosomes(me)
add_RNA_polymerase(me)

In [None]:
# These genes have no TU information, so each one needs to be transcribed
# separately from its own RNA sequence.
no_TU_info_list = ["b3247", "b3166", "b3167", "b3705", "b3980"]
for TU in no_TU_info_list:
    RNA = me.metabolites.get_by_id("RNA_%s" % TU)
    add_transcription_reaction(me, "mRNA_%s" % TU, set([TU]), RNA.seq)

Make the dummy reactions. Add a dummy protein as well.

In [None]:
# Dummy gene: a start codon, five codons repeated 12 times each (in the
# order listed), and a stop codon.
dna_sequence = (
    "ATG"
    + "".join(codon * 12 for codon in ("TTT", "TAT", "ACG", "GAT", "AGT"))
    + "TAA"
)
add_dummy_reactions(me, dna_sequence)

## Associate the tRNA synthetases

The tRNA charging reactions were automatically added when loading the genome from the genbank file. However, the charging reactions still need to be made aware of the tRNA synthetases which are responsible.

In [None]:
# Load the amino acid -> tRNA synthetase mapping from the JSON flat file.
with open(join(ecoli_files_dir, "amino_acid_tRNA_synthetase.json"),
          "rb") as json_file:
    aa_synthetase_dict = json.load(json_file)

# Tell every tRNA charging data object which synthetase charges it.
for tRNA_entry in me.tRNA_data:
    amino_acid = tRNA_entry.amino_acid
    tRNA_entry.synthetase = str(aa_synthetase_dict[amino_acid])

### Add in complex Formation with modifications

In [None]:
# Components whose ids should carry the RNA_ prefix instead of protein_.
rna_components = set(["b3123"])

# ME_complex_dict is a dict of {'complex_id': [{'bnum' : count}]}
ME_complex_dict = get_complex_to_bnum_dict(rna_components)

# Some entries in the flat file used to need renaming (MnmE_ -> b3706,
# MnmG_ -> b3741, YheM_ -> b3344, YheL_ -> b3343, YheN_ -> b3345).
# Colton made those changes directly in the flat file on 7/8/15, so no
# renaming is done here.
add_complex_stoichiometry_data(me, ME_complex_dict)


In [None]:
# Collect every metabolite id that appears in a reaction, with '_c' removed.
# NOTE(review): str.replace drops every '_c' occurrence, not only a trailing
# compartment suffix -- confirm that is what the modification parser expects.
met_list = [met.replace('_c', '')
            for met_stoich in rxn_dict.values()
            for met in met_stoich]

modification_dict = get_protein_modification_dict(
    'protein_modification.txt', met_list, generic=True)
add_complex_modification_data(me, modification_dict)

In [None]:
# Two different reactions can add a lipoate modification, so a separate
# ModificationData is created for each one.
lipo = me.modification_data.get_by_id("mod_lipo_c")
alt_lipo = ModificationData("mod_lipo_c_alt", me)

lipo.stoichiometry = {"lipoamp_c": -1, "amp_c": 1}
lipo.enzyme = 'EG11796-MONOMER'
lipo.keff = 65.

alt_lipo.stoichiometry = {'EG50003-MONOMER_mod_pan4p_mod_lipo':-1,
                          'EG50003-MONOMER_mod_pan4p':1,
                          'h_c':-1,}
alt_lipo.enzyme = 'EG11591-MONOMER'
alt_lipo.keff = 65.

# For every complex that uses the lipoate modification, create a twin
# ComplexData that uses the alternative modification instead.
# The iterator is materialized first because new ComplexData objects are
# constructed against `me` inside the loop.
# NOTE(review): the twin does not receive cplx_data.stoichiometry -- confirm
# whether the subunit stoichiometry should be copied as well.
for cplx_data in list(lipo.get_complex_data()):
    alt_cplx_data = ComplexData(cplx_data.id + "alt", me)
    alt_cplx_data.complex_id = cplx_data.complex_id
    alt_cplx_data.translocation = cplx_data.translocation
    alt_cplx_data.chaperones = cplx_data.chaperones
    # Copy the mapping: assigning the same dict object would make the
    # pop/insert below also strip mod_lipo_c from the original ComplexData.
    alt_cplx_data.modifications = cplx_data.modifications.copy()
    alt_cplx_data.modifications[alt_lipo.id] = \
        alt_cplx_data.modifications.pop(lipo.id)

In [None]:
# TODO: bmocogdp_c modifications
# TODO: FE-S modifications
# Lookup table of string ids -> string ids; the keys look like complex ids
# and the values like monomer ids (presumably the protein that catalyzes the
# modification of that complex -- confirm). It is not referenced by any other
# cell visible in this notebook.
mod_catalysts = {'CPLX0-1762':'G6712-MONOMER', # FE-S modification
                 'TMAOREDUCTI-CPLX':'EG12195-MONOMER',
                 'DIMESULFREDUCT-CPLX':'G6849-MONOMER',
                 'NITRATREDUCTA-CPLX':'NARJ-MONOMER', 
                 'NITRATREDUCTZ-CPLX':'NARW-MONOMER',
                 'NAP-CPLX':'NAPD-MONOMER',
                 'NAPAB-CPLX_NAPC-MONOMER':'NAPD-MONOMER'}

In [None]:
# Metabolites that get a reversible exchange reaction ("EX_<id>").
# The RNases are gaps in the model.
target_list = [
    '4fe4s_c', 'LI_c', '2fe2s_c', '3fe4s_c',
    'NiFeCoCN2_c', 'acetyl_c',
    'RNase_m5', 'RNase_m16', 'RNase_m23',
]

for met_id in target_list:
    exchange = cobra.Reaction("EX_%s" % met_id)
    # The reaction has to belong to the model before its reaction string
    # is set.
    me.add_reaction(exchange)
    exchange.reaction = "%s <=> " % met_id

Build all complex formation reactions

In [None]:
# Make sure every complex has a formation reaction: create it when it is
# missing, otherwise refresh the existing one.
for complex_entry in me.complex_data:
    existing_formation = complex_entry.formation
    if not existing_formation:
        complex_entry.create_complex_formation()
        continue
    existing_formation.update()

Crutch reactions for mets that are blocked. TODO remove

## Associate Complexes with Reactions

In [None]:
# Associate each reaction id with the old ME complex id(s), including
# modifications. The result is indexed by reaction id and iterated as a
# collection of complex ids when the metabolic reactions are built.
rxnToModCplxDict = get_reaction_to_modified_complex(generic=True)

In [None]:
# Create one MetabolicReaction per (stoichiometric data, catalyst, direction).
for reaction_data in me.stoichiometric_data:
    # Some reactions have no complex associated. Decide whether they are
    # orphans (catalyzed by the dummy complex) or truly spontaneous.
    try:
        complexes = rxnToModCplxDict[reaction_data.id]
    except KeyError:
        is_orphan = (reaction_data.id == "dummy_reaction" or
                     not rxn_info.is_spontaneous[reaction_data.id])
        complexes = ["CPLX_dummy"] if is_orphan else [None]

    for complex_id in complexes:
        if complex_id:
            complex_data = me.complex_data.get_by_id(complex_id)
        else:
            complex_data = None

        # Reverse direction first, then forward; each is only built when the
        # corresponding bound allows flux in that direction.
        for direction_tag, is_reverse in (("_REV_", True), ("_FWD_", False)):
            if is_reverse:
                direction_open = reaction_data.lower_bound < 0
            else:
                direction_open = reaction_data.upper_bound > 0
            if not direction_open:
                continue

            r = MetabolicReaction(
                reaction_data.id + direction_tag + str(complex_id))
            me.add_reaction(r)
            r.keff = 65
            r.stoichiometric_data = reaction_data
            if complex_data is not None:
                r.complex_data = complex_data
            r.reverse = is_reverse
            r.update(create_new=True)

In [None]:
# This reaction is weird: display its reaction string for inspection.
# Raises if no reaction with this id was created above.
me.reactions.get_by_id('CITLY-CPLX_2tpr3dpcoa_FWD_G6340-MONOMER').reaction

Rebuild transcription and translation to use tRNA (now that the tRNA synthetase complexes are in the model).

In [None]:
# First pass: refresh the tRNA charging reactions.
for rxn in me.reactions:
    if not isinstance(rxn, tRNAChargingReaction):
        continue
    rxn.update()

# Second pass: refresh translation, transcription and metabolic reactions.
# Each class is checked separately, in this order.
for rxn in me.reactions:
    for reaction_class in (TranslationReaction,
                           TranscriptionReaction,
                           MetabolicReaction):
        if isinstance(rxn, reaction_class):
            rxn.update()

## Add in translocation

In [None]:
# One ProteinTranslocationData per row of the pathway table.
translocPath = pandas.read_csv(
    join(ecoli_files_dir, "translocation_pathways.txt"), sep="\t")

for row_index, pathway_row in translocPath.iterrows():
    pathway_data = ProteinTranslocationData(pathway_row.Reaction_name, me)
    pathway_data.keff = float(pathway_row.Keff)
    # The "Complexes" column lists the complexes joined by ' AND '.
    pathway_data.costs_complexes = pathway_row.Complexes.split(" AND ")

In [None]:
# Accumulated per-complex coefficient, in preparation for a membrane
# constraint (only filled for the 's' pathway).
proteins_sa_coeff_inner = {}

# Pathway code -> translocation pathway(s) charged, in the order applied.
# The code 'n' is accepted silently and adds no cost.
pathway_codes = {
    's': ('srp_translocation',),
    'r': ('srp_translocation',),
    'p': ('srp_yidC_translocation',),
    't': ('tat_translocation',),
    'a': ('srp_translocation', 'secA_translocation'),
    'l': ('lol_translocation',),
    'b': ('bam_translocation',),
    'y': ('yidC_translocation',),
}

transloc = pandas.read_csv(
    join(ecoli_files_dir, "peptide_compartment_and_pathways2.txt"),
    sep="\t", comment="#")

for row_index, row in transloc.iterrows():
    # The protein id is the part of the "Protein" column before any '('.
    protein_id = row.Protein.split('(')[0]
    me.metabolites.get_by_id(row.Complex).compartment = \
        row.Complex_compartment
    me.metabolites.get_by_id('protein_' + protein_id).compartment = \
        row.Protein_compartment

    code = row.translocase_pathway
    if code in pathway_codes:
        for pathway_name in pathway_codes[code]:
            pathway = getattr(me.translocation_pathways, pathway_name)
            pathway.add_translocation_cost(me, row.Complex, row.Protein)
        if code == 's':
            # This is in preparation for the membrane constraint.
            mass = me.translation_data.get_by_id(protein_id).mass
            proteins_sa_coeff_inner[row.Complex] = \
                proteins_sa_coeff_inner.get(row.Complex, 0) + mass*1.21/42.*2
    elif code != 'n':
        # Unknown pathway code: report it.
        print(code)

In [None]:
# Display the stoichiometry stored for the 'TatBC_octa' complex data as a
# check after the translocation costs were added.
me.complex_data.get_by_id('TatBC_octa').stoichiometry

Remove unused protein and mRNA to make the model solve faster (TODO remove unused complexes too)

In [None]:
# A complex, protein or RNA that takes part in exactly one reaction is
# treated as unused: that single reaction is deleted, along with any
# metabolites it leaves orphaned. The three passes run in this order because
# each one can leave further metabolites with a single reaction.
for complex_entry in me.complex_data:
    complex_met = complex_entry.complex
    if len(complex_met.reactions) != 1:
        continue
    (only_reaction,) = complex_met.reactions
    only_reaction.delete(remove_orphans=True)

for protein in me.metabolites.query("protein"):
    if len(protein._reaction) != 1:
        continue
    (only_reaction,) = protein._reaction
    only_reaction.delete(remove_orphans=True)

for rna in me.metabolites.query("RNA"):
    if len(rna._reaction) != 1:
        continue
    (only_reaction,) = rna._reaction
    only_reaction.delete(remove_orphans=True)


This gives the total number of genes included

In [None]:
# Number of reactions returned by querying the reaction list for
# "transcription".
len(me.reactions.query("transcription"))

In [None]:
# Total number of reactions currently in the model.
len(me.reactions)

In [None]:
# Total number of metabolites currently in the model.
len(me.metabolites)

In [None]:
# Number of metabolites returned by querying the metabolite list for 'RNA_'.
len(me.metabolites.query('RNA_'))

In [None]:
# Number of metabolites returned by querying the metabolite list for
# 'protein_'.
len(me.metabolites.query('protein_'))

## Attempt to set keffs

In [None]:
# Map previously computed keffs onto the MetabolicReactions of this model.
#
# `divalent_list` is not defined in this notebook; presumably it comes from
# one of the star imports at the top -- confirm.
monovalent_list = ['_mod_k','_mod_na1']

# SECURITY: pickle.load can execute arbitrary code. Only load
# "test_keffs.pickle" when it comes from a trusted source.
from pickle import load
with open("test_keffs.pickle", "rb") as infile:
    old_keffs = load(infile)

# Rewrite ion-specific modification tags in the stored keys to the generic
# ion names so they can be matched against this model's reaction ids.
keffs = {}
for keff, value in old_keffs.items():
    for i in divalent_list:
        keff = keff.replace(i, 'generic_divalent')
    for i in monovalent_list:
        keff = keff.replace(i, '_mod_generic_monovalent')
    keffs[keff] = value

for r in me.reactions:
    # Only enzyme-catalyzed metabolic reactions get a keff from the file;
    # spontaneous reactions and those using the dummy complex keep theirs.
    if not isinstance(r, MetabolicReaction):
        continue
    if r.complex_data is None or r.complex_data.id == "CPLX_dummy":
        continue
    met_rxn = r

    # Build the key as it appears in the keff file: escape special
    # characters and drop the direction tag from the reaction id.
    key = met_rxn.id.replace("-", "_DASH_").replace("__", "_DASH_").replace(":","_COLON_")
    key = "keff_" + key.replace("_FWD_", "_").replace("_REV_", "_")

    # Candidates contain the key and end with the matching direction suffix.
    if met_rxn.reverse:
        suffix = "_reverse_priming_keff"
    else:
        suffix = "_forward_priming_keff"
    matches = [i for i in keffs if key in i and i.endswith(suffix)]

    if len(matches) == 1:
        met_rxn.keff = keffs[matches[0]]
    elif len(matches) > 0:
        # Several candidates are only acceptable when all of them are
        # modified variants of this same key. The original filter tested the
        # truthiness of the string key + "_mod_", which is always true, so
        # ambiguous matches were silently assigned the first candidate.
        if all(key + "_mod_" in i for i in matches):
            met_rxn.keff = keffs[matches[0]]
        else:
            print("%s %d" % (key, len(matches)))
    else:  # len(matches) == 0
        print("no keff found for %s" % key)

## Solve

In [None]:
# Give the forward dummy reaction (catalyzed by the dummy complex) an
# objective coefficient of 1.
me.reactions.dummy_reaction_FWD_CPLX_dummy.objective_coefficient = 1.

In [None]:
# Turn off reactions that throw off results as in iOL
KO_list = ['DHPTDNR','DHPTDNRN', 'SUCASPtpp','SUCFUMtpp', 'SUCMALtpp', 'SUCTARTtpp', 
           'CAT', 'FHL', 'SPODM', 'SPODMp']
for reaction in KO_list:
    a = me.reactions.query(reaction + '_')
    for rxn in a:
        rxn.upper_bound = 0
        rxn.lower_bound = 0

In [None]:
# Set the lower bound of the EX_glc__D_e exchange reaction to -100.
me.reactions.EX_glc__D_e.lower_bound = -100

In [None]:
# Compile the model's symbolic expressions once; the result is passed to
# both solver calls below as `compiled_expressions`.
expressions = compile_expressions(me)

In [None]:
# Solve the model with the second argument fixed at 0.1 (presumably the
# growth rate, going by the function name -- confirm), reusing the compiled
# expressions.
solve_at_growth_rate(me, 0.1, compiled_expressions=expressions)

In [None]:
# Binary search between min_mu=0 and max_mu=2 with mu_accuracy=1e-6
# (presumably searching for the maximum feasible growth rate -- confirm),
# reusing the compiled expressions.
binary_search(me, min_mu=0, max_mu=2, mu_accuracy=1e-6,
              compiled_expressions=expressions)

In [None]:
# Show which reactions each generic RNase metabolite participates in.
# `generic_RNase_list` is not defined in this notebook; presumably it comes
# from one of the star imports -- confirm.
for rnase_id in generic_RNase_list:
    rnase = me.metabolites.get_by_id(rnase_id)
    print(rnase.reactions)

In [None]:
import escher
# Show the metabolic fluxes of the solved model on the
# "iJO1366.Central metabolism" Escher map, inline in the notebook.
view = escher.Builder("iJO1366.Central metabolism")
view.reaction_data = me.get_metabolic_flux()
view.display_in_notebook()