
# Build a basic ME model

We will try to build an ME model from the NC_000913.2 GenBank file, the iJO1366 M-model, and the complex reconstruction from iJL1650-ME.

In [None]:
# python imports
import re
import json
from os.path import join

# third party imports
import pandas
import escher
import cobra.test

# ecoli me
from ecolime.flat_files import *

from minime import *
#from minime.util.building import build_reactions_from_genbank, add_transcription_reaction, add_TU_transcription_reaction
from minime.solve.algorithms import binary_search, fva, solve_at_growth_rate
from minime.solve.symbolic import compile_expressions

In [None]:
def fix_id(id_str):
    """Return *id_str* with every ``_DASH_`` marker rewritten as ``__``.

    Applied in this notebook to ids read from the flat files before they
    are used as model ids.
    """
    return "__".join(id_str.split("_DASH_"))

In [None]:
# Create the empty ME model and declare the three compartments it uses.
me = MEmodel("iJO1366-ME")
me.id = "iJO1366-ME"
me.compartments = {
    "c": "Cytosol",
    "e": "Extra-organism",
    "p": "Periplasm",
}
# Reverse map (compartment name -> one-letter code)
compartment_lookup = dict(
    (name, code) for code, name in me.compartments.items())

In [None]:
# Tab-separated metabolite table without a header row; the first column
# ("id") becomes the index.
met_info = pandas.read_csv(
    join(ecoli_files_dir, "metabolites.txt"),
    names=["id", "name", "formula", "compartment", "data_source"],
    index_col=0, header=None, delimiter="\t")

In [None]:
# Create one Metabolite per (metabolite, compartment) pair in the table.
for met_id in met_info.index:
    fixed_id = fix_id(met_id)
    # Several compartments for one metabolite are joined by "AND"
    for compartment in met_info.compartment[met_id].split("AND"):
        compartment = compartment.strip()
        if compartment == "No_Compartment":
            # Single-argument print(...) behaves the same on Python 2 and 3
            print("Assigned %s to e" % met_id)
            compartment = me.compartments["e"]
        metab = Metabolite(fixed_id + "_" + compartment_lookup[compartment])
        metab.name = met_info.name[met_id]
        metab.formula = met_info.formula[met_id]
        me.add_metabolites(metab)

Add generic ions to simplify complexes

In [None]:
# Pool the individual ions into generic species so that complexes can be
# written against a single metabolite per ion class.
# divalent_list and monovalent_list are expected to come from one of the star
# imports; the former ``x = x`` self-assignments were no-ops and are dropped.
comp = "_c"  # compartment


def _add_generic_ion(generic_id, generic_name, ion_ids):
    """Add a generic ion plus one ``<ion>_c_to_generic`` reaction per ion.

    Each reaction consumes one specific ion and produces one generic ion.
    Returns the generic metabolite.
    """
    generic = cobra.Metabolite(generic_id + comp)
    generic.name = generic_name
    # Register the generic ion explicitly (previously only done for the
    # divalent ion; the monovalent one relied on add_reaction to add it)
    me.add_metabolites(generic)
    for ion in ion_ids:
        conversion = cobra.Reaction(ion + comp + '_to_generic')
        conversion.add_metabolites(
            {me.metabolites.get_by_id(ion + comp): -1, generic: 1})
        me.add_reaction(conversion)
    return generic


# Divalent
div = _add_generic_ion('generic_divalent', 'Generic divalent ion',
                       divalent_list)
# Monovalent
mono = _add_generic_ion('generic_monovalent', 'Generic monovalent ion',
                        monovalent_list)

## Build Metabolic reactions

In [None]:
# Load the reaction table (its index is iterated as reaction ids below, and
# its is_reversible / is_spontaneous columns are read) and the stoichiometry
# lookup, which is indexed by reaction id and iterated as
# metabolite id -> coefficient pairs.
reaction_info = get_reaction_info_frame()
reaction_dict = get_reaction_matrix_dict()

In [None]:
# Register one StoichiometricData entry per reaction in the reaction table.
for r_id in reaction_info.index:
    reaction = StoichiometricData(r_id, me)
    reaction._stoichiometry = dict(
        (fix_id(met_id), coefficient)
        for met_id, coefficient in reaction_dict[r_id].items())
    # Only reversible reactions get a negative lower bound
    if reaction_info.is_reversible[r_id]:
        reaction.lower_bound = -1000.
    else:
        reaction.lower_bound = 0.
    reaction.upper_bound = 1000.

In [None]:
# Display the model's reactions. Every other cell in this notebook uses
# ``me.reactions``; ``me.reaction`` (singular) looked like a typo.
me.reactions

Also make a dummy reaction

In [None]:
# Placeholder reaction with an empty stoichiometry, bounded to [0, 1000].
dummy = StoichiometricData("dummy_reaction", me)
dummy.lower_bound, dummy.upper_bound = 0, 1000
dummy._stoichiometry = {}

Boundary Reactions

In [None]:
# Read the source/sink table (indexed by metabolite id) and the uptake
# amounts for exchange reactions; ids in both are normalised with fix_id.
sources_sinks = pandas.read_csv(
    join(ecoli_files_dir, "reaction_matrix_sources_and_sinks.txt"),
    delimiter="\t", header=None,
    names=["rxn_id", "met_id", "compartment", "stoic"], index_col=1)
sources_sinks.index = [fix_id(i) for i in sources_sinks.index]

source_amounts = pandas.read_csv(
    join(ecoli_files_dir, "exchange_bounds.txt"),
    delimiter="\t", index_col=0, names=["met_id", "amount"])
source_amounts.index = [fix_id(i) for i in source_amounts.index]

# Walk the rows themselves instead of looking every id up again by label: a
# label lookup returns a Series rather than a scalar when a metabolite id
# occurs on more than one row.
# NOTE(review): the "stoic" column is not used; every boundary reaction
# consumes exactly one unit of its metabolite - confirm this is intended.
for met_id, row in sources_sinks.iterrows():
    model_met_id = met_id + "_" + compartment_lookup[row["compartment"]]
    # EX_ or DM_ + met_id
    reaction_id = row["rxn_id"][:3] + model_met_id
    reaction = cobra.Reaction(reaction_id)
    me.add_reaction(reaction)
    reaction.add_metabolites({me.metabolites.get_by_id(model_met_id): -1})
    # set bounds on exchanges
    if reaction.id.startswith("EX_") and met_id in source_amounts.index:
        reaction.lower_bound = -source_amounts.amount[met_id]

Set bounds on boundary reactions

In [None]:
# Add all RNA related metabolites/reactions. Important to run first if using TUs!
# The returned mapping is later looked up with "left_pos,right_pos" string
# keys and yields RNA metabolite ids.
RNA_pos_dict = build_reactions_from_genbank(me, join(ecoli_files_dir, "NC_000913.2.gb"), using_TUs=True)

## Build ribosome and RNA Polymerase

In [None]:
# Add generic_16s, 23s, and 5s rRNA formation reactions (and generic_RNase)

list_16s = ['b3851', 'b3968', 'b3756', 'b3278', 'b4007', 'b2591', 'b0201']
list_23s = ['b3854', 'b3970', 'b3758', 'b3275', 'b4009', 'b2589', 'b0204']
list_5s = ['b3855', 'b3971', 'b3759', 'b3274', 'b4010', 'b2588', 'b0205', 'b3272']
# NOTE(review): 'Rph_mono_mod_mg2' lacks the "<count>:" prefix the other
# entries carry before the ion - confirm the id is spelled as intended.
list_RNase = ['RNase_T_dim_mod_4:mg2', 'RNase_BN_dim_mod_2:zn2', 'Rnd_mono_mod_5:mg2',
              'Rnb_mono_mod_1:mg2', 'Rph_mono_mod_mg2']
# The RNase complexes are referenced by their generic-divalent-ion ids
list_RNase = [R.replace('mg2', 'generic_divalent').replace('zn2', 'generic_divalent')
              for R in list_RNase]

# Explicit (suffix, members) pairs replace the former eval('list_' + i)
generic_members = [('16s', list_16s), ('23s', list_23s),
                   ('5s', list_5s), ('RNase', list_RNase)]
for i, members in generic_members:
    for RNA in members:
        if RNA.startswith('b'):
            # locus tags refer to RNA metabolites
            RNA_id = 'RNA_' + RNA
            r = cobra.Reaction("rRNA_" + RNA + '_to_generic')
        else:
            # anything else is used as a metabolite id directly
            RNA_id = RNA
            r = cobra.Reaction("spliceosome_" + RNA + '_to_generic')
        me.add_reaction(r)
        r.reaction = RNA_id + ' <=> generic_' + i

In [None]:
# Assemble the ribosome as one ComplexData: every subunit, cofactor and
# assembly factor is written into its stoichiometry / modifications dicts
# and a single complex formation reaction is created at the end.
ribosome_complex = ComplexData("ribosome", me)
ribosome_components = ribosome_complex.stoichiometry
ribosome_modifications = ribosome_complex.modifications

# 30S assembly associated reactions
# Phase 1
# ribosome 30S assembly via 2 GTP bound to the ribosome 30S assembly factor
# NOTE(review): the stoichiometry lists gtp, h2o, h and pi but no gdp -
# confirm the GTP hydrolysis is balanced as intended.
mod_1 = ModificationData('gtp_bound_30S_assembly_factor_phase1', me)
mod_1.stoichiometry = {'gtp_c': 2, 'h2o_c': 2, 'h_c': -2, 'pi_c': -2}
mod_1.enzyme = 'Era_dim'
ribosome_modifications[mod_1.id] = -1

# The next two modifications only name an enzyme; no stoichiometry is set here
mod_2 = ModificationData('RbfA_mono_aasembly_factor_phase1', me)
mod_2.enzyme = 'RbfA_mono'
ribosome_modifications[mod_2.id] = -1

mod_3 = ModificationData('RimM_mono_aasembly_factor_phase1', me)
mod_3.enzyme = 'RimM_mono'
ribosome_modifications[mod_3.id] = -1

ribosome_components['generic_16s'] = 1
# 30S Listed as [rpsA -rpsU], sra
# [rplA-rplF],  rplI, [rplK-rplY],
# [rpmA-rpmJ]

# TODO change these to complex names
# One copy of each listed protein
for i in ["b0911", "b0169", "b3314", "b3296", "b3303", "b4200", "b3341",
          "b3306", "b3230", "b3321", "b3297", "b3342", "b3298", "b3307",
          "b3165", "b2609", "b3311", "b4202", "b3316", "b0023", "b3065",
          "b1480"]:
    ribosome_components["protein_" + i] = 1
# Phase 2
ribosome_components['mg2_c'] = 60


# 50s reactions
ribosome_components['generic_23s'] = 1
ribosome_components['generic_5s'] = 1

# 50S listed as [rplA-rplF],rplJ, rplI, rplK [rplM-rplY], [rpmA-rpmJ]
for i in ["b3984", "b3317", "b3320", "b3319", "b3308", "b3305",
          "b3958", "b4203", "b3983", "b3231", "b3310", "b3301",
          "b3313", "b3294", "b3304", "b2606", "b1716", "b3186",
          "b3315", "b3318", "b3309", "b2185",
          "b3185", "b3637", "b3312", "b3302", "b3936", "b1089",
          "b3636", "b3703", "b1717", "b3299"]:
    ribosome_components["protein_" + i] = 1
# [rplJ, 2(2[rpIL7])]
ribosome_components["protein_" + "b3986"] = 4

# Added on top of the 60 mg2_c set above (171 in total)
ribosome_components['mg2_c'] += 111
# get ribosome ready for translation
# ribosome_50 + ribosome_30 + trigger_factor -> rib_70
ribosome_components['Tig_mono'] = 1

# rib_70 + If_1 + If_3 -> rib_50_trigger_factor + rib30_if1_if3
ribosome_components['InfA_mono'] = 1
ribosome_components['InfC_mono'] = 1

# 1 b3168_assumedMonomer_gtp (InfB_mono) + 1 rib_30_IF1_IF3 --> 1 rib_30_ini
ribosome_components['InfB_mono'] = 1
ribosome_components['gtp_c'] = 1

# rib_30_ini + rib_50_trigger_factor -> ribosome complex
ribosome_complex.create_complex_formation()

In [None]:
# RNA polymerase core enzyme: two copies of rpoA, one each of rpoB and rpoC.
RNAP_complex = ComplexData("RNA_Polymerase", me)
RNAP_components = RNAP_complex.stoichiometry
for i, copies in (("b3295", 2),    # rpoA
                  ("b3987", 1),    # rpoB
                  ("b3988", 1)):   # rpoC
    RNAP_components["protein_" + i] = copies

RNAP_complex.create_complex_formation()

In [None]:
me.metabolites.RNA_b3295.reactions

In [None]:
import itertools

In [None]:
def find_existing_genome_region(me_model,left_pos, right_pos, TU_strand, has_5prime_triphosphate):
    """Return the id of the RNA metabolite covering left_pos..right_pos.

    If the module-level ``RNA_pos_dict`` has an entry for the key
    ``"<left_pos>,<right_pos>"`` that id is returned unchanged. Otherwise an
    ``excised_TU_<strand>_<left>_<right>_<has_5prime_triphosphate>`` id is
    built (strand spelled ``P``/``M``). If ``me_model`` has no metabolite
    with that id yet, a TranscribedGene carrying the positions and strand
    is added together with a ``DM_<id>`` reaction that consumes it.

    Parameters
    ----------
    me_model : model the region is looked up in and, if needed, added to
    left_pos, right_pos : int
        Bounds of the region (formatted with ``%i``).
    TU_strand : str
        ``'+'`` or ``'-'``.
    has_5prime_triphosphate : str
        Only used as part of the generated id.

    Returns
    -------
    str
        The metabolite id for the region.
    """
    try:
        return RNA_pos_dict[str(left_pos) + ',' + str(right_pos)]
    except KeyError:
        pass

    TU_name = 'excised_TU_%s_%i_%i_%s' % (
        TU_strand.replace('-', 'M').replace('+', 'P'),
        left_pos, right_pos, has_5prime_triphosphate)
    try:
        # Look in the model that was passed in, not in the global ``me``
        me_model.metabolites.get_by_id(TU_name)
    except KeyError:
        new_TU = TranscribedGene(TU_name)
        new_TU.left_pos = left_pos
        new_TU.right_pos = right_pos
        new_TU.strand = TU_strand
        me_model.add_metabolites([new_TU])
        # Demand reaction for the excised segment
        r = cobra.Reaction('DM_' + TU_name)
        me_model.add_reaction(r)
        r.reaction = TU_name + ' --> '

    return TU_name

In [None]:
def transcribe_all_TU_combos(me_model, TU, TU_left, TU_right, TU_strand, bnum_set, TU_seq, TU_pieces):
    """Enumerate every way of excising the RNAs in ``bnum_set`` from a TU.

    For each subset of the RNAs named in ``bnum_set`` the TU is cut into a
    left-to-right list of metabolite ids: the excised ``RNA_<bnum>``
    metabolites plus the flanking and intervening segments, which are
    obtained from find_existing_genome_region. Every list is appended to
    ``TU_pieces[TU]``; the empty subset contributes an empty list.

    Parameters
    ----------
    me_model : model holding the ``RNA_<bnum>`` metabolites
    TU : str
        Key under which the piece lists are stored in ``TU_pieces``.
    TU_left, TU_right : int
        Bounds of the transcription unit.
    TU_strand : str
        ``'+'`` or ``'-'``.
    bnum_set : iterable of str
        Locus tags of the RNAs that may be excised.
    TU_seq : unused, kept for interface compatibility
    TU_pieces : dict
        Updated in place.

    Side effects
    ------------
    ``has_5prime_triphosphate`` is set to ``'False'`` on an excised RNA that
    does not sit at the 5' end of the TU. Nothing is returned.
    """
    # Index the RNAs by left position so that the RNA belonging to a cut
    # site can be recovered directly. (The flag below used to be written to
    # whichever locus happened to be iterated last.)
    rna_by_left = {}
    all_lefts = []
    for bnum in bnum_set:
        rna_id = 'RNA_' + bnum
        rna = me_model.metabolites.get_by_id(rna_id)
        all_lefts.append(rna.left_pos)
        rna_by_left[rna.left_pos] = (rna_id, rna)

    combos = TU_pieces.setdefault(TU, [])

    # 0 .. len(all_lefts) excised RNAs
    for n_excised in range(len(all_lefts) + 1):
        for left_combo in itertools.combinations(all_lefts, n_excised):
            # Nothing excised for this combination
            if not left_combo:
                combos.append([])
                continue

            lefts = sorted(left_combo)
            pieces = []

            # Segment between the TU start and the first excised RNA.
            # On the (+) strand it still carries the 5' triphosphate.
            if lefts[0] > TU_left:
                has_5prime_triphosphate = \
                    'False' if TU_strand == '-' else 'True'
                pieces.append(find_existing_genome_region(
                    me_model, TU_left, lefts[0] - 1,
                    TU_strand, has_5prime_triphosphate))

            last = len(lefts) - 1
            for pos, left in enumerate(lefts):
                rna_id, rna = rna_by_left[left]
                right = rna.right_pos

                # tRNA, rRNA loses 5' triphosphate if cleaved
                if (TU_strand == '+' and left != TU_left) or \
                        (TU_strand == '-' and right != TU_right):
                    rna.has_5prime_triphosphate = 'False'
                pieces.append(rna_id)

                if pos < last:
                    # Segment between this RNA and the next excised one
                    pieces.append(find_existing_genome_region(
                        me_model, right + 1, lefts[pos + 1] - 1,
                        TU_strand, 'False'))
                elif TU_right - right > 0:
                    # Far right segment; on the (-) strand it still carries
                    # the 5' triphosphate
                    has_5prime_triphosphate = \
                        'True' if TU_strand == '-' else 'False'
                    pieces.append(find_existing_genome_region(
                        me_model, right + 1, TU_right,
                        TU_strand, has_5prime_triphosphate))

            combos.append(pieces)
                

In [None]:
# Genes without transcription-unit information: each one gets its own
# transcription reaction (named mRNA_<locus>) and is translated from its own
# RNA sequence.
no_TU_info_list = ['b3247', 'b3166', 'b3167', 'b3705', 'b3980']
for TU in no_TU_info_list:
    rna_id = 'RNA_' + TU
    protein_id = 'protein_' + TU

    RNA = me.metabolites.get_by_id(rna_id)
    add_transcription_reaction(me, 'mRNA_' + TU, {TU}, RNA.seq)

    # The looked-up protein metabolite is not used further in this loop
    protein = me.metabolites.get_by_id(protein_id)
    translation_data = TranslationData(TU, me, protein_id)
    translation_data.TUs = [rna_id]
    translation_data.compute_sequence_from_DNA(RNA.seq)
    translation_data.create_translation_reactions()

In [None]:
from Bio import SeqIO


#stable_RNA_df = pandas.read_csv(join(ecoli_files_dir,'stable_RNA.txt'), delimiter='\t', index_col=0)
#stable_RNA_dict = stable_RNA_df.T.to_dict()
#TU_gene_dict = TU_gene_df.T.to_dict()
# Read the genome sequence and the EcoCyc transcription-unit (TU) table, then
# add transcription (and, where possible, translation) for every TU.
gb_file = SeqIO.read(join(ecoli_files_dir,'NC_000913.2.gb'), 'gb')
full_seq = str(gb_file.seq)
TU_df = pandas.read_csv(join(ecoli_files_dir,'TUs_from_ecocyc.txt'), delimiter="\t", index_col=0)


# TU id -> list of piece lists; filled in by transcribe_all_TU_combos
TU_pieces = {}
for index, TU in TU_df.iterrows():
    seq = full_seq[TU.start:TU.stop]
    if TU.strand == '-':
        seq = util.dogma.reverse_transcribe(seq)
    # NOTE(review): new_index is computed but never read afterwards
    index_list = index.split('_')[0:2]
    new_index = index_list[0] + '_' + index_list[1]
    new_index = new_index.replace('_from', '').replace('_with','')

    excise = False
    # Collect the loci whose RNA lies inside this TU on the same strand.
    # RNA_pos_dict keys are "left,right" strings.
    loci = []
    for string_pos in RNA_pos_dict:
        pos = string_pos.split(',')
        # NOTE(review): the left bound is compared with a +1 offset and via
        # float(), the right bound via int() - confirm the offset is intended
        if int(pos[0]) + 1 >= float(TU.start) and int(pos[1]) <= int(TU.stop):
            if me.metabolites.get_by_id(RNA_pos_dict[string_pos]).strand == TU.strand:
                loci.append(RNA_pos_dict[string_pos].replace('RNA_',''))
    loci = set(loci)
    #print index, loci
    # A TU containing a tRNA, an rRNA or b3123 has to be excised
    for locus in loci:
        try:
            if me.metabolites.get_by_id('RNA_'+locus).RNA_type == 'tRNA' or \
            me.metabolites.get_by_id('RNA_'+locus).RNA_type == 'rRNA' or locus == 'b3123':
                excise = True
        except:
            # NOTE(review): the bare except silently skips any failure here
            pass

    if excise == False:
        # Plain TU: one transcription reaction, and every locus that has a
        # protein metabolite gets this TU registered for translation
        add_TU_transcription_reaction(me, index, loci, seq)
        for locus in loci:
            protein_id = 'protein_' + locus
            try:
                protein = me.metabolites.get_by_id(protein_id)
            except:
                # no protein metabolite for this locus
                continue
            try:
                # translation data already exists: add this TU to it
                translation_data = me.translation_data.get_by_id(locus)
                translation_data.TUs += [index]
            except:
                translation_data = TranslationData(locus, me, protein_id)
                translation_data.TUs = [index]
                translation_data.amino_acid_sequence = protein.amino_acid_sequence
            translation_data.create_translation_reactions()
    else:
        # Special case: this TU is excised but its loci other than b3123 are
        # still translated, each from its own RNA
        if index == 'TU482_with_TERM161_from_RpoD_mono':
            for locus in loci:
                if locus == 'b3123': # RNAseP
                    continue
                protein_id = 'protein_' + locus
                # NOTE(review): this TranslatedGene is created but not used
                # or added to the model in this cell
                protein = TranslatedGene(protein_id)
                mRNA_id = 'RNA_' + locus
                mRNA = me.metabolites.get_by_id(mRNA_id)
                translation_data = TranslationData(locus, me, protein_id)
                translation_data.TUs = ['RNA_' + locus]
                translation_data.compute_sequence_from_DNA(mRNA.seq)
                translation_data.create_translation_reactions()
        TU_data = TranscriptionData(index, me)
        TU_data.nucleotide_sequence = seq
        TU_data.RNA_products = loci
        # Must splice TU to form tRNA, rRNA, sRNA (not supported)
        transcribe_all_TU_combos(me, index, TU.start, TU.stop, TU.strand, loci, seq, TU_pieces)

In [None]:
# Combine excision machinery into one set
rRNA_containing = ['RNase_E_tetra_mod_2:zn2', 'RNase_P_cplx_mod_2:mg2', 'generic_RNase', 'RNase_m5', 'RNase_m16', 
                   'RNase_m23', 'RNase_III_dim_mod_2:mg2', 'RNase_G_dim', 'RNase_T_dim_mod_4:mg2']

monocistronic = ['RNase_E_tetra_mod_2:zn2', 'RNase_P_cplx_mod_2:mg2', 'generic_RNase']

polycistronic_wout_rRNA = ['RNase_E_tetra_mod_2:zn2', 'RNase_P_cplx_mod_2:mg2', 'generic_RNase', 'RNase_III_dim', 
                           'RNase_G_dim', 'RNase_T_dim_mod_4:mg2']

excision_types = ['rRNA_containing', 'monocistronic', 'polycistronic_wout_rRNA']
for excision in excision_types:
    excision_dict = {}
    r = cobra.Reaction('combine_' + excision + '_excision_machinery')
    for machine in eval(excision):
        for ion in divalent_list:
            machine = machine.replace(ion,'generic_divalent')
        excision_dict[Metabolite(machine)] = -1
    excision_dict[Metabolite(excision + '_excision_set')] = 1
    r.add_metabolites(excision_dict)
    me.add_reaction(r)
                                                                                
    # Add modification data objects for TU excision reactions
    rRNA_mod = ModificationData(excision + '_excision', me)
    rRNA_mod.stoichiometry = {'h2o_c': -1, 'h_c': 1}
    rRNA_mod.enzyme = excision + '_excision_set'

In [None]:
# Create one transcription reaction per non-empty piece list of every
# excised TU. ``.items()`` (instead of ``.iteritems()``) works on Python 2
# and 3.
for TU, combos_of_pieces in TU_pieces.items():
    for i, pieces in enumerate(combos_of_pieces):
        if len(pieces) < 1:  # no fragments to splice
            continue

        slice_id = TU + '_slice_' + str(i + 1)
        transcription = TranscriptionReaction('transcription_' + slice_id)
        transcription_data = TranscriptionData(slice_id, me)
        # Every slice is transcribed from the full TU sequence
        full_TU_data = me.transcription_data.get_by_id(TU)
        transcription_data.nucleotide_sequence = full_TU_data.nucleotide_sequence

        # Count tRNA / rRNA pieces; any other RNA type does not affect the
        # choice below
        tRNA_count = 0
        rRNA_count = 0
        RNA_products = set()
        for piece in pieces:
            RNA_type = me.metabolites.get_by_id(piece).RNA_type
            if RNA_type == 'tRNA':
                tRNA_count += 1
            elif RNA_type == 'rRNA':
                rRNA_count += 1
            RNA_products.add(piece)

        # Pick the excision machinery; one cut fewer than there are pieces
        if rRNA_count > 0:
            excision_id = 'rRNA_containing_excision'
        elif tRNA_count > 1:
            excision_id = 'polycistronic_wout_rRNA_excision'
        else:
            # exactly one tRNA, or none at all (only applies to rnpB)
            excision_id = 'monocistronic_excision'
        transcription_data.modifications[excision_id] = len(pieces) - 1

        transcription_data.RNA_products = RNA_products
        transcription.transcription_data = transcription_data

        me.add_reaction(transcription)
        transcription.update()

In [None]:
# Diagnostic: print complex components that take part in only one reaction.
# For a component id that is not a metabolite in the model, print the
# RNA_type of the metabolite obtained by replacing 'protein' with 'RNA'.
for cplx in me.complex_data:
    for protein in cplx.stoichiometry.keys():
        try:
            n_reactions = len(me.metabolites.get_by_id(protein).reactions)
        except KeyError:
            print(me.metabolites.get_by_id(protein.replace('protein','RNA')).RNA_type)
        else:
            if n_reactions == 1:
                print(protein)

### Add Transcription/Translation from the genbank file

Add a dummy protein in as well

In [None]:
# Artificial gene: start codon, five codons repeated 12 times each, stop
# codon. It is transcribed, translated and wrapped in a one-protein complex.
dna_sequence = "ATG" + "".join(
    codon * 12 for codon in ("TTT", "TAT", "ACG", "GAT", "AGT")) + "TAA"
add_transcription_reaction(me, "dummy", {"dummy"}, dna_sequence)
me.add_metabolites(TranslatedGene("protein_dummy"))

translation_data = TranslationData("dummy", me, "protein_dummy")
translation_data.compute_sequence_from_DNA(dna_sequence)
translation_data.TUs = ['RNA_dummy']

translation_data.create_translation_reactions()

complex_data = ComplexData("CPLX_dummy", me)
complex_data.stoichiometry = {"protein_dummy": 1}
complex_data.create_complex_formation()

## Associate the tRNA synthetases

The tRNA charging reactions were automatically added when loading the genome from the genbank file. However, the charging reactions still need to be made aware of the tRNA synthetases which are responsible.

In [None]:
# Load the amino acid -> synthetase lookup and store, on every tRNA data
# object, the synthetase listed for its amino acid.
with open(join(ecoli_files_dir, "amino_acid_tRNA_synthetase.json"), "rb") as handle:
    aa_synthetase_dict = json.load(handle)
for tRNA in me.tRNA_data:
    tRNA.synthetase = str(aa_synthetase_dict[tRNA.amino_acid])

### Add in complex Formation with modifications

In [None]:
# ME_complex_dict is a dict of {'complex_id': ['bnum(count)']}
ME_complex_dict = get_complex_to_bnum_dict()
# some entries in the file need to be renamed
renames = {"MnmE_": "b3706", "MnmG_": "b3741", "YheM_": "b3344", "YheL_": "b3343", "YheN_": "b3345"}
rna_components = {"b3123"}  # component id should have RNA_ instead of protein_
# "(count)" directly after the five-character identifier. Reading the whole
# number fixes counts of 10 or more, which the former single-character
# lookup (gene[6]) truncated to their first digit.
subunit_count_pattern = re.compile(r".{5}\((\d+(?:\.\d+)?)\)")

for cplx, value in ME_complex_dict.items():
    complex_data = ComplexData(cplx, me)
    for gene in value:
        bnum = gene[0:5]
        comp_id = "RNA_" + bnum if bnum in rna_components \
            else "protein_" + renames.get(bnum, bnum)
        match = subunit_count_pattern.match(gene)
        if match:
            count = float(match.group(1))
        else:
            # Entry not in the 'bnum(count)' form: keep the historical
            # behaviour (seventh character if numeric, otherwise 1) but no
            # longer fail on entries that are too short
            try:
                count = float(gene[6])
            except (IndexError, ValueError):
                count = 1.
        complex_data.stoichiometry[comp_id] = count

In [None]:
me.reactions.transcription_TU00058_from_RpoD_mono.reaction

In [None]:
# {modified_complex_id: ['unmodified_complex_id', {component_id: stoic}]}
# Every modified complex reuses the composition of its unmodified complex
# and gets one "mod_<component>" modification per listed component.
modification_dict = get_protein_modification_dict(generic=True)
for mod_cplx_id, mod_complex_info in iteritems(modification_dict):
    unmod_cplx_id, mods = mod_complex_info
    unmod_cplx = me.complex_data.get_by_id(unmod_cplx_id)
    cplx = ComplexData(mod_cplx_id, me)
    # NOTE(review): these are the same objects as on the unmodified complex,
    # not copies - confirm that sharing is intended
    cplx.stoichiometry = unmod_cplx.stoichiometry
    cplx.translocation = unmod_cplx.translocation
    cplx.chaperones = unmod_cplx.chaperones
    # Only handle ids whose distinct "_mod_" parts match the listed mods
    if len(set(mod_cplx_id.split("_mod_")[1:])) != len(mods):
        print("TODO: %s" % mod_cplx_id)
        continue
    for mod_comp, mod_count in iteritems(mods):
        mod_id = "mod_" + mod_comp
        try:
            me.modification_data.get_by_id(mod_id)
        except KeyError:
            # First use of this modification: it consumes one component
            mod = ModificationData(mod_id, me)
            mod.stoichiometry = {mod_comp: -1}
        cplx.modifications[mod_id] = -mod_count

Some modifications are enzyme-catalyzed

TODO: Fe-S enzyme-catalyzed modifications

In [None]:
# Two different reactions can add a lipoate modification.
# We create a separate ModificationData for each one
lipo = me.modification_data.get_by_id("mod_lipo_c")
alt_lipo = ModificationData("mod_lipo_c_alt", me)
# (alt_lipo.stoichiometry used to be pointed at lipo.stoichiometry here; both
# were overwritten right below, so that assignment was dropped)

lipo.stoichiometry = {"lipoamp_c": -1, "amp_c": 1}
lipo.enzyme = 'EG11796-MONOMER'
lipo.keff = 65.

alt_lipo.stoichiometry = {'EG50003-MONOMER_mod_pan4p_mod_lipo':-1,
                          'EG50003-MONOMER_mod_pan4p':1,
                          'h_c':-1,}
alt_lipo.enzyme = 'EG11591-MONOMER'
alt_lipo.keff = 65.

# For every complex carrying the lipoate modification, add an alternative
# ComplexData that forms the same complex via the alternative modification.
# The complexes are snapshotted first because new ComplexData objects are
# created inside the loop.
# NOTE(review): the stoichiometry of the original complex is not carried
# over to the alternative ComplexData - confirm that is intended.
for cplx_data in list(lipo.get_complex_data()):
    alt_cplx_data = ComplexData(cplx_data.id + "alt", me)
    alt_cplx_data.complex_id = cplx_data.complex_id
    alt_cplx_data.translocation = cplx_data.translocation
    alt_cplx_data.chaperones = cplx_data.chaperones
    # Copy before swapping the modification: assigning the same dict made
    # the pop/insert below rewrite the original complex as well
    alt_cplx_data.modifications = cplx_data.modifications.copy()
    alt_cplx_data.modifications[alt_lipo.id] = \
        alt_cplx_data.modifications.pop(lipo.id)

In [None]:
# todo bmocogdp_c mods
mod_catalysts = {'CPLX0-1762':'G6712-MONOMER', # FE-S modification
                 'TMAOREDUCTI-CPLX':'EG12195-MONOMER',
                 'DIMESULFREDUCT-CPLX':'G6849-MONOMER',
                 'NITRATREDUCTA-CPLX':'NARJ-MONOMER', 
                 'NITRATREDUCTZ-CPLX':'NARW-MONOMER',
                 'NAP-CPLX':'NAPD-MONOMER',
                 'NAPAB-CPLX_NAPC-MONOMER':'NAPD-MONOMER'}

In [None]:
# Reversible exchange reactions for cofactors and for the RNases that are
# gaps in the model.
target_list = [
    '4fe4s_c', 'LI_c', '2fe2s_c', '3fe4s_c',
    'NiFeCoCN2_c',
    'RNase_m5', 'RNase_m16', 'RNase_m23',  # RNAses are gaps in model
]

for met_id in target_list:
    exchange = cobra.Reaction("EX_" + met_id)
    me.add_reaction(exchange)
    exchange.reaction = met_id + " <=> "

Build all complex formation reactions

TODO: This shouldn't be printing anything out. Fix the complexes that trigger the output.

In [None]:
# Make sure every complex has an up-to-date formation reaction: create the
# missing ones, refresh the ones that already exist.
for cplx_data in me.complex_data:
    formation = cplx_data.formation
    if not formation:
        cplx_data.create_complex_formation()
        continue
    formation.update()

Crutch reactions for mets that are blocked. TODO remove

## Associate Complexes with Reactions

In [None]:
# associate reaction id with the old ME complex id (including modifications)
rxnToModCplxDict = get_reaction_to_modified_complex(generic=True)

In [None]:
# Create the metabolic reactions: one per (stoichiometric data, complex,
# allowed direction) combination.
for reaction_data in me.stoichiometric_data:
    try:
        complexes = rxnToModCplxDict[reaction_data.id]
    except KeyError:
        # No complex associated: orphan reactions (and the dummy reaction)
        # are catalyzed by the dummy complex, truly spontaneous ones by
        # nothing at all.
        is_orphan = (reaction_data.id == "dummy_reaction" or
                     not reaction_info.is_spontaneous[reaction_data.id])
        complexes = ["CPLX_dummy"] if is_orphan else [None]

    for complex_id in complexes:
        complex_data = None
        if complex_id:
            complex_data = me.complex_data.get_by_id(complex_id)

        # Reverse direction first, then forward
        for tag, is_reverse in (("_REV_", True), ("_FWD_", False)):
            if is_reverse:
                allowed = reaction_data.lower_bound < 0
            else:
                allowed = reaction_data.upper_bound > 0
            if not allowed:
                continue
            r = MetabolicReaction(reaction_data.id + tag + str(complex_id))
            me.add_reaction(r)
            r.keff = 65
            r.stoichiometric_data = reaction_data
            if complex_data is not None:
                r.complex_data = complex_data
            r.reverse = is_reverse
            r.update(create_new=True)

In [None]:
# This reaction is weird
me.reactions.get_by_id('CITLY-CPLX_2tpr3dpcoa_FWD_G6340-MONOMER').reaction

Rebuild transcription and translation to use tRNA (now that the tRNA synthetase complexes are in the model).

In [None]:
# Refresh the tRNA charging reactions in a first pass, then the translation,
# transcription and metabolic reactions in a second pass.
for r in me.reactions:
    if isinstance(r, tRNAChargingReaction):
        r.update()

dependent_types = (TranslationReaction, TranscriptionReaction,
                   MetabolicReaction)
for r in me.reactions:
    for reaction_type in dependent_types:
        if isinstance(r, reaction_type):
            r.update()

In [None]:
# Diagnostic: print every RNA_b* metabolite that takes part in at least two
# reactions, none of which is a TranscriptionReaction.
for met in me.metabolites.query('RNA_b'):
    if len(met.reactions) < 2:
        continue
    if any(isinstance(r, TranscriptionReaction) for r in met.reactions):
        continue
    print(met.id)

In [None]:
me.metabolites.TU0_13904_from_RpoD_mono.reactions

In [None]:
me.metabolites.RNA_b0206.right_pos

In [None]:
me.reactions.transcription_TU0_12906_from_RpoD_mono.reaction

## Add in translocation

In [None]:
# One ProteinTranslocationData per row of the pathway table: its rate
# constant and the complexes (joined by ' AND ' in the file) it costs.
translocPath = pandas.read_csv(
    join(ecoli_files_dir, "translocation_pathways.txt"), sep='\t')

for index, pathway in translocPath.iterrows():
    translocRxn = ProteinTranslocationData(pathway.Reaction_name, me)
    translocRxn.keff = float(pathway.Keff)
    translocRxn.costs_complexes = pathway.Complexes.split(' AND ')

In [None]:
# Complex id -> accumulated mass-derived coefficient (see the 's' pathway
# below; kept in preparation for the membrane constraint)
proteins_sa_coeff_inner={}

# translocase_pathway code -> translocation pathways charged, in order.
# 'n' charges nothing; any code missing from this table is printed.
# NOTE(review): 'r' charges the same pathway as 's' - confirm.
pathways_by_code = {
    's': ['srp_translocation'],
    'r': ['srp_translocation'],
    'p': ['srp_yidC_translocation'],
    't': ['tat_translocation'],
    'a': ['srp_translocation', 'secA_translocation'],
    'l': ['lol_translocation'],
    'b': ['bam_translocation'],
    'y': ['yidC_translocation'],
    'n': [],
}

transloc = pandas.read_csv(join(ecoli_files_dir, "peptide_compartment_and_pathways2.txt"), sep='\t', comment="#")
for index, row in transloc.iterrows():
    # Record the compartment of the complex and of the protein; the protein
    # column may carry a "(...)" suffix, which is cut off
    me.metabolites.get_by_id(row.Complex).compartment = row.Complex_compartment
    me.metabolites.get_by_id('protein_'+row.Protein.split('(')[0]).compartment = row.Protein_compartment

    code = row.translocase_pathway
    if code not in pathways_by_code:
        print(code)
        continue
    for pathway_id in pathways_by_code[code]:
        pathway = getattr(me.translocation_pathways, pathway_id)
        pathway.add_translocation_cost(me, row.Complex, row.Protein)

    if code == 's':
        ## This is in preparation for membrane constraint
        mass = me.translation_data.get_by_id(row.Protein.split('(')[0]).mass
        proteins_sa_coeff_inner[row.Complex] = \
            proteins_sa_coeff_inner.get(row.Complex, 0.) + mass*1.21/42.*2

Remove unused protein and mRNA to make the model solve faster (TODO remove unused complexes too)

In [None]:
# Collect (and print) the ids of metabolites that take part in exactly one
# reaction.
# NOTE(review): the lookup strips trailing underscores from the id before
# fetching the metabolite again - confirm why met.reactions is not used.
one_rxn_list = []
for met in me.metabolites:
    if len(me.metabolites.get_by_id(met.id.rstrip('_')).reactions) == 1:
        print(met.id)
        one_rxn_list.append(met.id)

In [None]:
# Of the single-reaction metabolites, print those whose only reaction is a
# ComplexFormation.
for met in one_rxn_list:
    metab = me.metabolites.get_by_id(met)
    for r in metab.reactions:
        if isinstance(r, ComplexFormation):
            print(metab)

In [None]:
me.metabolites.protein_dummy.reactions

In [None]:
# Delete the only reaction of every complex, protein and RNA that takes part
# in a single reaction (orphaned metabolites are removed along with it).
for c_d in me.complex_data:
    users = c_d.complex.reactions
    if len(users) == 1:
        next(iter(users)).delete(remove_orphans=True)
# Proteins are pruned before RNAs; each query runs after the previous pass
for prefix in ("protein", "RNA"):
    for met in me.metabolites.query(prefix):
        if len(met._reaction) == 1:
            next(iter(met._reaction)).delete(remove_orphans=True)

In [None]:
me.metabolites.RNA_b0001.reactions

This gives the total number of genes included

In [None]:
len(me.reactions.query("transcription"))

In [None]:
len(me.reactions)

In [None]:
len(me.metabolites.query('RNA_'))

In [None]:
len(me.metabolites.query('protein_'))

## Attempt to set keffs

In [None]:
# divalent_list is expected to come from a star import (the former
# ``divalent_list = divalent_list`` was a no-op); monovalent_list is rebound
# here to the "_mod_" spellings used in the keff keys.
monovalent_list = ['_mod_k','_mod_na1']
from pickle import load
# NOTE(review): unpickling can execute arbitrary code - only load a
# test_keffs.pickle that comes from a trusted source.
with open("test_keffs.pickle", "rb") as infile:
    old_keffs = load(infile)

# Rewrite the stored keys so that they name the generic ions
keffs = {}
for keff, value in old_keffs.items():
    for i in divalent_list:
        keff = keff.replace(i, 'generic_divalent')
    for i in monovalent_list:
        keff = keff.replace(i, '_mod_generic_monovalent')
    keffs[keff] = value

for r in me.reactions:
    # Only metabolic reactions catalyzed by a real complex get a keff
    if not isinstance(r, MetabolicReaction):
        continue
    if r.complex_data is None or r.complex_data.id == "CPLX_dummy":
        continue
    met_rxn = r
    key = met_rxn.id.replace("-", "_DASH_").replace("__", "_DASH_").replace(":","_COLON_")
    key = "keff_" + key.replace("_FWD_", "_").replace("_REV_", "_")

    # Candidates contain the key and end with the suffix for the direction
    if met_rxn.reverse:
        suffix = "_reverse_priming_keff"
    else:
        suffix = "_forward_priming_keff"
    matches = [i for i in keffs if key in i and i.endswith(suffix)]

    if not matches:
        print("no keff found for %s" % key)
    elif len(matches) == 1:
        met_rxn.keff = keffs[matches[0]]
    elif all((key + "_mod_") in i for i in matches):
        # Several candidates that are all modified forms of the same key.
        # The old filter (`if key + "_mod_"`) was always true, so this
        # branch was taken for every multi-match.
        met_rxn.keff = keffs[matches[0]]
    else:
        print("%s %s" % (key, len(matches)))

## Solve

In [None]:
me.reactions.dummy_reaction_FWD_CPLX_dummy.objective_coefficient = 1.

In [None]:
# Turn off reactions that throw off results as in iOL
KO_list = ['DHPTDNR','DHPTDNRN', 'SUCASPtpp','SUCFUMtpp', 'SUCMALtpp', 'SUCTARTtpp', 
           'CAT', 'FHL', 'SPODM', 'SPODMp']
for reaction in KO_list:
    a = me.reactions.query(reaction+'_')
    for rxn in a:
        rxn.upper_bound = 0
        rxn.lower_bound = 0

In [None]:
me.reactions.EX_glc__D_e.lower_bound = -100

In [None]:
expressions = compile_expressions(me)

In [None]:
sol = solve_at_growth_rate(me, .1, compiled_expressions=expressions)

In [None]:
sol.status

In [None]:
binary_search(me, min_mu=0, max_mu=2, mu_accuracy=1e-6,
              compiled_expressions=expressions)

In [None]:
me.reactions.transcription_mRNA_b3166.reaction

In [None]:
# Print every DM_ reaction that carries a positive flux in the solution.
for key, value in sol.x_dict.items():
    if key.startswith('DM_') and value > 0:
        print("%s --> %s" % (key, value))

In [None]:
# Print every transcription reaction that carries a positive flux.
for key, value in sol.x_dict.items():
    if key.startswith('transcription_') and value > 0:
        print("%s %s" % (key, value))

In [None]:
me.metabolites.RNA_b2590.reactions

In [None]:
me.reactions.transcription_TU0_14237_from_RpoD_mono.reaction

In [None]:
me.compute_solution_error()

In [None]:
# Show the metabolic fluxes of the ME model on the
# "iJO1366.Central metabolism" Escher map.
# (escher is already imported in the first cell; the import is repeated here.)
import escher
view = escher.Builder("iJO1366.Central metabolism")
view.reaction_data = me.get_metabolic_flux()
view.display_in_notebook()