In [1]:
from tools import *

## Add exchange reactions for all metabolites in the medium. Use `_e` for the extracellular compartment. Make all of these exchange reactions reversible

* For metabolites that don't have any transporter, add reversible transport reactions
* Use the MetaCyc id for all metabolites. If there is no MetaCyc id, use the KEGG id; if there is neither, use the bigg.metabolite id
* Use 'Exchange_xx' for exchange reactions, where xx is a metabolite id
* Use 'TransportPC_xx' for transport reactions (between the `_p` and `_c` compartments), where xx is a metabolite id

In [2]:
def add_medium_rxns(model):
    """Add exchange and transport reactions for every metabolite of the medium.

    Reads the 'medium' sheet of core_biomass_medium_summary_iML1515.xlsx (the
    first row is skipped as a header; column 0 = name, column 2 = BiGG id,
    column 6 = MNX id) and creates, for each row, the reversible reactions

    * ``Exchange_<id>``:    `` <=> <id>_e``
    * ``ExchangeEP_<id>``:  ``<id>_e <=> <id>_p``
    * ``TransportPC_<id>``: ``<id>_p <=> <id>_c`` (only for the ids listed in
      ``no_transporter_ids`` below)

    The metabolite id is chosen from the MetaCyc ids mapped to the MNX id, or
    from the KEGG ids when there is no MetaCyc id, or is the BiGG id when
    there is neither. Among several candidates the first one found in
    ``model.metabolites`` wins, otherwise the first candidate is used.

    Parameters
    ----------
    model : cobra.Model
        Draft model. Its ``reactions`` list is extended in place.

    Returns
    -------
    cobra.Model
        A new model carrying the id of ``model`` and all of its reactions
        plus the reactions created here.
    """
    book = xlrd.open_workbook('../Results/core_biomass_medium_summary_iML1515.xlsx')
    sh = book.sheet_by_name('medium')

    mnx2meta,mnx2kegg = load_MNX('../../../ComplementaryData/chem_xref.tsv')

    # metabolites for which no transporter was found ('CO+2','MN+2','K+','CU+2' were once listed too)
    no_transporter_ids = ('OXYGEN-MOLECULE',)

    all_rxns = model.reactions

    def find_met(met_key):
        # Metabolite of the model with this id, or None when the id is unknown.
        try:
            return model.metabolites.get_by_id(met_key)
        except KeyError:
            return None

    def get_or_create_met(met_key, met_name, compartment):
        # Reuse the metabolite of the model when it exists, otherwise build it.
        met = find_met(met_key)
        if met is None:
            met = cobra.Metabolite(met_key)
            met.name = met_name
            met.compartment = compartment
        return met

    def add_reversible_rxn(rxn_id, stoichiometry):
        # Build a reversible reaction, queue it for the new model and report it.
        rxn = cobra.Reaction(rxn_id)
        rxn.upper_bound = 1000
        rxn.lower_bound = -1000
        rxn.add_metabolites(stoichiometry)
        all_rxns.append(rxn)
        print(rxn.id,rxn.reaction)

    for row in range(1, sh.nrows):
        mnx = sh.cell(row,6).value
        bigg = sh.cell(row,2).value
        name = sh.cell(row,0).value

        # MetaCyc ids first, KEGG ids second; a missing or empty list falls through.
        candidates = mnx2meta.get(mnx) or mnx2kegg.get(mnx)
        if candidates:
            # NOTE(review): the lookup uses the id without a compartment suffix;
            # confirm the model really stores such ids, otherwise the first
            # candidate is always the one picked.
            met_id = next((item for item in candidates if find_met(item) is not None),
                          candidates[0])
        else:
            met_id = bigg

        # create exchange reaction
        met_e = cobra.Metabolite(met_id+'_e')
        met_e.name = name
        met_e.compartment = 'e'
        add_reversible_rxn('Exchange_'+met_id, {met_e:1})

        # create exchange reaction between e and p
        met_p = get_or_create_met(met_id+'_p', name, 'p')
        add_reversible_rxn('ExchangeEP_'+met_id, {met_e:-1,met_p:1})

        # create transport reaction for metabolites whose transporter was not found
        if met_id in no_transporter_ids:
            met_c = get_or_create_met(met_id+'_c', name, 'c')
            add_reversible_rxn('TransportPC_'+met_id, {met_p:-1,met_c:1})

    # If model.add_reactions([...]) is used to add the transport and exchange
    # reactions, model.copy() later fails with a key error in the optlang
    # constraints/variables, so a fresh model is built from the full reaction list.
    new_model = cobra.Model(model.id)
    new_model.add_reactions(all_rxns)
    return new_model

### Add exchange reactions for all secreted metabolites. Use `_e` for the extracellular compartment. Make all of these exchange reactions irreversible

* Use the MetaCyc id for all metabolites. If there is no MetaCyc id, use the KEGG id; if there is neither, use the bigg.metabolite id
* Use 'Exchange_xx' for exchange reactions, where xx is a metabolite id

In [3]:
def add_secretion_rxns(model):
    """Add an irreversible exchange reaction for every secreted metabolite.

    Reads the 'secretion' sheet of core_biomass_medium_summary_iML1515.xlsx
    (the first row is skipped as a header; column 0 = name, column 2 = BiGG
    id, column 9 = MNX id) and creates for each row ``Exchange_<id>``:
    ``<id>_e --> `` with bounds [0, 1000].

    The metabolite id is chosen from the MetaCyc ids mapped to the MNX id, or
    from the KEGG ids when there is no MetaCyc id, or is the BiGG id when
    there is neither. Among several candidates the first one found in
    ``model.metabolites`` wins, otherwise the first candidate is used.

    Parameters
    ----------
    model : cobra.Model
        Draft model. Its ``reactions`` list is extended in place.

    Returns
    -------
    cobra.Model
        A new model carrying the id of ``model`` and all of its reactions
        plus the reactions created here.
    """
    book = xlrd.open_workbook('../Results/core_biomass_medium_summary_iML1515.xlsx')
    sh = book.sheet_by_name('secretion')

    mnx2meta,mnx2kegg = load_MNX('../../../ComplementaryData/chem_xref.tsv')

    def in_model(met_key):
        # True when the model already holds a metabolite with this id.
        try:
            model.metabolites.get_by_id(met_key)
        except KeyError:
            return False
        return True

    all_rxns = model.reactions
    for row in range(1, sh.nrows):
        mnx = sh.cell(row,9).value
        bigg = sh.cell(row,2).value
        name = sh.cell(row,0).value

        # MetaCyc ids first, KEGG ids second; a missing or empty list falls through.
        candidates = mnx2meta.get(mnx) or mnx2kegg.get(mnx)
        if candidates:
            # NOTE(review): the lookup uses the id without a compartment suffix;
            # confirm the model really stores such ids.
            met_id = next((item for item in candidates if in_model(item)),
                          candidates[0])
        else:
            met_id = bigg

        # create exchange reaction (secretion only: the metabolite is consumed)
        met_e = cobra.Metabolite(met_id+'_e')
        met_e.name = name
        met_e.compartment = 'e'
        exg_rxn = cobra.Reaction('Exchange_'+met_id)
        exg_rxn.upper_bound = 1000
        exg_rxn.lower_bound = 0
        exg_rxn.add_metabolites({met_e:-1})
        all_rxns.append(exg_rxn)

        print(exg_rxn.id,exg_rxn.reaction)
        print('')

    # If model.add_reactions([...]) is used to add the exchange reactions,
    # model.copy() later fails with a key error in the optlang
    # constraints/variables, so a fresh model is built from the full reaction list.
    new_model = cobra.Model(model.id)
    new_model.add_reactions(all_rxns)
    return new_model

## Add protein, dna, rna, ions, lipid synthesis

In [4]:
def find_halo_met_c(bigg_id,mnx_id,mnx2meta,mnx2kegg,halomodel,comp):
    """Return the metabolite for a compound in compartment ``comp``.

    Candidate ids are the MetaCyc ids mapped to ``mnx_id``; when there are
    none, the KEGG ids; when there are none either, ``bigg_id``. Each
    candidate is looked up in ``halomodel`` as ``<id>_<comp>`` and the first
    hit is returned. When no candidate exists in the model, a new
    ``cobra.Metabolite`` is created from the last candidate, with its
    compartment set to ``comp``; it is not added to the model here.

    Parameters
    ----------
    bigg_id : str
        BiGG metabolite id, used when no MetaCyc/KEGG id is available.
    mnx_id : str
        Key looked up in ``mnx2meta`` and ``mnx2kegg``.
    mnx2meta, mnx2kegg : dict
        Map an MNX id to a sequence of MetaCyc / KEGG ids.
    halomodel : cobra.Model
        Model whose ``metabolites`` are searched.
    comp : str
        Compartment suffix appended to the id (after an underscore).

    Returns
    -------
    cobra.Metabolite
    """
    suffix = '_{0}'.format(comp)
    # A missing or empty id list falls through to the next source of ids.
    candidates = mnx2meta.get(mnx_id) or mnx2kegg.get(mnx_id) or [bigg_id]

    for item in candidates:
        try:
            return halomodel.metabolites.get_by_id(item+suffix)
        except KeyError:
            # not in the model under this id, try the next candidate
            continue

    # No candidate is known to the model: build a new metabolite from the last one.
    met = cobra.Metabolite(candidates[-1]+suffix)
    met.compartment = comp
    return met

In [5]:
def add_general_rxn(sheet_name,gen_met_name,gen_rxn_name,halomodel):
    """Add a lumped reaction that produces one pooled biomass component.

    Every data row of sheet ``sheet_name`` in
    core_biomass_medium_summary_iML1515.xlsx (the first row is skipped as a
    header) contributes one metabolite: column 0 gives the compartment (text
    after the last underscore), column 4 the BiGG id, column 9 the MNX id and
    column 3 the stoichiometric coefficient. The pooled metabolite
    ``<gen_met_name>_c`` is added with coefficient 1.

    Parameters
    ----------
    sheet_name : str
        Sheet listing the components.
    gen_met_name : str
        Name of the pooled metabolite; its id is ``<gen_met_name>_c``.
    gen_rxn_name : str
        Id of the reaction that is created.
    halomodel : cobra.Model
        Model the reaction is added to (modified in place).

    Raises
    ------
    ValueError
        If no metabolite can be resolved for a row.
    """
    book = xlrd.open_workbook('../Results/core_biomass_medium_summary_iML1515.xlsx')
    sh = book.sheet_by_name(sheet_name)

    mnx2meta,mnx2kegg = load_MNX('../../../ComplementaryData/chem_xref.tsv')

    coeffs = dict()
    for row in range(1, sh.nrows):
        comp = sh.cell(row,0).value.split('_')[-1]
        mnx_id = sh.cell(row,9).value
        bigg_id = sh.cell(row,4).value
        met = find_halo_met_c(bigg_id,mnx_id,mnx2meta,mnx2kegg,halomodel,comp)
        if met is None:
            # fail here with a readable message instead of deep inside cobra
            raise ValueError('no metabolite found for row {0} of sheet {1}'.format(row, sheet_name))
        coeffs[met] = sh.cell(row,3).value

    # pooled product of the reaction
    pool_met = cobra.Metabolite('{}_c'.format(gen_met_name))
    pool_met.name = gen_met_name
    pool_met.compartment = 'c'
    coeffs[pool_met] = 1

    rxn = cobra.Reaction(gen_rxn_name)
    rxn.add_metabolites(coeffs)
    # add_reactions is the non-deprecated spelling of add_reaction
    halomodel.add_reactions([rxn])
    print(rxn.reaction)
    test_if_model_can_produce(pool_met,halomodel)

In [6]:
def add_general_components(model):
    """Add the five lumped biomass-component reactions to ``model``.

    Each entry is (sheet name, pooled metabolite name, reaction id) and is
    forwarded to ``add_general_rxn`` in the order listed.
    """
    components = (
        ('protein', 'protein', 'Protein_synthesis'),
        ('dna', 'dna', 'DNA_synthesis'),
        ('rna', 'rna', 'RNA_synthesis'),
        ('ions', 'ions', 'ions_pool'),
        ('lipids', 'lipids', 'lipids_synthesis'),
    )
    for sheet, pool_name, rxn_id in components:
        add_general_rxn(sheet, pool_name, rxn_id, model)

In [7]:
def add_biomass_eq(model,eco):
    """Add the biomass reaction ``Biomass_v1`` to ``model``.

    The reaction consumes one unit each of the pooled metabolites
    ``protein_c``, ``dna_c``, ``rna_c``, ``ions_c`` and ``lipids_c`` (which
    must already exist in ``model``) plus the metabolites listed in the
    'others' sheet of core_biomass_medium_summary_iML1515.xlsx (the first row
    is skipped as a header; column 0 = metabolite id in ``eco``, column 3 =
    coefficient).

    Parameters
    ----------
    model : cobra.Model
        Our reconstruction; modified in place.
    eco : cobra.Model
        E. coli model the 'others' metabolites are read from before they are
        converted with ``convert_met_id``.
    """
    # Only the metabolite cross-references are needed; the reaction
    # cross-reference table was loaded before but never used.
    mnx2meta_met, mnx2kegg_met = load_MNX('../../../ComplementaryData/chem_xref.tsv')

    # the first two values (reaction ids) are not needed here
    _, _, meta_raven_ids_met, kegg_raven_ids_met = load_met_rxn_ids_in_raven_model(model)

    biomass_rxn = cobra.Reaction('Biomass_v1')
    coeffs = {model.metabolites.protein_c:-1,
              model.metabolites.dna_c:-1,
              model.metabolites.rna_c:-1,
              model.metabolites.ions_c:-1,
              model.metabolites.lipids_c:-1}

    # Others
    book = xlrd.open_workbook('../Results/core_biomass_medium_summary_iML1515.xlsx')
    sh = book.sheet_by_name('others')
    for row in range(1, sh.nrows):
        eco_met = eco.metabolites.get_by_id(sh.cell(row,0).value)
        met = convert_met_id(eco_met,mnx2meta_met, mnx2kegg_met,meta_raven_ids_met, kegg_raven_ids_met)
        coeffs[met] = sh.cell(row,3).value

    biomass_rxn.add_metabolites(coeffs)
    print(biomass_rxn.reaction)
    model.add_reactions([biomass_rxn])

In [8]:
def run_pipeline(infile,outfile,eco):
    """Extend a pickled draft model with medium, secretion and biomass reactions.

    Steps, in order: load the model from ``infile``, add the medium
    exchange/transport reactions, add the secretion exchange reactions, add
    the pooled biomass components, add the biomass equation, then save the
    result to ``outfile`` with ``save_pickle_model``.

    Parameters
    ----------
    infile : str
        Path of the pickled input model.
    outfile : str
        Path the extended model is written to.
    eco : cobra.Model
        E. coli model passed on to ``add_biomass_eq``.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
    # The context manager closes the file handle, which was left open before.
    with open(infile,'rb') as handle:
        model = pickle.load(handle)

    model = add_medium_rxns(model)
    model = add_secretion_rxns(model)
    add_general_components(model)
    add_biomass_eq(model,eco)

    save_pickle_model(model,outfile)

In [9]:
# Load the E. coli iML1515 model from its .mat and .xml files. It is passed to
# run_pipeline below as `eco`, the model whose metabolites are read when the
# biomass equation is built.
eco = load_ecoli_model('../../../ComplementaryData/iML1515.mat',
                       '../../../ComplementaryData/iML1515.xml')

Using license file /Users/gangl/gurobi.lic
Academic license - for non-commercial use only


In [10]:
# Add medium, secretion and biomass reactions to this draft and save the result.
run_pipeline(
    infile='../Results/halo_metacycmodel_100_curated_compart_with_eco.pkl',
    outfile='../Results/halo_metacycmodel_100_curated_compart_with_eco_medium_biomass.pkl',
    eco=eco,
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmppa4t5qyn.lp
Reading time = 0.02 seconds
: 2343 rows, 4424 columns, 21454 nonzeros
Metacyc 16868
KEGG 37608
Exchange_NA+  <=> NA+_e
ExchangeEP_NA+ NA+_e <=> NA+_p
Exchange_CL-  <=> CL-_e
ExchangeEP_CL- CL-_e <=> CL-_p
Exchange_Glucopyranose  <=> Glucopyranose_e
ExchangeEP_Glucopyranose Glucopyranose_e <=> Glucopyranose_p
Exchange_UREA  <=> UREA_e
ExchangeEP_UREA UREA_e <=> UREA_p
Exchange_MG+2  <=> MG+2_e
ExchangeEP_MG+2 MG+2_e <=> MG+2_p
Exchange_HSO4  <=> HSO4_e
ExchangeEP_HSO4 HSO4_e <=> HSO4_p
Exchange_CPD-16459  <=> CPD-16459_e
ExchangeEP_CPD-16459 CPD-16459_e <=> CPD-16459_p
Exchange_FE+3  <=> FE+3_e
ExchangeEP_FE+3 FE+3_e <=> FE+3_p
Exchange_AMMONIA  <=> AMMONIA_e
ExchangeEP_AMMONIA AMMONIA_e <=> AMMONIA_p
Exchange_CIT  <=> CIT_e
ExchangeEP_CIT CIT_e <=> CIT_p
Exchange_FE+2  <=> FE+2_e
ExchangeEP_FE+2 FE+2_e <=> FE+2_p
Exchange_CA+2  <=> CA+2_e
ExchangeEP_CA+2 CA+2_e <=> CA+2_p
Exchange_ZN+2  <=> Z

In [11]:
# Add medium, secretion and biomass reactions to this draft and save the result.
run_pipeline(
    infile='../Results/halo_metacyc_kegg_curated_compart_with_eco.pkl',
    outfile='../Results/halo_metacyc_kegg_curated_compart_with_eco_medium_biomass.pkl',
    eco=eco,
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpstoytb8u.lp
Reading time = 0.02 seconds
: 3254 rows, 6120 columns, 29042 nonzeros
Metacyc 16868
KEGG 37608
Exchange_NA+  <=> NA+_e
ExchangeEP_NA+ NA+_e <=> NA+_p
Exchange_CL-  <=> CL-_e
ExchangeEP_CL- CL-_e <=> CL-_p
Exchange_Glucopyranose  <=> Glucopyranose_e
ExchangeEP_Glucopyranose Glucopyranose_e <=> Glucopyranose_p
Exchange_UREA  <=> UREA_e
ExchangeEP_UREA UREA_e <=> UREA_p
Exchange_MG+2  <=> MG+2_e
ExchangeEP_MG+2 MG+2_e <=> MG+2_p
Exchange_HSO4  <=> HSO4_e
ExchangeEP_HSO4 HSO4_e <=> HSO4_p
Exchange_CPD-16459  <=> CPD-16459_e
ExchangeEP_CPD-16459 CPD-16459_e <=> CPD-16459_p
Exchange_FE+3  <=> FE+3_e
ExchangeEP_FE+3 FE+3_e <=> FE+3_p
Exchange_AMMONIA  <=> AMMONIA_e
ExchangeEP_AMMONIA AMMONIA_e <=> AMMONIA_p
Exchange_CIT  <=> CIT_e
ExchangeEP_CIT CIT_e <=> CIT_p
Exchange_FE+2  <=> FE+2_e
ExchangeEP_FE+2 FE+2_e <=> FE+2_p
Exchange_CA+2  <=> CA+2_e
ExchangeEP_CA+2 CA+2_e <=> CA+2_p
Exchange_ZN+2  <=> Z

In [12]:
# Add medium, secretion and biomass reactions to this draft and save the result.
run_pipeline(
    infile='../Results/halo_metacyc_kegg_curated_compart_with_eco_without_missing.pkl',
    outfile='../Results/halo_metacyc_kegg_curated_compart_with_eco_without_missing_medium_biomass.pkl',
    eco=eco,
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpqzv_q9np.lp
Reading time = 0.03 seconds
: 3194 rows, 5976 columns, 28282 nonzeros
Metacyc 16868
KEGG 37608
Exchange_NA+  <=> NA+_e
ExchangeEP_NA+ NA+_e <=> NA+_p
Exchange_CL-  <=> CL-_e
ExchangeEP_CL- CL-_e <=> CL-_p
Exchange_Glucopyranose  <=> Glucopyranose_e
ExchangeEP_Glucopyranose Glucopyranose_e <=> Glucopyranose_p
Exchange_UREA  <=> UREA_e
ExchangeEP_UREA UREA_e <=> UREA_p
Exchange_MG+2  <=> MG+2_e
ExchangeEP_MG+2 MG+2_e <=> MG+2_p
Exchange_HSO4  <=> HSO4_e
ExchangeEP_HSO4 HSO4_e <=> HSO4_p
Exchange_CPD-16459  <=> CPD-16459_e
ExchangeEP_CPD-16459 CPD-16459_e <=> CPD-16459_p
Exchange_FE+3  <=> FE+3_e
ExchangeEP_FE+3 FE+3_e <=> FE+3_p
Exchange_AMMONIA  <=> AMMONIA_e
ExchangeEP_AMMONIA AMMONIA_e <=> AMMONIA_p
Exchange_CIT  <=> CIT_e
ExchangeEP_CIT CIT_e <=> CIT_p
Exchange_FE+2  <=> FE+2_e
ExchangeEP_FE+2 FE+2_e <=> FE+2_p
Exchange_CA+2  <=> CA+2_e
ExchangeEP_CA+2 CA+2_e <=> CA+2_p
Exchange_ZN+2  <=> Z

In [13]:
# Add medium, secretion and biomass reactions to this draft and save the result.
run_pipeline(
    infile='../Results/halo_metacycmodel_100_curated_compart_with_eco_without_missing.pkl',
    outfile='../Results/halo_metacycmodel_100_curated_compart_with_eco_without_missing_medium_biomass.pkl',
    eco=eco,
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpt024hjbf.lp
Reading time = 0.02 seconds
: 2279 rows, 4278 columns, 20682 nonzeros
Metacyc 16868
KEGG 37608
Exchange_NA+  <=> NA+_e
ExchangeEP_NA+ NA+_e <=> NA+_p
Exchange_CL-  <=> CL-_e
ExchangeEP_CL- CL-_e <=> CL-_p
Exchange_Glucopyranose  <=> Glucopyranose_e
ExchangeEP_Glucopyranose Glucopyranose_e <=> Glucopyranose_p
Exchange_UREA  <=> UREA_e
ExchangeEP_UREA UREA_e <=> UREA_p
Exchange_MG+2  <=> MG+2_e
ExchangeEP_MG+2 MG+2_e <=> MG+2_p
Exchange_HSO4  <=> HSO4_e
ExchangeEP_HSO4 HSO4_e <=> HSO4_p
Exchange_CPD-16459  <=> CPD-16459_e
ExchangeEP_CPD-16459 CPD-16459_e <=> CPD-16459_p
Exchange_FE+3  <=> FE+3_e
ExchangeEP_FE+3 FE+3_e <=> FE+3_p
Exchange_AMMONIA  <=> AMMONIA_e
ExchangeEP_AMMONIA AMMONIA_e <=> AMMONIA_p
Exchange_CIT  <=> CIT_e
ExchangeEP_CIT CIT_e <=> CIT_p
Exchange_FE+2  <=> FE+2_e
ExchangeEP_FE+2 FE+2_e <=> FE+2_p
Exchange_CA+2  <=> CA+2_e
ExchangeEP_CA+2 CA+2_e <=> CA+2_p
Exchange_ZN+2  <=> Z