In [1]:
from tools import *
import pickle

In [2]:
def is_subset(lst1,lst2):
    """Return True when every element of ``lst1`` also occurs in ``lst2``.

    An empty ``lst1`` is trivially a subset, so the result is True.
    """
    return all(element in lst2 for element in lst1)

In [3]:
def combine(eco_model,reven_model):
    """Merge an E. coli based reconstruction with a MetaCyc/KEGG based draft model.

    Parameters
    ----------
    eco_model :
        E. coli based reconstruction. It is first passed through
        ``convert_eco_to_metacyc_kegg`` so that its reaction and metabolite
        ids can be matched against ``reven_model``.
        NOTE(review): whether that helper modifies ``eco_model`` in place is
        not visible here -- confirm before reusing the input afterwards.
    reven_model :
        MetaCyc/KEGG based reconstruction (RAVEN draft model).

    Returns
    -------
    cobra.Model
        A new model with id ``'combined_metacyc_iJO1366'`` holding every
        reaction of the converted eco model plus the reactions of
        ``reven_model`` whose ids are not already present.

    Notes
    -----
    For a reaction id present in both models the eco reaction is kept and its
    gene-reaction rule is reconciled as follows:

    1. Neither rule contains ``' and '``: the rule becomes the union of both
       gene sets joined with ``' or '``. Genes are sorted so the resulting
       rule is identical from run to run.
    2. Both rules parse to the same gene list: the eco rule is kept.
    3. The MetaCyc genes are a subset of the eco genes: the eco rule is kept.
    4. Otherwise both rules are printed for manual inspection and the eco
       rule is kept.
    """
    ## Step 1: convert reaction ids and metabolite ids
    eco_model = convert_eco_to_metacyc_kegg(eco_model,reven_model)

    ## Step 2: combine, starting from the eco reactions indexed by id
    combined_rxns = {rxn.id: rxn for rxn in eco_model.reactions}

    for rxn in reven_model.reactions:
        eco_rxn = combined_rxns.get(rxn.id)
        if eco_rxn is None:
            # Reaction only exists in the MetaCyc/KEGG model: take it as is.
            combined_rxns[rxn.id] = rxn
            continue

        # The reaction exists in both models: reconcile the gene rules.
        gr_eco = eco_rxn.gene_reaction_rule
        gr_meta = rxn.gene_reaction_rule
        if gr_eco == gr_meta:
            continue

        genes_eco = parse_gr(gr_eco)
        genes_meta = parse_gr(gr_meta)

        if ' and ' not in gr_eco and ' and ' not in gr_meta:
            ## case 1: no 'and' in either rule. Combine all genes with 'or'.
            # sorted() removes the run-to-run ordering differences that
            # iterating a set of strings would introduce.
            merged_genes = sorted(set(genes_eco) | set(genes_meta))
            eco_rxn.gene_reaction_rule = ' or '.join(merged_genes)
        elif genes_eco == genes_meta:
            ## case 2: same genes, rules differ only in 'and'/'or'.
            ## Use the one from the E. coli model.
            pass
        elif is_subset(genes_meta, genes_eco):
            ## case 3: MetaCyc genes are a subset of the eco genes.
            ## Use the one from the E. coli model.
            pass
        else:
            # Unresolved conflict: report both rules and keep the eco rule.
            print(gr_eco)
            print(gr_meta)
            print('')
    print('For above reactions, use the one from ecoli at the moment.')

    combined = cobra.Model('combined_metacyc_iJO1366')
    combined.add_reactions(combined_rxns.values())
    return combined

In [4]:
def run_pipeline(ecofile,ravenfile,outfile):
    """Load both models, combine them, report their status and save the result.

    Parameters
    ----------
    ecofile : str
        Path to the E. coli based template model in cobra JSON format.
    ravenfile : str
        Path to the pickled MetaCyc/KEGG based model.
    outfile : str
        Destination path handed to ``save_pickle_model`` for the combined model.

    ``report_model_status`` is called on each input model and on the combined
    model. Nothing is returned.
    """
    eco_model   = cobra.io.load_json_model(ecofile)
    # SECURITY: unpickling can execute arbitrary code. Only load pickle files
    # produced by this project's own pipeline.
    # The context manager closes the file handle, which the bare
    # pickle.load(open(...)) form left to the garbage collector.
    with open(ravenfile,'rb') as handle:
        raven_model = pickle.load(handle)
    report_model_status(eco_model)
    report_model_status(raven_model)
    combined_eco_metacyc = combine(eco_model,raven_model)
    report_model_status(combined_eco_metacyc)
    save_pickle_model(combined_eco_metacyc,outfile)

In [5]:
# Template that still lists missing genes + curated MetaCyc draft (per the file names).
run_pipeline(
    ecofile='../Results/halo_iML1515_template_with_missing_genes.json',
    ravenfile='../Results/halo_metacycmodel_100_curated_compart.pkl',
    outfile='../Results/halo_metacycmodel_100_curated_compart_with_eco.pkl',
)

Using license file /Users/gangl/gurobi.lic
Academic license - for non-commercial use only
Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpv291bmyp.lp
Reading time = 0.01 seconds
: 1679 rows, 2644 columns, 12744 nonzeros
Number of reactions: 1290
Number of metabolits: 1242
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 859
Number of missing genes: 73
Number of reactions with missing genes: 75

Number of reactions: 1322
Number of metabolits: 1679
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 1253
Number of missing genes: 0
Number of reactions with missing genes: 0

Metacyc 16868
KEGG 37608
Metacyc 14768
KEGG 10278
TD01GL002722 and TD01GL002723
TD01GL001634 or TD01GL001635

TD01GL001227 and TD01GL002214
TD01GL001226 or TD01GL001227 or TD01GL002214 or TD01GL002215

TD01GL002722 and TD01GL002723
TD01GL001635 or TD01GL002722

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL00

In [6]:
# Template that still lists missing genes + curated MetaCyc/KEGG draft (per the file names).
run_pipeline(
    ecofile='../Results/halo_iML1515_template_with_missing_genes.json',
    ravenfile='../Results/halo_metacyc_kegg_curated_compart.pkl',
    outfile='../Results/halo_metacyc_kegg_curated_compart_with_eco.pkl',
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpcxehkadc.lp
Reading time = 0.02 seconds
: 2654 rows, 4480 columns, 20978 nonzeros
Number of reactions: 1290
Number of metabolits: 1242
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 859
Number of missing genes: 73
Number of reactions with missing genes: 75

Number of reactions: 2240
Number of metabolits: 2654
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 1327
Number of missing genes: 0
Number of reactions with missing genes: 0

Metacyc 16868
KEGG 37608
Metacyc 14768
KEGG 10278
TD01GL002722 and TD01GL002723
TD01GL001634 or TD01GL001635

TD01GL001227 and TD01GL002214
TD01GL001226 or TD01GL001227 or TD01GL002214 or TD01GL002215

TD01GL002722 and TD01GL002723
TD01GL001635 or TD01GL002722

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL002722 and TD01G

In [7]:
# Template without missing genes + curated MetaCyc draft (per the file names).
run_pipeline(
    ecofile='../Results/halo_iML1515_template_without_missing_genes.json',
    ravenfile='../Results/halo_metacycmodel_100_curated_compart.pkl',
    outfile='../Results/halo_metacycmodel_100_curated_compart_with_eco_without_missing.pkl',
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpoffs20km.lp
Reading time = 0.01 seconds
: 1679 rows, 2644 columns, 12744 nonzeros
Number of reactions: 1215
Number of metabolits: 1173
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 751
Number of missing genes: 0
Number of reactions with missing genes: 0

Number of reactions: 1322
Number of metabolits: 1679
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 1253
Number of missing genes: 0
Number of reactions with missing genes: 0

Metacyc 16868
KEGG 37608
Metacyc 14768
KEGG 10278
TD01GL002722 and TD01GL002723
TD01GL001634 or TD01GL001635

TD01GL001227 and TD01GL002214
TD01GL001226 or TD01GL001227 or TD01GL002214 or TD01GL002215

TD01GL002722 and TD01GL002723
TD01GL001635 or TD01GL002722

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL002722 and TD01GL0

In [8]:
# Template without missing genes + curated MetaCyc/KEGG draft (per the file names).
run_pipeline(
    ecofile='../Results/halo_iML1515_template_without_missing_genes.json',
    ravenfile='../Results/halo_metacyc_kegg_curated_compart.pkl',
    outfile='../Results/halo_metacyc_kegg_curated_compart_with_eco_without_missing.pkl',
)

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmp51lu4hdh.lp
Reading time = 0.02 seconds
: 2654 rows, 4480 columns, 20978 nonzeros
Number of reactions: 1215
Number of metabolits: 1173
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 751
Number of missing genes: 0
Number of reactions with missing genes: 0

Number of reactions: 2240
Number of metabolits: 2654
Number of compartments: 3 {'c': '', 'p': '', 'e': ''}
Number of genes: 1327
Number of missing genes: 0
Number of reactions with missing genes: 0

Metacyc 16868
KEGG 37608
Metacyc 14768
KEGG 10278
TD01GL002722 and TD01GL002723
TD01GL001634 or TD01GL001635

TD01GL001227 and TD01GL002214
TD01GL001226 or TD01GL001227 or TD01GL002214 or TD01GL002215

TD01GL002722 and TD01GL002723
TD01GL001635 or TD01GL002722

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL003202 and TD01GL003203
TD01GL003202 or TD01GL003203 or TD01GL003289

TD01GL002722 and TD01GL0