In [1]:
import cobra
import pickle

In [19]:
def create_universal_met(eco_met,halomodel,universal,mnx2meta):
    is_in_halo = dict()
    for met in halomodel.metabolites: is_in_halo[met.id] = True
    mnx_id = eco_met.annotation.get('metanetx.chemical')
    
    comp = eco_met.compartment
    if comp == 'p': comp = 'e'
    met_id = None
    
    if mnx_id is None or mnx2meta.get(mnx_id,None) is None: 
        # if there is no mnx id or no metacyc id, use bigg id instead
        met_id = eco_met.annotation['bigg.metabolite'] + '_' + comp
    else:
        meta_ids = mnx2meta[mnx_id]
        for item in meta_ids:
            if is_in_halo.get(item+ '_' + comp,False): 
                met_id = item+ '_' + comp
                break
        if met_id is None: met_id = meta_ids[0] + '_' + comp
    try: 
        new_met = halomodel.metabolites.get_by_id(met_id)
    except:
        try:
            new_met = universal.metabolites.get_by_id(met_id)
        except:
            new_met = eco_met.copy()
            new_met.id = met_id
            new_met.compartment = comp
    return new_met

In [20]:
def update_rxnid(new_rxn,halomodel,universal):
    # metacyc reaction ids in halomodel:
    halo_rxn_ids = dict()
    halo_rxn_mets = dict()
    mets_rxn = dict()
    for rxn in halomodel.reactions: 
        halo_rxn_ids[rxn.id] = True
        mets = [met.id for met in rxn.metabolites]
        mets.sort()
        mets = tuple(mets)
        halo_rxn_mets[mets] = True
        mets_rxn[mets] = rxn
        
    # 1. check metacyc id
    meta_ids = new_rxn.annotation.get('biocyc')
    new_rxn_id = new_rxn.id
    # case 1: there is metacyc ids, map metacyc id
    if meta_ids is not None:
        if type(meta_ids) == str: meta_ids = [meta_ids]
        for meta_id in meta_ids:
            meta_id = meta_id.replace('META:','')
            if halo_rxn_ids.get(meta_id,False): new_rxn_id = meta_id
    
    # case 2: if case 1 failed, check reaction equations
    else:
        new_mets = [met.id for met in new_rxn.metabolites]
        new_mets.sort()
        new_mets = tuple(new_mets)
        if halo_rxn_mets.get(new_mets,False): 
            # the two reactions contain the same metabolites. Now compare the equations, including direction
            eq1 = new_rxn.reaction
            eq2 = mets_rxn[new_mets].reaction
            eq1 = eq1.split()
            eq2 = eq2.split()
            
            eq1.sort()
            eq2.sort()
            if eq1 == eq2:
                # print(new_rxn.id,new_rxn.reaction)
                # print(mets_rxn[new_mets].id,mets_rxn[new_mets].reaction)
                # print('')
                new_rxn_id = mets_rxn[new_mets].id
            
    new_rxn.id = new_rxn_id
    
    # if new_rxn_id is in universal model, use the reactions in univeral model
    try: 
        rxn_universal = universal.reactions.get_by_id(new_rxn_id)
        gr_rule = new_rxn.gene_reaction_rule 
        new_rxn = rxn_universal.copy()
        new_rxn.gene_reaction_rule = gr_rule
    except: None
    
    
    return new_rxn

In [21]:
def create_universal_rxn(eco_rxn,halomodel,universal,mnx2meta):
    coeffs = dict()
    is_transport = False
    
    # convert metabolite ids
    mets = list()
    for eco_met in eco_rxn.metabolites:
        new_met = create_universal_met(eco_met,halomodel,universal,mnx2meta)
        if new_met.id in mets:
            is_transport = True

        mets.append(new_met.id)
        coeffs[new_met] = coeffs.get(new_met,0) + eco_rxn.get_coefficient(eco_met)

    if is_transport and 'BIOMASS' not in eco_rxn.id: return None
    else:
        # convert reaction ids
        new_rxn = eco_rxn.copy()
        new_rxn.subtract_metabolites(new_rxn.metabolites)
        new_rxn.add_metabolites(coeffs)
        
        new_rxn = update_rxnid(new_rxn,halomodel,universal)
        return new_rxn

In [22]:
def is_transport_in_universal(rxn):
    is_transport = False
    mets = dict()
    for met in rxn.metabolites: 
        comp = met.compartment
        met_id = met.id.replace('_{0}'.format(comp),'')
        if mets.get(met_id) is not None: is_transport = True
        else: mets[met_id] = 0
    if is_transport: print(rxn.reaction)
    return is_transport

In [23]:
def remove_transport_rxn_from_model_on_template(model):
    rxns = list()
    
    
    for rxn in model.reactions:
        is_transport = is_transport_in_universal(rxn)
        if not is_transport: rxns.append(rxn)
    
    new_model = cobra.Model('model_from_template_without_transport')
    new_model.add_reactions(rxns)
    print('{0} transport reactions were removed'.format(len(model.reactions)-len(rxns)))
    return new_model

In [24]:
eco = pickle.load(open('../Results/eco.pkl','rb'))

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmpy7001q7_.lp
Reading time = 0.02 seconds
: 1877 rows, 5424 columns, 21150 nonzeros


In [25]:
halo_meta = pickle.load(open('../Results/halo_meta.pkl','rb'))

Read LP format model from file /var/folders/dx/ghtq02dx2w307xx_5hncb421qny3_w/T/tmp76fxk72u.lp
Reading time = 0.02 seconds
: 1677 rows, 2608 columns, 12534 nonzeros


In [26]:
universal = cobra.Model('universal')
mnx2meta = dict()
for line in open('../../../ComplementaryData/MNX2metacyc.tsv'):
    cont = line.split()
    mnx2meta[cont[0]] = mnx2meta.get(cont[0],[]) + [cont[1]]

univeral_rxns = dict()
for eco_rxn in eco.reactions:
    
    new_rxn = create_universal_rxn(eco_rxn,halo_meta,universal,mnx2meta)
    if new_rxn is None: 
        print(eco_rxn.id)
        print(eco_rxn.reaction)
        continue
    
    
    univeral_rxns[new_rxn.id] = new_rxn
universal.add_reactions(univeral_rxns.values())

GLUtex
glu__L_e <=> glu__L_p
GLYtex
gly_e <=> gly_p
GLYALDtex
glyald_e <=> glyald_p
GLYBtex
glyb_e <=> glyb_p
GLYCtex
glyc_e <=> glyc_p
GLYC3Ptex
glyc3p_e <=> glyc3p_p
GLYCLTtex
glyclt_e <=> glyclt_p
GTPtex
gtp_e <=> gtp_p
GUAtex
gua_e <=> gua_p
Htex
h_e <=> h_p
H2tex
h2_e <=> h2_p
H2Otex
h2o_e <=> h2o_p
DAPtex
15dap_e <=> 15dap_p
12PPDStex
12ppd__S_e <=> 12ppd__S_p
DDGLCNtex
2ddglcn_e <=> 2ddglcn_p
26DAHtex
26dap__M_e <=> 26dap__M_p
HCINNMtex
3hcinnm_e <=> 3hcinnm_p
HPPPNtex
3hpppn_e <=> 3hpppn_p
ACtex
ac_e <=> ac_p
ACALDtex
acald_e <=> acald_p
ACMANAtex
acmana_e <=> acmana_p
ACMUMtex
acmum_e --> acmum_p
GALtex
gal_e <=> gal_p
GALCTtex
galct__D_e <=> galct__D_p
GALCTNtex
galctn__D_e <=> galctn__D_p
GALTtex
galt_e <=> galt_p
GALURtex
galur_e <=> galur_p
GAMtex
gam_e <=> gam_p
GBBTNtex
gbbtn_e <=> gbbtn_p
HIStex
his__L_e <=> his__L_p
HXAtex
hxa_e <=> hxa_p
HYXNtex
hxan_e <=> hxan_p
IDONtex
idon__L_e <=> idon__L_p
ILEtex
ile__L_e <=> ile__L_p
INSTtex
inost_e <=> inost_p
INStex
ins_e <=> 

LIPAabctex
atp_c + h2o_c + lipa_p --> adp_c + h_c + lipa_e + pi_c
CMtpp
cm_p + h_p --> cm_e + h_c
FUSAtpp
fusa_p + h_p --> fusa_e + h_c
RFAMPtex
rfamp_e <=> rfamp_p
MINCYCtex
mincyc_e <=> mincyc_p
MINCYCtpp
h_p + mincyc_p --> h_c + mincyc_e
DOXRBCNtex
doxrbcn_e <=> doxrbcn_p
DOXRBCNtpp
doxrbcn_p + h_p --> doxrbcn_e + h_c
NOVBCNtpp
h_p + novbcn_p --> h_c + novbcn_e
CMtex
cm_e <=> cm_p
LALALGLUtex
LalaLglu_e <=> LalaLglu_p
5MTRtex
5mtr_e <=> 5mtr_p
LALADGLUtex
LalaDglu_e <=> LalaDglu_p
3HPPtex
3hpp_e <=> 3hpp_p
FUSAtex
fusa_e <=> fusa_p
NOVBCNtex
novbcn_e <=> novbcn_p
QUIN2tex
quin_e <=> quin_p
O16A4COLIPAabctex
atp_c + h2o_c + o16a4colipa_p --> adp_c + h_c + o16a4colipa_e + pi_c
COLIPAabctex
atp_c + colipa_p + h2o_c --> adp_c + colipa_e + h_c + pi_c
ENLIPAabctex
atp_c + enlipa_p + h2o_c --> adp_c + enlipa_e + h_c + pi_c
CLIPAabctex
atp_c + h2o_c + lipa_cold_p --> adp_c + h_c + lipa_cold_e + pi_c
K2L4Aabctex
atp_c + h2o_c + kdo2lipid4_p --> adp_c + h_c + kdo2lipid4_e + pi_c
SELtex
sel_e 

In [27]:
universal.objective = 'ATPASE-RXN'
universal.objective_direction = 'max'
#eco.objective = 'BIOMASS_Ec_iML1515_core_75p37M'

# check why? make sure growth rate is higher after removal of compartment p. 
s = universal.optimize()
print(s)
#print(eco.optimize())

<Solution 235.000 at 0x131a01d7f0>


In [28]:
for rxn in universal.reactions:
    if s[rxn.id] !=0: 
        print(rxn.id,s[rxn.id])
        print(rxn.reaction)
        print()

EX_h_e -1.7053025658242404e-12
PROTON_e <=> 

CITSYN-RXN 20.000000000000114
ACETYL-COA_c + OXALACETIC_ACID_c + WATER_c --> CIT_c + CO-A_c + PROTON_c

ICDHyr 20.000000000000114
NADP_c + icit_c <=> 2-KETOGLUTARATE_c + CARBON-DIOXIDE_c + NADPH_c

2PGADEHYDRAT-RXN 20.0
2-PG_c <=> PHOSPHO-ENOL-PYRUVATE_c + WATER_c

PGLUCISOM-RXN 10.0
D-glucopyranose-6-phosphate_c <=> FRUCTOSE-6P_c

PHOSGLYPHOS-RXN -20.0
ATP_c + G3P_c <=> ADP_c + DPG_c

EX_co2_e 60.00000000000034
CARBON-DIOXIDE_e <=> 

FRD2 100.00000000000045
FUM_c + REDUCED-MENAQUINONE_c --> CPD-9728_c + SUC_c

EX_glc__D_e -10.0
Glucopyranose_e <=> 

MALATE-DEH-RXN 20.000000000000114
MAL_c + NAD_c <=> NADH_c + OXALACETIC_ACID_c + PROTON_c

FUMHYDR-RXN 20.000000000000114
FUM_c + WATER_c <=> MAL_c

PYRNUTRANSHYDROGEN-RXN 20.000000000000114
NADPH_c + NAD_c --> NADH_c + NADP_c

DHAPT 10.0
DIHYDROXYACETONE_c + PHOSPHO-ENOL-PYRUVATE_c --> DIHYDROXY-ACETONE-PHOSPHATE_c + PYRUVATE_c

F6PA 10.0
FRUCTOSE-6P_c <=> DIHYDROXYACETONE_c + GAP_c

2OXOGLUTA

In [70]:
universal.objective = 'BIOMASS_Ec_iML1515_core_75p37M'
#eco.objective = 'BIOMASS_Ec_iML1515_core_75p37M'

# check why? make sure growth rate is higher after removal of compartment p. 
print(universal.optimize())
#print(eco.optimize())

<Solution 1.384 at 0x1328d17ac8>


In [36]:
for rxn in model.metabolites.ATP_c.reactions:
    if rxn.lower_bound :
        print(rxn.id)
        print(rxn.reaction,rxn.lower_bound)
    

SERASr
ATP_c + PROTON_c + SER_c <=> PPI_c + SERYL-AMP_c -1000.0
BTUR2-RXN
ATP_c + COBINAMIDE_c + PROTON_c <=> ADENOSYLCOBINAMIDE_c + P3I_c -1000.0
RXN-12002
ATP_c + UMP_c <=> ADP_c + UDP_c -1000.0
UDPKIN-RXN
ATP_c + UDP_c <=> ADP_c + UTP_c -1000.0
ADENYL-KIN-RXN
AMP_c + ATP_c <=> 2.0 ADP_c -1000.0
PHOSGLYPHOS-RXN
ATP_c + G3P_c <=> ADP_c + DPG_c -1000.0
CBLAT
ATP_c + COB-I-ALAMIN_c + PROTON_c <=> ADENOSYLCOBALAMIN_c + P3I_c -1000.0
CDPKIN-RXN
ATP_c + CDP_c <=> ADP_c + CTP_c -1000.0
PRPPS
ATP_c + CPD-15318_c <=> AMP_c + PROTON_c + PRPP_c -1000.0
NICONUCADENYLYLTRAN-RXN
ATP_c + NICOTINATE_NUCLEOTIDE_c + PROTON_c <=> DEAMIDO-NAD_c + PPI_c -1000.0
DTDPKIN-RXN
ATP_c + TDP_c <=> ADP_c + TTP_c -1000.0
ACETATEKIN-RXN
ACET_c + ATP_c <=> ACETYL-P_c + ADP_c -1000.0
GLYRIBONUCSYN-RXN
5-P-BETA-D-RIBOSYL-AMINE_c + ATP_c + GLY_c <=> 5-PHOSPHO-RIBOSYL-GLYCINEAMIDE_c + ADP_c + PROTON_c + Pi_c -1000.0
DUDPKIN-RXN
ATP_c + DUDP_c <=> ADP_c + DUTP_c -1000.0
GMKALT-RXN
ATP_c + DGMP_c <=> ADP_c + DGDP_c -1000

In [37]:
eco.reactions.ATPM

0,1
Reaction identifier,ATPM
Name,ATP maintenance requirement
Memory address,0x0132ecc8358
Stoichiometry,atp_c + h2o_c --> adp_c + h_c + pi_c  ATP C10H12N5O13P3 + H2O H2O --> ADP C10H12N5O10P2 + H+ + Phosphate
GPR,
Lower bound,6.86
Upper bound,1000.0


In [48]:
model = cobra.io.load_json_model('../../../ModelFiles/json/Halo_GEM_v1.2.json')

In [54]:
for rxn in model.reactions:
    if rxn.id.startswith('GLC'): 
        print(rxn.id)
        print(rxn.reaction,rxn.lower_bound,rxn.upper_bound)
        print()

GLCRAL
5-KETO-4-DEOXY-D-GLUCARATE_c --> PYRUVATE_c + TARTRONATE-S-ALD_c 0.0 1000.0

GLCptspp
Glucopyranose_c + PHOSPHO-ENOL-PYRUVATE_c --> D-glucopyranose-6-phosphate_c + PYRUVATE_c 0.0 1000.0

GLCDpp
Glucopyranose_c + UBIQUINONE-8_c + WATER_c --> CPD-9956_c + GLUCONATE_c + PROTON_c 0.0 1000.0

GLCTR1
CPD-12575_c + CPD0-2295_c --> CPD0-2247_c + PROTON_c + UDP_c 0.0 1000.0

GLCtex_copy2
Glucopyranose_e <=> Glucopyranose_c -1000.0 1000.0

GLCtex_copy1
Glucopyranose_e --> Glucopyranose_c 0.0 1000.0



In [57]:
for rxn in eco.reactions:
    if rxn.id.startswith('GLCtex'): 
        print(rxn.id)
        print(rxn.reaction,rxn.lower_bound,rxn.upper_bound,rxn.gene_reaction_rule)
        print()

GLCtex_copy1
glc__D_e --> glc__D_p 0.0 1000.0 b4036

GLCtex_copy2
glc__D_e <=> glc__D_p -1000.0 1000.0 b0241 or b0929 or b1377 or b2215



In [46]:
for rxn in eco.reactions:
    e, p = False, False
    for met in rxn.metabolites:
        if met.id.endswith('_e'): e = True
        if met.id.endswith('_p'): p = True
    if e and p:
        print(rxn.id)
        print(rxn.reaction,rxn.gene_reaction_rule)
        print()

GLUtex
glu__L_e <=> glu__L_p b1377 or b0241 or b0929 or b2215

GLYtex
gly_e <=> gly_p b1377 or b0241 or b0929 or b2215

GLYALDtex
glyald_e <=> glyald_p b1377 or b0241 or b0929 or b2215

GLYBtex
glyb_e <=> glyb_p b1377 or b0241 or b0929 or b2215

GLYCtex
glyc_e <=> glyc_p b1377 or b0241 or b0929 or b2215

GLYC3Ptex
glyc3p_e <=> glyc3p_p b1377 or b0241 or b0929 or b2215

GLYCLTtex
glyclt_e <=> glyclt_p b1377 or b0241 or b0929 or b2215

GTPtex
gtp_e <=> gtp_p b0241 or b1377 or b2215 or b0929

GUAtex
gua_e <=> gua_p b0411

Htex
h_e <=> h_p b1377 or b0241 or b0929 or b2215

H2tex
h2_e <=> h2_p b1377 or b0241 or b0929 or b2215

H2Otex
h2o_e <=> h2o_p b1377 or b2215 or b1319 or b3875 or s0001 or b0957 or b0241 or b0929

DAPtex
15dap_e <=> 15dap_p b1377 or b0241 or b0929 or b2215

12PPDStex
12ppd__S_e <=> 12ppd__S_p b1377 or b0241 or b0929 or b2215

DDGLCNtex
2ddglcn_e <=> 2ddglcn_p b1377 or b0241 or b0929 or b2215

26DAHtex
26dap__M_e <=> 26dap__M_p b1377 or b0241 or b0929 or b2215

HCINNMtex

In [47]:
for line in open('../Results/gap_filled_reactions_try.txt'):print(line)

>datp_c DATP C10H12N5O12P3 DATP_c

EX_pi_e	Phosphate exchange	Pi_e <=> 	

GLCtex_copy1		Glucopyranose_e --> Glucopyranose_c	b4036

EX_nh4_e	Ammonia exchange	AMMONIUM_e <=> 	

DM_PROTON_c	H+ demand_smiley	PROTON_c --> 	

DM_H2CO3_c	carbonic acid demand_smiley	H2CO3_c --> 	



>dctp_c DCTP C9H12N3O13P3 DCTP_c

EX_pi_e	Phosphate exchange	Pi_e <=> 	

UREAtex	Urea transport via diffusion (extracellular to periplasm)	UREA_e <=> UREA_c	b1377 or b0241 or b0929 or b2215

CITtex	Citrate transport via diffusion (extracellular to periplasm)	CIT_e <=> CIT_c	b1377 or b0241 or b0929 or b2215

GLCtex_copy2		Glucopyranose_e <=> Glucopyranose_c	b0241 or b0929 or b1377 or b2215

DM_FORMATE_c	formate demand_smiley	FORMATE_c --> 	



>dgtp_c DGTP C10H12N5O13P3 DGTP_c

EX_pi_e	Phosphate exchange	Pi_e <=> 	

GLCtex_copy1		Glucopyranose_e --> Glucopyranose_c	b4036

EX_nh4_e	Ammonia exchange	AMMONIUM_e <=> 	

DM_PROTON_c	H+ demand_smiley	PROTON_c --> 	

DM_H2CO3_c	carbonic acid demand_smiley	H2CO3_c --> 	



>