In [1]:
from tools import *
import pickle

## Check all metabolites ending with -GLU-N and re-associate them with their non-GLU-N form
We identified that some metabolites are in the -GLU-N (polyglutamate) form. After carefully checking the MetaCyc website and cross-referencing the KEGG database, we concluded that N in those metabolites should be 0. We therefore replaced the metabolites in -GLU-N form with their non-GLU-N counterparts.

In [2]:
def curate_GLU_N(model,halo_kegg):
    """Re-associate every metabolite whose id ends with '-GLU-N' to its non-GLU-N MetaCyc id.

    The function first prints, for each '-GLU-N' metabolite, one of its
    reactions that has a KEGG counterpart (to support the manual mapping in
    ``GluNs``), then replaces each '-GLU-N' metabolite through ``update_rxn``
    and finally prints the reactions of the replacement metabolites for a
    last manual check.

    Parameters
    ----------
    model : metacyc based model whose metabolites are curated.
    halo_kegg : KEGG based model; only ``halo_kegg.reactions.get_by_id`` is used.

    Returns
    -------
    The model returned by the last ``update_rxn`` call (``model`` itself when
    nothing had to be replaced).
    NOTE(review): ``update_rxn`` comes from ``tools``; it is assumed to return
    the updated model -- confirm.

    Raises
    ------
    KeyError
        If a '-GLU-N' metabolite of the model has no entry in ``GluNs``.
    """
    # load all metacyc-kegg reaction pair
    dfrxnpair = pd.read_csv('../../../ComplementaryData/metacyc2keggrxns.tsv',sep='\t',index_col=0)
    # load metacyc-kegg metabolites pairs
    dfmetpair = pd.read_csv('../../../ComplementaryData/metaCycMets_KEGG_pair.tsv',sep='\t',index_col=1)
    print(dfmetpair.shape,dfmetpair.columns)
    print(dfrxnpair.shape,dfrxnpair.columns)

    # Snapshot the metabolites of interest once, so that the model can be
    # edited later without iterating over a container that is being changed.
    glu_n_mets = [met for met in model.metabolites if met.id.upper().endswith('-GLU-N')]

    # For each of metabolites that endswith '-GLU-N', find one reaction that are from metacyc model
    # which has been associated with a kegg reaction
    for met in glu_n_mets:
        print(met.id)
        paired = None
        for rxn in met.reactions:
            try:
                kegg_id = dfrxnpair.loc[rxn.id,'kegg']
                paired = (rxn, halo_kegg.reactions.get_by_id(kegg_id))
                break
            except (KeyError, TypeError):
                # no metacyc-kegg pair for this reaction, or the paired kegg
                # reaction is not usable as an id in halo_kegg: try the next one
                continue
        if paired is None:
            # Do not fall through to a stale/undefined kegg_rxn from a previous metabolite.
            print('No KEGG-paired reaction found for', met.id)
        else:
            rxn, kegg_rxn = paired
            print(rxn.reaction)
            print_keggrxn(kegg_rxn)
            print(kegg_rxn.reaction)
        print('')

    # Manually check above reactions, associate each of '*-GLU-N' metabolites to a kegg id
    # based on metacyc-kegg reaction pairs
    GluNs = {
        '5-10-METHENYL-THF-GLU-N': 'C00445',
        '5-METHYL-THF-GLU-N': 'C00440',
        'DIHYDROFOLATE-GLU-N':'C00415',
        'FORMYL-THF-GLU-N':'C00234',
        'METHYLENE-THF-GLU-N': 'C00143',
        'N5-Formyl-THF-Glu-N':'C03479',
        'THF-GLU-N': 'C00101'
    }
    # The '-GLU-N' test above ignores case, so the lookup must ignore case too.
    glu_n_lookup = {met_id.upper(): kegg_id for met_id,kegg_id in GluNs.items()}

    # Reassociate those GLU-N to non-GLU metabolites in metacyc
    for metg,kegg_id in GluNs.items():
        print(metg,kegg_id,dfmetpair.loc[kegg_id,'metacyc'])

    ## Replace the all metabolites that endswith -GLU-N with non-GLU-N format
    for met in glu_n_mets:
        old_id = met.id
        if old_id.upper() not in glu_n_lookup:
            raise KeyError('No KEGG id curated for -GLU-N metabolite %r; add it to GluNs' % old_id)
        new_id = dfmetpair.loc[glu_n_lookup[old_id.upper()],'metacyc']
        model = update_rxn(model,old_id,new_id)

    # print all changed reactions to have a final manual check
    for metg,kegg_id in GluNs.items():
        new_id = dfmetpair.loc[kegg_id,'metacyc']
        try:
            met = model.metabolites.get_by_id(new_id)
        except KeyError:
            # GluNs may list metabolites that never occurred in this model.
            print('%s is not in the model (nothing replaced for %s)' % (new_id,metg))
            continue
        for rxn in met.reactions: print(rxn.reaction)

    return model

In [3]:
# KEGG model (per file name), read from a MATLAB .mat file with cobra's loader.
halo_kegg = cobra.io.load_matlab_model('../Results/halo_keggmodel_50.mat')
# MetaCyc model (per file name), read with the load_metacyc_mat helper.
halo_meta = load_metacyc_mat('../Results/halo_metacycmodel_100.mat')
# Combined MetaCyc + KEGG model (per file name), read with the same helper.
halo_mekg = load_metacyc_mat('../Results/halo_metacyc_kegg.mat')

Using license file /Users/gangl/gurobi.lic
Academic license - for non-commercial use only
Loaded fields: dict_keys(['rxns', 'rxnNames', 'eccodes', 'subSystems', 'rxnMiriams', 'rxnReferences', 'rxnConfidenceScores', 'mets', 'metNames', 'metFormulas', 'metCharges', 'inchis', 'metMiriams'])
Loaded fields: dict_keys(['rxns', 'rxnNames', 'eccodes', 'subSystems', 'rxnMiriams', 'rxnReferences', 'mets', 'metCharges', 'inchis', 'metMiriams', 'metNames', 'metFormulas', 'rxnConfidenceScores'])


#### Since load_metacyc_mat only works for purely MetaCyc-based models, the annotation fields of reactions and metabolites need to be corrected for the combined MetaCyc and KEGG model

In [4]:
 # Curate the -GLU-N metabolites of halo_meta; halo_kegg supplies the paired KEGG reactions.
 curate_GLU_N(halo_meta,halo_kegg)

(5401, 1) Index(['metacyc'], dtype='object')
(5747, 1) Index(['kegg'], dtype='object')
5-10-METHENYL-THF-GLU-N
METHYLENE-THF-GLU-N + NADP <=> 5-10-METHENYL-THF-GLU-N + NADPH
KEGG: NADP+ + 5,10-Methylenetetrahydrofolate <=> NADPH + 5,10-Methenyltetrahydrofolate
C00006 + C00143 <=> C00005 + C00445

5-METHYL-THF-GLU-N
5-METHYL-THF-GLU-N + HOMO-CYS --> MET + THF-GLU-N
KEGG: L-Homocysteine + 5-Methyltetrahydrofolate <=> L-Methionine + Tetrahydrofolate
C00155 + C00440 <=> C00073 + C00101

DIHYDROFOLATE-GLU-N
DUMP + METHYLENE-THF-GLU-N --> DIHYDROFOLATE-GLU-N + TMP
KEGG: 5,10-Methylenetetrahydrofolate + dUMP <=> dTMP + Dihydrofolate
C00143 + C00365 <=> C00364 + C00415

FORMYL-THF-GLU-N
AICAR + FORMYL-THF-GLU-N <=> PHOSPHORIBOSYL-FORMAMIDO-CARBOXAMIDE + THF-GLU-N
KEGG: 10-Formyltetrahydrofolate + 1-(5'-Phosphoribosyl)-5-amino-4-imidazolecarboxamide <=> Tetrahydrofolate + 1-(5'-Phosphoribosyl)-5-formamido-4-imidazolecarboxamide
C00234 + C04677 <=> C00101 + C04734

METHYLENE-THF-GLU-N
METHYLENE-

In [5]:
 # Curate the -GLU-N metabolites of halo_mekg; halo_kegg supplies the paired KEGG reactions.
 curate_GLU_N(halo_mekg,halo_kegg)

(5401, 1) Index(['metacyc'], dtype='object')
(5747, 1) Index(['kegg'], dtype='object')
5-10-METHENYL-THF-GLU-N
5-10-METHENYL-THF-GLU-N + WATER <=> FORMYL-THF-GLU-N + PROTON
KEGG: H2O + 5,10-Methenyltetrahydrofolate <=> H+ + 10-Formyltetrahydrofolate
C00001 + C00445 <=> C00080 + C00234

5-METHYL-THF-GLU-N
METHYLENE-THF-GLU-N + NADH + PROTON --> 5-METHYL-THF-GLU-N + NAD
KEGG: NAD+ + 5-Methyltetrahydrofolate <=> NADH + H+ + 5,10-Methylenetetrahydrofolate
C00003 + C00440 <=> C00004 + C00080 + C00143

DIHYDROFOLATE-GLU-N
DIHYDROFOLATE-GLU-N + NADPH + PROTON --> NADP + THF-GLU-N
KEGG: NADP+ + Tetrahydrofolate <=> NADPH + H+ + Dihydrofolate
C00006 + C00101 <=> C00005 + C00080 + C00415

FORMYL-THF-GLU-N
FORMYL-THF-GLU-N + L-methionyl-tRNAfmet --> N-formyl-L-methionyl-tRNAfmet + PROTON + THF-GLU-N
KEGG: 10-Formyltetrahydrofolate + L-Methionyl-tRNA <=> Tetrahydrofolate + N-Formylmethionyl-tRNA
C00234 + C02430 <=> C00101 + C03294

METHYLENE-THF-GLU-N
METHYLENE-THF-GLU-N + NADH + PROTON --> 5-METH

## Manually curate metabolite ids
Some metabolites have multiple MetaCyc ids. When more than one MetaCyc id of the same metabolite exists in the model, keep one of them and remove the others.

In [6]:
def curate_HS(model):
    """Replace metabolite 'CPD-7046' by 'HS' in every reaction of ``model``.

    model, a metacyc based model that contains both 'HS' and 'CPD-7046'.
    They represent the same metabolite with mnx id: 'MNXM89582'.

    Each reaction that uses 'CPD-7046' is copied, the copy is rebuilt with
    'HS' in place of 'CPD-7046', the original reactions are removed from the
    model and the rebuilt copies are added. Each rebuilt reaction is printed.

    Raises KeyError (from ``get_by_id``) if either metabolite is missing.
    """
    old_met = model.metabolites.get_by_id('CPD-7046')
    # Take the replacement from the model being curated, not from a global
    # model, so that the function works for whichever model is passed in.
    new_met = model.metabolites.get_by_id('HS')

    # Snapshot: the reaction set of old_met changes once reactions are removed.
    old_rxns = list(old_met.reactions)

    rxns = list()
    for rxn in old_rxns:
        rxn1 = rxn.copy()
        # empty the copy, then refill it with the substituted stoichiometry
        rxn1.subtract_metabolites(rxn1.metabolites)
        coeffs = dict()
        for met in rxn.metabolites:
            coeff = rxn.get_coefficient(met)
            target = new_met if met.id == 'CPD-7046' else met
            # accumulate, in case 'HS' already takes part in the reaction
            coeffs[target] = coeffs.get(target, 0) + coeff

        rxn1.add_metabolites(coeffs)
        print(rxn1.reaction)
        rxns.append(rxn1)
    model.remove_reactions(old_rxns)
    model.add_reactions(rxns)
    model.repair()

In [7]:
# Replace CPD-7046 by HS in halo_meta; the rebuilt reactions are printed below.
curate_HS(halo_meta)

Fe4S4-Cluster-Protein + Octanoylated-domains + 6.0 PROTON + 2.0 Reduced-ferredoxins + 2.0 S-ADENOSYLMETHIONINE --> 2.0 CH33ADO + Dihydro-Lipoyl-Proteins + 4.0 FE+2 + 2.0 HS + Iron-Sulfur-Cluster-Scaffold-Proteins + 2.0 MET + 2.0 Oxidized-ferredoxins


In [8]:
# Run the CPD-7046 -> HS curation for halo_mekg; the rebuilt reactions are printed below.
curate_HS(halo_mekg)

Fe4S4-Cluster-Protein + Octanoylated-domains + 6.0 PROTON + 2.0 Reduced-ferredoxins + 2.0 S-ADENOSYLMETHIONINE --> 2.0 CH33ADO + Dihydro-Lipoyl-Proteins + 4.0 FE+2 + 2.0 HS + Iron-Sulfur-Cluster-Scaffold-Proteins + 2.0 MET + 2.0 Oxidized-ferredoxins


## Split each reaction that uses NAD-P-OR-NOP or NADH-P-OR-NOP into an NAD reaction and an NADP reaction

In [9]:
def _nad_p_variant(rxn, suffix, oxidized, reduced, generic_ox, generic_red):
    """Return a copy of ``rxn`` (id ``rxn.id + suffix``) with the generic cofactors swapped for a concrete pair."""
    stoich = rxn.metabolites
    coeff_ox = stoich.get(generic_ox, 0)
    coeff_red = stoich.get(generic_red, 0)

    # Add the concrete cofactor with the generic one's coefficient and cancel
    # the generic one; cofactors that are absent from the reaction are skipped.
    changes = {
        oxidized: coeff_ox,
        reduced: coeff_red,
        generic_ox: -coeff_ox,
        generic_red: -coeff_red,
    }
    changes = {met: coeff for met, coeff in changes.items() if coeff != 0}

    variant = rxn.copy()
    variant.add_metabolites(changes)
    variant.id = rxn.id + suffix
    return variant


def curate_NAD_P_OR_NOP(model):
    """Split every reaction written with the generic cofactors into an NAD and an NADP reaction.

    For each reaction that contains 'NAD-P-OR-NOP' and/or 'NADH-P-OR-NOP',
    two copies are created: ``<id>_NAD`` (using NAD / NADH) and ``<id>_NADP``
    (using NADP / NADPH). The copies are added to ``model`` and the generic
    reactions are removed. The original reaction, both variants and, at the
    end, the reactions still attached to the generic metabolites are printed.

    Raises KeyError (from ``get_by_id``) if 'NAD-P-OR-NOP' is not in the model.
    """
    or1 = model.metabolites.get_by_id('NAD-P-OR-NOP')
    try:
        or2 = model.metabolites.get_by_id('NADH-P-OR-NOP')
    except KeyError:
        # only the oxidized generic cofactor exists in this model
        or2 = None

    # Collect reactions that use either generic cofactor (each reaction once),
    # so that a reaction containing only one of the two is also split.
    generic_rxns = {}
    candidates = list(or1.reactions)
    if or2 is not None:
        candidates += list(or2.reactions)
    for rxn in candidates:
        generic_rxns.setdefault(rxn.id, rxn)

    rxns_to_remove = list(generic_rxns.values())
    rxns_to_add = []

    if rxns_to_remove:
        # loop-invariant lookups, only needed when there is something to split
        nad   = model.metabolites.NAD
        nadh  = model.metabolites.NADH

        nadp  = model.metabolites.NADP
        nadph = model.metabolites.NADPH

    for rxn in rxns_to_remove:
        print(rxn.id)
        print(rxn.reaction)

        rxn_nad = _nad_p_variant(rxn, '_NAD', nad, nadh, or1, or2)
        print('NAD:',rxn_nad.reaction)
        print(rxn_nad.id)

        rxn_nadp = _nad_p_variant(rxn, '_NADP', nadp, nadph, or1, or2)
        print('NADP:',rxn_nadp.reaction)
        print(rxn_nadp.id)

        rxns_to_add.extend([rxn_nad,rxn_nadp])
        print()

    model.add_reactions(rxns_to_add)
    model.remove_reactions(rxns_to_remove)
    model.repair()

    print()
    print('After curation')
    for met in model.metabolites:
        if 'NADH-P-OR-NOP' in met.id or 'NAD-P-OR-NOP' in met.id:
            print(met.reactions)

In [10]:
# Split the generic-cofactor reactions of halo_meta into _NAD and _NADP reactions.
curate_NAD_P_OR_NOP(halo_meta)

RXN0-5293
NAD-P-OR-NOP + SUCC-S-ALD + WATER --> NADH-P-OR-NOP + 2.0 PROTON + SUC
NAD: NAD + SUCC-S-ALD + WATER --> NADH + 2.0 PROTON + SUC
RXN0-5293_NAD
NADP: NADP + SUCC-S-ALD + WATER --> NADPH + 2.0 PROTON + SUC
RXN0-5293_NADP

RXN-2962
NAD-P-OR-NOP + S-HYDROXYMETHYLGLUTATHIONE --> CPD-548 + NADH-P-OR-NOP + PROTON
NAD: NAD + S-HYDROXYMETHYLGLUTATHIONE --> CPD-548 + NADH + PROTON
RXN-2962_NAD
NADP: NADP + S-HYDROXYMETHYLGLUTATHIONE --> CPD-548 + NADPH + PROTON
RXN-2962_NADP

R501-RXN
CPD-821 + NADH-P-OR-NOP + OXYGEN-MOLECULE + PROTON --> CPD-904 + FORMALDEHYDE + NAD-P-OR-NOP + WATER
NAD: CPD-821 + NADH + OXYGEN-MOLECULE + PROTON --> CPD-904 + FORMALDEHYDE + NAD + WATER
R501-RXN_NAD
NADP: CPD-821 + NADPH + OXYGEN-MOLECULE + PROTON --> CPD-904 + FORMALDEHYDE + NADP + WATER
R501-RXN_NADP

GLYC3PDEHYDROGBIOSYN-RXN
DIHYDROXY-ACETONE-PHOSPHATE + NADH-P-OR-NOP + PROTON --> GLYCEROL-3P + NAD-P-OR-NOP
NAD: DIHYDROXY-ACETONE-PHOSPHATE + NADH + PROTON --> GLYCEROL-3P + NAD
GLYC3PDEHYDROGBIOSYN-R

In [11]:
# Split the generic-cofactor reactions of halo_mekg into _NAD and _NADP reactions.
curate_NAD_P_OR_NOP(halo_mekg)

HYDROXYPYRUVATE-REDUCTASE-RXN
GLYCERATE + NAD-P-OR-NOP <=> NADH-P-OR-NOP + OH-PYR + PROTON
NAD: GLYCERATE + NAD <=> NADH + OH-PYR + PROTON
HYDROXYPYRUVATE-REDUCTASE-RXN_NAD
NADP: GLYCERATE + NADP <=> NADPH + OH-PYR + PROTON
HYDROXYPYRUVATE-REDUCTASE-RXN_NADP

HOMOSERDEHYDROG-RXN
L-ASPARTATE-SEMIALDEHYDE + NADH-P-OR-NOP + PROTON --> HOMO-SER + NAD-P-OR-NOP
NAD: L-ASPARTATE-SEMIALDEHYDE + NADH + PROTON --> HOMO-SER + NAD
HOMOSERDEHYDROG-RXN_NAD
NADP: L-ASPARTATE-SEMIALDEHYDE + NADPH + PROTON --> HOMO-SER + NADP
HOMOSERDEHYDROG-RXN_NADP

RXN-12105
25-DIDEHYDRO-D-GLUCONATE + NADH-P-OR-NOP + PROTON <=> CPD-13059 + NAD-P-OR-NOP
NAD: 25-DIDEHYDRO-D-GLUCONATE + NADH + PROTON <=> CPD-13059 + NAD
RXN-12105_NAD
NADP: 25-DIDEHYDRO-D-GLUCONATE + NADPH + PROTON <=> CPD-13059 + NADP
RXN-12105_NADP

PYRROLINE-2-CARBOXYLATE-REDUCTASE-RXN
DELTA1-PYRROLINE_2-CARBOXYLATE + NADH-P-OR-NOP + 2.0 PROTON --> NAD-P-OR-NOP + PRO
NAD: DELTA1-PYRROLINE_2-CARBOXYLATE + NADH + 2.0 PROTON --> NAD + PRO
PYRROLINE-2-CA

## Manually curate the EC number and reversibility based on the MetaCyc website

In [12]:
def curate_nad_or_nadp(model,metacyc_id,tpe,ec=None,lb=None,ub=None):
    """Update EC number and/or bounds of one split reaction and print it before and after.

    The reaction is looked up as ``<metacyc_id>_<tpe>``.

    metacyc_id : id of the reaction before the '_NAD' / '_NADP' suffix.
    tpe        : 'NAD' or 'NADP'.
    ec         : new value for ``annotation['ec-code']`` (unchanged when None).
    lb, ub     : new lower / upper bound (each unchanged when None).
    """
    target = model.reactions.get_by_id('_'.join((metacyc_id, tpe)))

    def _show(label):
        # same three-line report for the "before" and the "after" state
        print(label)
        print(target.reaction)
        print(target.annotation)

    _show('Old:')

    if ec is not None:
        target.annotation['ec-code'] = ec
    if lb is not None:
        target.lower_bound = lb
    if ub is not None:
        target.upper_bound = ub

    _show('New:')
    print()

In [13]:
def curate_nad_or_nadps(model):
    """Apply the manually curated EC numbers and bounds to the split NAD / NADP reactions of ``model``."""
    # (metacyc reaction id, cofactor suffix, keyword arguments for curate_nad_or_nadp)
    curated = (
        ('HYDROXYPYRUVATE-REDUCTASE-RXN',     'NAD',  {'ec': '1.1.1.29'}),
        ('HYDROXYPYRUVATE-REDUCTASE-RXN',     'NADP', {'ec': '1.1.1.-', 'ub': 0}),

        ('GLUTAMATE-DEHYDROGENASE-NADP+-RXN', 'NAD',  {'ec': '1.4.1.2', 'lb': 0}),
        ('GLUTAMATE-DEHYDROGENASE-NADP+-RXN', 'NADP', {'ec': '1.4.1.4'}),

        ('NQOR-RXN',                          'NAD',  {'ec': '1.6.5.9'}),
        ('NQOR-RXN',                          'NADP', {'ec': '1.6.5.10'}),

        ('RXN0-5293',                         'NAD',  {'ec': '1.2.1.24'}),
        ('RXN0-5293',                         'NADP', {'ec': '1.2.1.79'}),

        ('RXN-9958',                          'NAD',  {'ec': '1.2.1.18'}),
        ('RXN-9958',                          'NADP', {'ec': '1.2.1.-'}),

        ('RXN-8166',                          'NAD',  {'ec': '1.5.1.-'}),
        ('RXN-8166',                          'NADP', {'ec': '1.5.1.21'}),
    )
    for rxn_id, cofactor, changes in curated:
        curate_nad_or_nadp(model, rxn_id, cofactor, **changes)

In [14]:
def curate_water_id(model):
    """Give the 'WATER' metabolite the id 'CPD-15815' and set its biocyc annotation to match."""
    new_id = 'CPD-15815'
    water = model.metabolites.get_by_id('WATER')
    water.id = new_id
    water.annotation['biocyc'] = 'META:' + new_id
    # rebuild the model's indexes after the id change
    model.repair()

In [15]:
# Apply the curated EC numbers / bounds to the split reactions of halo_meta.
curate_nad_or_nadps(halo_meta)

Old:
GLYCERATE + NAD <=> NADH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.81', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}
New:
GLYCERATE + NAD <=> NADH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.29', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}

Old:
GLYCERATE + NADP <=> NADPH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.81', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}
New:
GLYCERATE + NADP <-- NADPH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.-', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}

Old:
GLT + NAD + WATER <=> 2-KETOGLUTARATE + AMMONIUM + NADH + PROTON
{'ec-code': '1.4.1.3', 'biocyc': 'META:GLUTAMATE-DEHYDROGENASE-NADP+-RXN', 'id_source': 'biocyc'}
New:
GLT + NAD + WATER --> 2-KETOGLUTARATE + AMMONIUM + NADH + PROTON
{'ec-code

In [16]:
# Apply the curated EC numbers / bounds to the split reactions of halo_mekg.
curate_nad_or_nadps(halo_mekg)

Old:
GLYCERATE + NAD <=> NADH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.81', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}
New:
GLYCERATE + NAD <=> NADH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.29', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}

Old:
GLYCERATE + NADP <=> NADPH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.81', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}
New:
GLYCERATE + NADP <-- NADPH + OH-PYR + PROTON
{'kegg.reaction': 'R01388', 'rhea': '17908', 'ec-code': '1.1.1.-', 'biocyc': 'META:HYDROXYPYRUVATE-REDUCTASE-RXN', 'id_source': 'biocyc'}

Old:
GLT + NAD + WATER <=> 2-KETOGLUTARATE + AMMONIUM + NADH + PROTON
{'ec-code': '1.4.1.3', 'biocyc': 'META:GLUTAMATE-DEHYDROGENASE-NADP+-RXN', 'id_source': 'biocyc'}
New:
GLT + NAD + WATER --> 2-KETOGLUTARATE + AMMONIUM + NADH + PROTON
{'ec-code

In [17]:
# Rename WATER to CPD-15815 in halo_meta.
curate_water_id(halo_meta)

In [18]:
# Rename WATER to CPD-15815 in halo_mekg.
curate_water_id(halo_mekg)

## Save model

In [19]:
# Save the curated halo_meta model; the context manager closes (and flushes) the file.
with open('../Results/halo_metacycmodel_100_curated.pkl','wb') as fh:
    pickle.dump(halo_meta,fh)

In [20]:
# Save the curated halo_mekg model; the context manager closes (and flushes) the file.
with open('../Results/halo_metacyc_kegg_curated.pkl','wb') as fh:
    pickle.dump(halo_mekg,fh)