In [46]:
import pandas as pd
import os
from os.path import join
import matplotlib.pyplot as plt
import numpy as np
import json
from rdkit import Chem
from rdkit.Chem import AllChem
from BiGG_functions import *

import warnings
warnings.filterwarnings("ignore")

### 1. Creating a DataFrame with all reactions and their metabolite IDs from the universal model of BiGG:

#### (a) Creating DataFrame with all reactions from BiGG:

In [2]:
# BiGG model identifiers whose reactions are combined into a single table.
models = ["iSbBS512_1146", "iJN1463", "iIT341", "iHN637", "iEK1008", "iECO111_1330"]

# Collect one reaction DataFrame per model and concatenate them once at the end.
# `DataFrame.append` was removed in pandas 2.0 and re-copied the whole frame on
# every iteration; a single `pd.concat` gives the same rows in the same order.
model_dfs = []
for i, model_name in enumerate(models):
    print(i,model_name)
    model_df = create_metabolic_model_df(model_path = join("..", "..", "..", "data", "BiGG_data"),
                                         model_name = model_name)
    model_df = adding_KEGG_CIDS_to_model_df(model_df, model_name)
    model_dfs.append(model_df)

# Fresh 0..n-1 index so that every reaction row has a unique label.
df_all_models = pd.concat(model_dfs).reset_index(drop = True)

0 iSbBS512_1146
1 iJN1463
2 iIT341
3 iHN637
4 iEK1008
5 iECO111_1330


In [3]:
# Display the combined reaction table built from all models listed in `models`.
df_all_models

Unnamed: 0,BiGG ID,substrates,products,substrate KEGG CIDs,product KEGG CIDs
0,EX_doxrbcn_e,[doxrbcn_e],[],[C01661],
1,EX_dtmp_e,[dtmp_e],[],[C00364],
2,EX_dump_e,[dump_e],[],[C00365],
3,EX_duri_e,[duri_e],[],[C00526],
4,EX_eca4colipa_e,[eca4colipa_e],[],,
...,...,...,...,...,...
10838,DMALRED,"[fad_c, mal__L_c]","[fadh2_c, oaa_c]","[C00016, C00149]","[C01352, C00036]"
10839,AMMQT8_2,"[2dmmq8_c, amet_c]","[ahcys_c, h_c, mqn8_c]",[C00019],"[C00021, C00080]"
10840,CELLBpts_1,"[cellb_e, pep_c]","[6pgg_c, pyr_p]","[C00185, C06421, C06422, C00074]",[C00022]
10841,FFSD,"[h2o_c, suc6p_c]","[fru_c, g6p_c]","[C00001, C01328, C16688, C02591]","[C01496, C05003, C00095, C10906, C00092]"


#### (b) Mapping compound IDs to the substrates and products of all reactions:

In [19]:
# Build a lookup table of external database IDs for every metabolite that occurs
# in any of the models listed in `models`.

# Output column name -> key inside a metabolite's "annotation" dict in the model JSON.
annotation_keys = {"KEGG ID": "kegg.compound",
                   "CHEBI ID": "chebi",
                   "MNX ID": "metanetx.chemical",
                   "InChI Key": "inchi_key"}

# Rows are collected in a plain list and turned into a DataFrame once at the end:
# `DataFrame.append` was removed in pandas 2.0 and was quadratic in the number of rows.
metabolite_records = []

for ID in models:
    model_path = join("..", "..", "..", "data", "BiGG_data", ID + ".json")
    with open(model_path) as json_file:
        model = json.load(json_file)
    model = model["metabolites"]

    for metabolite in model:
        # A metabolite without an "annotation" entry simply has no external IDs.
        annotation = metabolite.get("annotation", {})
        record = {"ID": metabolite["id"]}
        for column, key in annotation_keys.items():
            try:
                # Only the first listed identifier of each database is kept.
                record[column] = annotation[key][0]
            except (KeyError, IndexError):
                # Key absent, or present with an empty list of identifiers.
                record[column] = np.nan
        metabolite_records.append(record)

# Explicit list (not a set) so that the column order is deterministic.
metabolites_df = pd.DataFrame(metabolite_records,
                              columns = ["ID", "KEGG ID", "CHEBI ID", "MNX ID", "InChI Key"])
# Index by metabolite ID for label-based lookup; the "ID" column is kept as well.
metabolites_df.index = metabolites_df["ID"]

In [30]:
# Remove rows that are identical in every column (the same metabolite listed by
# several models). Rows sharing an ID but differing in any annotation are kept.
metabolites_df.drop_duplicates(inplace = True)

Removing all reactions for which we do not have an ID for every substrate and every product:

In [35]:
def adding_CIDS_to_model_df(model_df, metabolites_df):
    """Attach compound-ID lists and a completeness flag to every reaction row.

    For each row, the metabolite IDs in the "substrates" and "products" columns
    are translated with `get_ID_list`. Three columns are written:

    * "substrate CIDs": list with one ID (or NaN) per substrate
    * "product CIDs":   list with one ID (or NaN) per product
    * "complete":       True only if every substrate and every product got an ID

    Parameters
    ----------
    model_df : pandas.DataFrame
        Reaction table with list-valued "substrates" and "products" columns.
        It is modified in place.
    metabolites_df : pandas.DataFrame
        Lookup table passed through to `get_ID_list`.

    Returns
    -------
    pandas.DataFrame
        The same `model_df` object, with the three columns added/overwritten.
    """
    substrate_cids = []
    product_cids = []
    complete_flags = []

    for substrates, products in zip(model_df["substrates"], model_df["products"]):
        sub_ID_list, complete_subs = get_ID_list(metabolites = substrates, metabolites_df = metabolites_df)
        pro_ID_list, complete_pros = get_ID_list(metabolites = products, metabolites_df = metabolites_df)

        substrate_cids.append(sub_ID_list)
        product_cids.append(pro_ID_list)
        complete_flags.append(bool(complete_subs and complete_pros))

    # Whole-column assignment instead of `model_df[col][ind] = value`: chained
    # indexing may write to a temporary copy (SettingWithCopyWarning) and has no
    # effect at all under pandas Copy-on-Write. Wrapping the lists in an
    # object-dtype Series keeps each list as a single cell value.
    model_df["substrate CIDs"] = pd.Series(substrate_cids, index = model_df.index, dtype = object)
    model_df["product CIDs"] = pd.Series(product_cids, index = model_df.index, dtype = object)
    model_df["complete"] = pd.Series(complete_flags, index = model_df.index, dtype = bool)

    return(model_df)

def _first_non_null(values):
    """Return the first non-null entry of a pandas Series, or NaN if there is none."""
    present = values.dropna()
    return present.iloc[0] if len(present) else np.nan


def get_ID_list(metabolites, metabolites_df):
    """Translate metabolite IDs into KEGG IDs, falling back to MetaNetX IDs.

    Parameters
    ----------
    metabolites : iterable of str
        Metabolite IDs; each must be a label in the index of `metabolites_df`.
    metabolites_df : pandas.DataFrame
        Table indexed by metabolite ID with the columns "KEGG ID" and "MNX ID".
        An ID may occur in several rows (e.g. different annotations in
        different models); the first non-null value per column is used.

    Returns
    -------
    (list, bool)
        One entry per metabolite: its KEGG ID if available, otherwise its
        MNX ID, otherwise NaN. The flag is False as soon as one metabolite has
        neither ID, and True otherwise (including for an empty input).

    Raises
    ------
    KeyError
        If a metabolite ID is not present in the index of `metabolites_df`.
    """
    ID_list = []
    complete = True
    for met in metabolites:
        # Selecting with a list of labels always yields a DataFrame, so unique
        # and duplicated index labels are handled by the same code path. The
        # previous scalar lookup returned a Series for duplicated labels, which
        # made the `if pd.isnull(...)` test raise "truth value is ambiguous".
        rows = metabolites_df.loc[[met], ["KEGG ID", "MNX ID"]]
        kegg_id = _first_non_null(rows["KEGG ID"])
        mnx_id = _first_non_null(rows["MNX ID"])

        if not pd.isnull(kegg_id):
            ID_list.append(kegg_id)
        elif not pd.isnull(mnx_id):
            ID_list.append(mnx_id)
        else:
            ID_list.append(np.nan)
            complete = False
    return(ID_list, complete)

In [34]:
# The function adds its columns to the frame it receives and returns that same
# object, so `model_df` and `df_all_models` refer to one and the same DataFrame.
model_df = adding_CIDS_to_model_df(df_all_models, metabolites_df = metabolites_df)

0 ['doxrbcn_e']
1 ['dtmp_e']
2 ['dump_e']
3 ['duri_e']
4 ['eca4colipa_e']
5 ['enlipa_e']
6 ['enter_e']
7 ['etha_e']
8 ['ethso3_e']
9 ['etoh_e']
10 ['f6p_e']
11 ['fald_e']
12 ['fe2_e']
13 ['LalaDgluMdapDala_e']
14 ['fe3_e']
15 ['fe3dcit_e']
16 ['LalaLglu_e']
17 ['fe3dhbzs_e']
18 ['ac_e']
19 ['acac_e']
20 ['4crsol_c']
21 ['5drib_c']
22 ['acald_e']
23 ['aacald_c']
24 ['fe3hox_e']
25 ['acgal_e']
26 ['amob_c']
27 ['fe3hox_un_e']
28 ['mththf_c']
29 ['oxam_c']
30 ['acgal1p_e']
31 ['acgam_e']
32 ['fecrm_e']
33 ['acgam1p_e']
34 ['acmana_e']
35 ['acmum_e']
36 ['acnam_e']
37 ['acolipa_e']
38 ['acser_e']
39 ['ade_e']
40 ['adn_e']
41 ['adocbl_e']
42 ['10fthf_c', '2dmmql8_c', '2fe2s_c', '4fe4s_c', '5mthf_c', 'accoa_c', 'adocbl_c', 'ala__L_c', 'amet_c', 'arg__L_c', 'asn__L_c', 'asp__L_c', 'atp_c', 'bmocogdp_c', 'btn_c', 'ca2_c', 'chor_c', 'cl_c', 'clpn160_p', 'clpn161_p', 'clpn181_p', 'coa_c', 'cobalt2_c', 'colipa_e', 'ctp_c', 'cu2_c', 'cys__L_c', 'datp_c', 'dctp_c', 'dgtp_c', 'dttp_c', 'enter_c', 'f

389 ['cdec3eACP_c', 'h_c', 'malACP_c']
390 ['ddcaACP_c', 'h_c', 'malACP_c']
391 ['cddec5eACP_c', 'h_c', 'malACP_c']
392 ['dtdp4aaddg_c', 'unagamu_c']
393 ['gbbtn_e']
394 ['14glucan_c']
395 ['h_c', 'malACP_c', 'myrsACP_c']
396 ['h_c', 'malACP_c', 'tdeACP_c']
397 ['h_c', 'malACP_c', 'palmACP_c']
398 ['h_c', 'hdeACP_c', 'malACP_c']
399 ['14glucan_p']
400 ['arbt6p_c', 'h2o_c']
401 ['gdp_e']
402 ['4abut_c', 'akg_c']
403 ['4abut_p', 'h_p']
404 ['butACP_c', 'h_c', 'malACP_c']
405 ['h_c', 'hexACP_c', 'malACP_c']
406 ['glc__D_e']
407 ['LalaDgluMdap_p', 'atp_c', 'h2o_c']
408 ['4abut_e']
409 ['accoa_c']
410 ['accoa_c', 'btcoa_c']
411 ['accoa_c', 'hxcoa_c']
412 ['accoa_c', 'occoa_c']
413 ['accoa_c', 'dcacoa_c']
414 ['LalaDgluMdap_e']
415 ['3ump_e']
416 ['4hoxpacd_e']
417 ['h2o_c', 'phthr_c']
418 ['LalaDgluMdapDala_c', 'h2o_c']
419 ['LalaDgluMdapDala_p', 'h2o_p']
420 ['accoa_c', 'ddcacoa_c']
421 ['accoa_c', 'tdcoa_c']
422 ['3ohodcoa_c', 'coa_c']
423 ['LalaDgluMdapDala_p', 'atp_c', 'h2o_c']
424 ['gl

723 ['hdca_e']
724 ['bwco1gdp_c', 'gtp_c', 'h_c']
725 ['mptamp_c', 'wco_c']
726 ['pg160_p']
727 ['pg161_p']
728 ['ca2_c', 'h_p']
729 ['hdcea_e']
730 ['ca2_e']
731 ['h2o2_c']
732 ['pg180_p']
733 ['pg181_p']
734 ['cl_p', 'h_c']
735 ['ca2_c', 'na1_p']
736 ['hg2_e']
737 ['cbm_c', 'h_c']
738 ['atp_c', 'co2_c', 'nh4_c']
739 ['cl_e']
740 ['cmp_c', 'h2o_c']
741 ['cmp_e']
742 ['cm_e']
743 ['co2_e']
744 ['atp_c', 'gln__L_c', 'h2o_c', 'hco3_c']
745 ['atp_c', 'cdg_c', 'nh4_c']
746 ['atp_c', 'cd2_c', 'h2o_c']
747 ['cd2_c', 'h_p']
748 ['cd2_e']
749 ['cd2_p']
750 ['co2_p']
751 ['atp_c', 'cobalt2_c', 'h2o_c']
752 ['cobalt2_c', 'h_p']
753 ['h_c', 'nadph_c', 'preq0_c']
754 ['his__L_e']
755 ['cph4_c', 'h_c']
756 ['4c2me_c', 'atp_c']
757 ['cobalt2_e']
758 ['cobalt2_p']
759 ['colipa_p', 'udcpdp_p']
760 ['atp_c', 'colipap_p', 'h2o_c']
761 ['atp_c', 'colipa_c', 'h2o_c']
762 ['amet_c', 'pe161_c']
763 ['amet_c', 'pg161_c']
764 ['amet_c', 'pe181_c']
765 ['amet_c', 'pg181_c']
766 ['atp_c', 'colipa_p', 'h2o_c']
7

1045 ['fecrm_c', 'rbflvrd_c']
1046 ['fecrm_un_p', 'h_p']
1047 ['fe3dhbzs_c']
1048 ['cytd_p', 'h_p']
1049 ['atp_c', 'fe3dhbzs_p', 'h2o_c']
1050 ['fecrm_un_c', 'h_p']
1051 ['atp_c', 'fecrm_p', 'h2o_c']
1052 ['fe3_e', 'fecrm_un_e']
1053 ['fe3dhbzs_e', 'h_p']
1054 ['fecrm_e', 'h_p']
1055 ['fadh2_c', 'fe3hox_c']
1056 ['fadh2_c', 'feenter_c']
1057 ['cytd_e']
1058 ['feenter_c', 'fmnh2_c']
1059 ['fe3hox_c', 'fmnh2_c']
1060 ['feenter_c', 'rbflvrd_c']
1061 ['atp_c', 'cmp_c']
1062 ['atp_c', 'feenter_p', 'h2o_c']
1063 ['fe3hox_c', 'rbflvrd_c']
1064 ['fe3hox_un_c', 'h_p']
1065 ['fe3hox_un_p', 'h_p']
1066 ['atp_c', 'fe3hox_p', 'h2o_c']
1067 ['fe3_e', 'fe3hox_un_e']
1068 ['fe3hox_e', 'h_p']
1069 ['fadh2_c', 'fe3_c']
1070 ['atp_c', 'fe3_p', 'h2o_c']
1071 ['enter_e', 'fe3_e']
1072 ['enter_p', 'h_p']
1073 ['feenter_e', 'h_p']
1074 ['enter_c', 'h_p']
1075 ['fadh2_c', 'feoxam_c']
1076 ['feoxam_c', 'fmnh2_c']
1077 ['feoxam_c', 'rbflvrd_c']
1078 ['feoxam_un_p', 'h_p']
1079 ['fe3_e']
1080 ['feoxam_un_c', 'h_

1364 ['adphep_LD_c', 'hlipa_c']
1365 ['atp_c', 'gthrd_p', 'h2o_c']
1366 ['26dap__M_p', 'atp_c', 'h2o_c']
1367 ['adphep_LD_c', 'phhlipa_c']
1368 ['4mhetz_c', 'atp_c']
1369 ['2agpg140_c', 'pg140_c']
1370 ['15dap_e']
1371 ['atp_c', 'glc__D_c']
1372 ['atp_c', 'man_c']
1373 ['his__L_e']
1374 ['atp_c', 'fru_c']
1375 ['ctp_c', 'h_c', 'pa120_c']
1376 ['atp_c', 'h2o_c', 'hg2_c']
1377 ['h_p', 'hg2_c']
1378 ['hg2_e']
1379 ['h2o_c', 'histd_c', 'nad_c']
1380 ['h2o_c', 'hisp_c']
1381 ['atp_c', 'his__L_c', 'trnahis_c']
1382 ['h2o_c', 'ppbng_c']
1383 ['4ahmmp_c', 'atp_c']
1384 ['h_p', 'hom__L_c']
1385 ['hom__L_e']
1386 ['6hmhpt_c', 'atp_c']
1387 ['3hpppn_e']
1388 ['hpyr_c']
1389 ['h_c', 'hpyr_c', 'nadh_c']
1390 ['h_c', 'hpyr_c', 'nadph_c']
1391 ['atp_c', 'h2o_c', 'his__L_p']
1392 ['h_p', 'his__L_p']
1393 ['hom__L_c', 'nadp_c']
1394 ['ctp_c', 'h_c', 'pa140_c']
1395 ['atp_c', 'hom__L_c']
1396 ['hom__L_c', 'succoa_c']
1397 ['glu__L_c', 'imacp_c']
1398 ['imp_e']
1399 ['h_c', 'indole_c']
1400 ['hxa_e']
140

1694 ['atp_c', 'h2o_c', 'ni2_p']
1695 ['4fe4s_c', 'amet_c', 'h_c', 'nad_c', 'octapb_c']
1696 ['dgtp_c', 'h2o_c']
1697 ['h2o_c', 'ncam_c']
1698 ['atp_c', 'h_c', 'nicrnt_c']
1699 ['dmbzid_c', 'nicrnt_c']
1700 ['h_c', 'prpp_c', 'quln_c']
1701 ['ditp_c', 'h2o_c']
1702 ['h2o_c', 'xtp_c']
1703 ['h_p', 'no2_p']
1704 ['h_p', 'lipoate_p']
1705 ['no2_e']
1706 ['no3_p', 'q8h2_c']
1707 ['gtp_c', 'h2o_c']
1708 ['dctp_c', 'h2o_c']
1709 ['ctp_c', 'h2o_c']
1710 ['datp_c', 'h2o_c']
1711 ['atp_c', 'h2o_c']
1712 ['mql8_c', 'no3_p']
1713 ['no2_c', 'no3_p']
1714 ['no3_e']
1715 ['nadh_c', 'no_c', 'o2_c']
1716 ['nadph_c', 'no_c', 'o2_c']
1717 ['novbcn_e']
1718 ['dttp_c', 'h2o_c']
1719 ['h2o_c', 'utp_c']
1720 ['h2o_c', 'itp_c']
1721 ['dgtp_c', 'h2o_c']
1722 ['gtp_c', 'h2o_c']
1723 ['h_c', 'nadh_c', 'no2_c']
1724 ['h_p', 'no2_p', 'q8h2_c']
1725 ['no_e']
1726 ['no_p']
1727 ['dump_c', 'h2o_c']
1728 ['h_p', 'mql8_c', 'no2_p']
1729 ['atp_c', 'h2o_c', 'o16a4colipa_p']
1730 ['h2o_c', 'xmp_c']
1731 ['lipoate_e']
1732

2143 ['feroxB_e']
2144 ['2agpe120_c', 'pg120_c']
2145 ['h2o_c', 'sucglu_c']
2146 ['h2o_c', 'nad_c', 'sucgsa_c']
2147 ['2agpe140_c', 'pg140_c']
2148 ['2sephchc_c']
2149 ['dscl_c', 'nad_c']
2150 ['fe2_c', 'scl_c']
2151 ['3dhsk_c', 'h_c', 'nadph_c']
2152 ['atp_c', 'skm_c']
2153 ['cys__L_c', 'suchms_c']
2154 ['fum_p', 'succ_c']
2155 ['mal__L_p', 'succ_c']
2156 ['atp_c', 'coa_c', 'succ_c']
2157 ['sucr_e']
2158 ['pep_c', 'sucr_p']
2159 ['atp_c', 'h2o_c', 'sulfac_p']
2160 ['sulfac_e']
2161 ['h_c', 'nadph_c', 'so3_c']
2162 ['atp_c', 'h2o_c', 'so4_p']
2163 ['h_p', 'skm_p']
2164 ['skm_e']
2165 ['slnt_e']
2166 ['h_p', 'slnt_p']
2167 ['tdec2eACP_c']
2168 ['2agpe141_c', 'pg141_c']
2169 ['altrn_c', 'nad_c']
2170 ['g3p_c', 's7p_c']
2171 ['so2_e']
2172 ['so2_p']
2173 ['so3_e']
2174 ['tartr__D_e']
2175 ['succ_c', 'tartr__L_p']
2176 ['tartr__L_e']
2177 ['akg_c', 'o2_c', 'taur_c']
2178 ['h_p', 'so4_p']
2179 ['so4_e']
2180 ['akg_c', 'sucorn_c']
2181 ['accoa_c', 'spmd_c']
2182 ['accoa_c', 'spmd_c']
2183 ['

2642 ['3hcddec5eACP_c']
2643 ['3hmrsACP_c']
2644 ['3hcmrs7eACP_c']
2645 ['fad_c', 'lnlccoa_c']
2646 ['fad_c', 'hd_7_10_coa_c']
2647 ['fad_c', 'td_5_8_coa_c']
2648 ['dec4coa_c', 'fad_c']
2649 ['fad_c', 'occoa_c']
2650 ['dcacoa_c', 'fad_c']
2651 ['ddcacoa_c', 'fad_c']
2652 ['fad_c', 'tdcoa_c']
2653 ['fad_c', 'pmtcoa_c']
2654 ['fad_c', 'stcoa_c']
2655 ['2mbcoa_c', 'fad_c']
2656 ['ibcoa_c', 'o2_c']
2657 ['ACP_c', 'accoa_c']
2658 ['acorn_c', 'h2o_c']
2659 ['acon_C_p', 'na1_p']
2660 ['acon_C_e']
2661 ['acon_T_c']
2662 ['acon_T_c', 'amet_c']
2663 ['cit_c']
2664 ['acon_C_c', 'h2o_c']
2665 ['acorn_c', 'akg_c']
2666 ['ddcaACP_c', 'h_c', 'pi_c']
2667 ['h_c', 'myrsACP_c', 'pi_c']
2668 ['h_c', 'pi_c', 'tdeACP_c']
2669 ['h_c', 'palmACP_c', 'pi_c']
2670 ['h_c', 'hdeACP_c', 'pi_c']
2671 ['h_c', 'ocdcaACP_c', 'pi_c']
2672 ['h_c', 'octeACP_c', 'pi_c']
2673 ['apoACP_c', 'coa_c']
2674 ['ac_c', 'atp_c', 'coa_c']
2675 ['atp_c', 'coa_c', 'ppa_c']
2676 ['acser_e']
2677 ['acser_c']
2678 ['R_3hdcaa_c', 'atp_c',

2947 ['atp_c', 'dhpt_c', 'glu__L_c']
2948 ['dhmpt_c', 'h_c', 'nadph_c']
2949 ['dhmpt_c', 'h_c', 'nadh_c']
2950 ['dhnpt_c']
2951 ['dhnpt_c']
2952 ['dhor__S_c', 'q8_c']
2953 ['dhor__S_c', 'fum_c']
2954 ['dhor__S_c', 'h2o_c']
2955 ['56dura_c', 'h2o_c']
2956 ['56dthm_c', 'h2o_c']
2957 ['25drapp_c', 'h2o_c', 'h_c']
2958 ['2ahhmp_c', '4abz_c']
2959 ['3dhq_c']
2960 ['2dda7p_c']
2961 ['3dhsk_c']
2962 ['25dkglcn_c', 'h_c', 'nadph_c']
2963 ['asn__L_c']
2964 ['cys__L_c']
2965 ['atp_c', 'h2o_c', 'leu__D_p']
2966 ['leu__D_e']
2967 ['met__L_c']
2968 ['fad_c', 'h2o_c', 'lys__D_c']
2969 ['akg_c', 'lys__D_c']
2970 ['lys__D_c', 'pyr_c']
2971 ['atp_c', 'h2o_c', 'lys__D_p']
2972 ['lys__D_e']
2973 ['dmpp_c', 'ipdp_c']
2974 ['met__D_e']
2975 ['dmgly_c', 'h2o_c', 'nad_c']
2976 ['atp_c', 'dmgly_p', 'h2o_c']
2977 ['dmgly_e']
2978 ['h2mb4p_c', 'h_c', 'nadh_c']
2979 ['2omhmbl_c', 'amet_c']
2980 ['atp_c', 'dmso2_p', 'h2o_c']
2981 ['dmso2_e']
2982 ['5drib_c']
2983 ['amob_c']
2984 ['C100aPHA_c']
2985 ['C100pPHA_c']

3493 ['f6p_c', 'gln__L_c']
3494 ['ggaptn_c', 'h2o_c']
3495 ['ggbdapal_c', 'h2o_c', 'nad_c']
3496 ['gg15dap_c', 'h2o_c', 'o2_c']
3497 ['15dap_c', 'atp_c', 'glu__L_c']
3498 ['ggala_B_c', 'h2o_c']
3499 ['ggbamppal_c', 'h2o_c', 'nad_c']
3500 ['gg13dampp_c', 'h2o_c', 'o2_c']
3501 ['13dampp_c', 'atp_c', 'glu__L_c']
3502 ['ggbutal_c', 'h2o_c', 'nad_c']
3503 ['gg4abut_c', 'h2o_c']
3504 ['ggptrc_c', 'h2o_c', 'o2_c']
3505 ['atp_c', 'glu__L_c', 'ptrc_c']
3506 ['ggspmd_c', 'h2o_c', 'o2_c']
3507 ['atp_c', 'glu__L_c', 'spmd_c']
3508 ['ser__L_c', 'thf_c']
3509 ['atp_c', 'gmp_c']
3510 ['glyg4n_c']
3511 ['glc__D_p', 'h2o_p', 'q8_c']
3512 ['glcn_p', 'h_p']
3513 ['glcn_e']
3514 ['atp_c', 'coa_c', 'glutar_c']
3515 ['S_gtrdhdlp_c', 'coa_c']
3516 ['glyg4n_c', 'pi_c']
3517 ['bglyg4n_c', 'pi_c']
3518 ['glcr_c']
3519 ['glcr_p', 'h_p']
3520 ['glcr_e']
3521 ['adpglc_c']
3522 ['glcur_c', 'h2o_c', 'nad_c']
3523 ['glcur_p', 'h_p']
3524 ['glcur_e']
3525 ['atp_c', 'glc__D_p', 'h2o_c']
3526 ['glc__D_e']
3527 ['bglyg4n

3807 ['3odd5coa_c', 'coa_c']
3808 ['3optslacoa_c', 'coa_c']
3809 ['3ohdd4coa_c', 'coa_c']
3810 ['3olnlccoa_c', 'coa_c']
3811 ['3oocoa_c', 'coa_c']
3812 ['3ohd710coa_c', 'coa_c']
3813 ['3ohd58coa_c', 'coa_c']
3814 ['3odd6coa_c', 'coa_c']
3815 ['3odcoa_c', 'coa_c']
3816 ['3oddcoa_c', 'coa_c']
3817 ['3otdcoa_c', 'coa_c']
3818 ['3ohdcoa_c', 'coa_c']
3819 ['3ohodcoa_c', 'coa_c']
3820 ['accoa_c', 'ppcoa_c']
3821 ['ctp_c', 'kdo_c']
3822 ['h2o_c', 'kdo8p_c']
3823 ['ara5p_c', 'h2o_c', 'pep_c']
3824 ['atp_c', 'h2o_c', 'k_p']
3825 ['h_p', 'k_p']
3826 ['h_p', 'k_c']
3827 ['k_e']
3828 ['LalaLglu_e']
3829 ['LalaLglu_c', 'h2o_c']
3830 ['h2o_c', 'lald__L_c', 'nad_c']
3831 ['h_p', 'lcts_c']
3832 ['lac__D_c', 'nad_c']
3833 ['lac__D_c', 'q8_c']
3834 ['h2o_c', 'leu__L_c', 'nad_c']
3835 ['atp_c', 'h2o_c', 'leuleu_p']
3836 ['leuleu_e']
3837 ['akg_c', 'leu__L_c']
3838 ['atp_c', 'leu__L_c', 'trnaleu_c']
3839 ['atp_c', 'h2o_c', 'leu__L_p']
3840 ['h_p', 'leu__L_p']
3841 ['leu__L_e']
3842 ['gthrd_c', 'mthgxl_c']

4276 ['C101PAH_c', 'h2o_c']
4277 ['C120aPHA_c', 'h2o_c']
4278 ['C121aPHA_c', 'h2o_c']
4279 ['C121d6PHA_c', 'h2o_c']
4280 ['C140aPHA_c', 'h2o_c']
4281 ['C141aPHA_c', 'h2o_c']
4282 ['C141d5PHA_c', 'h2o_c']
4283 ['C142PHA_c', 'h2o_c']
4284 ['C40aPHA_c', 'h2o_c']
4285 ['C50aPHA_c', 'h2o_c']
4286 ['C60aPHA_c', 'h2o_c']
4287 ['C70aPHA_c', 'h2o_c']
4288 ['C80aPHA_c', 'h2o_c']
4289 ['C90aPHA_c', 'h2o_c']
4290 ['C100pPHA_c', 'h2o_c']
4291 ['C40pPHA_c', 'h2o_c']
4292 ['C50pPHA_c', 'h2o_c']
4293 ['C60pPHA_c', 'h2o_c']
4294 ['C70pPHA_c', 'h2o_c']
4295 ['C80pPHA_c', 'h2o_c']
4296 ['C90pPHA_c', 'h2o_c']
4297 ['C40atPHA_c', 'h2o_c']
4298 ['C60atPHA_c', 'h2o_c']
4299 ['PHAg_c', 'R_3hdcoa_c']
4300 ['PHAg_c', 'R3hdec4coa_c']
4301 ['PHAg_c', 'R_3hddcoa_c']
4302 ['PHAg_c', 'R_3hcddec5ecoa_c']
4303 ['PHAg_c', 'R_3hdd6coa_c']
4304 ['PHAg_c', 'R_3hmrscoa_c']
4305 ['PHAg_c', 'R_3hcmrs7ecoa_c']
4306 ['PHAg_c', 'R_3htd5coa_c']
4307 ['PHAg_c', 'R_3htd58coa_c']
4308 ['PHAg_c', 'R_3hhcoa_c']
4309 ['PHAg_c', 'R_3ho

4599 ['R_3hphxa_p']
4600 ['R_3hphpa_p']
4601 ['R_3hphpa_p']
4602 ['R_3hpocta_p']
4603 ['R_3hpocta_p']
4604 ['R_3hpnona_p']
4605 ['R_3hpnona_p']
4606 ['R_3h4atba_p']
4607 ['R_3h4atba_p']
4608 ['R_3h6atha_p']
4609 ['R_3h6atha_p']
4610 ['gdpdrhmn_c', 'unaga_c']
4611 ['gdpdrhmn_c', 'rhma13unaga_c']
4612 ['atp_c', 'h2o_c', 'rib__D_p']
4613 ['rib__D_e']
4614 ['gdpddman_c', 'h_c', 'nadph_c']
4615 ['adp_c', 'trdrd_c']
4616 ['gdp_c', 'trdrd_c']
4617 ['cdp_c', 'trdrd_c']
4618 ['trdrd_c', 'udp_c']
4619 ['ru5p__D_c']
4620 ['r5p_c']
4621 ['5prdmbz_c', 'h2o_c']
4622 ['s7p_c']
4623 ['h2o_c', 'h_c', 'sucarg_c']
4624 ['atp_c', 'h_c', 'so4_c']
4625 ['h2o_c', 'o2_c', 'sarcs_c']
4626 ['o2_c', 'sarcs_c', 'thf_c']
4627 ['h_p', 'sbo3_p']
4628 ['h_p', 'sbo3_c']
4629 ['cys__L_c', 'sufse_c']
4630 ['gthrd_c', 'scys__L_c']
4631 ['h2o_c', 'sl26da_c']
4632 ['akg_c', 'sl26da_c']
4633 ['atp_c', 'h_c', 'sel_c']
4634 ['selnp_c', 'sertrna_sec_c']
4635 ['gthrd_c', 'h_c', 'slnt_c']
4636 ['dgslnt_c', 'h_c', 'nadph_c']
4637

4918 ['cholp_c', 'h2o_c']
4919 ['cur_c', 'h_c', 'nadph_c']
4920 ['d2one_e']
4921 ['d2one_p']
4922 ['d3one_e']
4923 ['d3one_p']
4924 ['d4one_e']
4925 ['d4one_p']
4926 ['dhcur_c', 'h_c', 'nadph_c']
4927 ['ethamp_c', 'h2o_c']
4928 ['4hptn_e']
4929 ['4oxptn_e']
4930 ['acmtsoxin_e']
4931 ['acpptrn_e']
4932 ['d2one_e']
4933 ['d3one_e']
4934 ['d4one_e']
4935 ['mtsoxin_e']
4936 ['n2one_e']
4937 ['pptrn_e']
4938 ['und2one_e']
4939 ['13dpg_c', 'h_c', 'nadph_c']
4940 ['akg_c', 'glutar_c', 'o2_c']
4941 ['3oxptcoa_c', 'coa_c']
4942 ['4oxptn_c', 'atp_c', 'coa_c']
4943 ['accoa_c', 'met__L_c']
4944 ['accoa_c', 'mtsoxin_c']
4945 ['mtsoxin_e']
4946 ['mtsoxin_p']
4947 ['n2one_e']
4948 ['n2one_p']
4949 ['phe__L_c']
4950 ['accoa_c', 'pptrn_c']
4951 ['pptrn_e']
4952 ['pptrn_p']
4953 ['h2o_c', 'rephaccoa_c']
4954 ['S2hglut_c', 'o2_c']
4955 ['und2one_e']
4956 ['und2one_p']
4957 ['2omcm_c', 'h_c']
4958 ['2hmc_c']
4959 ['hmccm_c']
4960 ['LalaDgluMdapDala_c', 'h2o_c']
4961 ['LalaDgluMdapDala_p', 'h2o_p']
4962 ['

5244 ['confrl_e']
5245 ['confrl_p']
5246 ['co_e']
5247 ['co_p']
5248 ['ahdt_c', 'h2o_c']
5249 ['gtp_c', 'h2o_c']
5250 ['cpppg3_c', 'h_c', 'o2_c']
5251 ['fad_c', 'h2o_c', 'tyr__D_c']
5252 ['fad_c', 'h2o_c', 'val__D_c']
5253 ['arg__D_c', 'fad_c', 'h2o_c']
5254 ['fad_c', 'h2o_c', 'leu__D_c']
5255 ['fad_c', 'h2o_c', 'met__D_c']
5256 ['fad_c', 'h2o_c', 'orn__D_c']
5257 ['fad_c', 'h2o_c', 'phe__D_c']
5258 ['fad_c', 'pro__D_c']
5259 ['fad_c', 'h2o_c', 'ser__D_c']
5260 ['24dab_c', 'akg_c', 'h_c']
5261 ['dad_2_c', 'h2o_c', 'h_c']
5262 ['atp_c', 'damp_c']
5263 ['12dgr120_c', 'atp_c']
5264 ['12dgr140_c', 'atp_c']
5265 ['12dgr141_c', 'atp_c']
5266 ['12dgr160_c', 'atp_c']
5267 ['12dgr161_c', 'atp_c']
5268 ['12dgr180_c', 'atp_c']
5269 ['12dgr181_c', 'atp_c']
5270 ['12dgr160_e', 'h2o_e']
5271 ['12dgr180_e', 'h2o_e']
5272 ['dag181d9_e', 'h2o_e']
5273 ['dag182d9d12_e', 'h2o_e']
5274 ['balaala_c', 'h2o_c']
5275 ['balabala_c', 'h2o_c']
5276 ['balagly_c', 'h2o_c']
5277 ['balaleu_c', 'h2o_c']
5278 ['ala__D

5584 ['coa_c', 'h2o_c']
5585 ['aspsa_c', 'pyr_c']
5586 ['ac_c', 'atp_c', 'coa_c']
5587 ['h2o_c', 'pad_c']
5588 ['dhf_c', 'h_c', 'nadph_c']
5589 ['ad_c', 'h2o_c']
5590 ['ac_e', 'h_e']
5591 ['4adcho_c']
5592 ['aa_c', 'h2o_c']
5593 ['atp_c', 'dhpt_c', 'glu__L_c']
5594 ['2dmmq6_c', 'amet_c']
5595 ['anth_c', 'prpp_c']
5596 ['chor_c', 'gln__L_c']
5597 ['chor_c', 'nh4_c']
5598 ['ade_c', 'h2o_c', 'h_c']
5599 ['ade_e', 'h_e']
5600 ['amp_c', 'atp_c']
5601 ['dhna_c', 'octdp_c']
5602 ['fe2_e']
5603 ['dhnpt_c']
5604 ['fe3_e']
5605 ['ala__L_c', 'h_c', 'pmcoa_c']
5606 ['5apru_c', 'h_c', 'nadph_c']
5607 ['dhor__S_c', 'mqn6_c']
5608 ['arg__L_c', 'h_c']
5609 ['arg__L_c', 'h2o_c']
5610 ['for_e']
5611 ['fum_e']
5612 ['gal_e']
5613 ['glc__D_e']
5614 ['argsuc_c']
5615 ['asp__L_c', 'atp_c', 'citr__L_c']
5616 ['dhor__S_c', 'h2o_c']
5617 ['arg__L_e', 'h_e']
5618 ['gln__L_e']
5619 ['glu__L_e']
5620 ['25dhpp_c', 'h2o_c', 'h_c']
5621 ['gly_e']
5622 ['gsn_e']
5623 ['aspsa_c', 'nadp_c', 'pi_c']
5624 ['asn__L_c', 'h

5902 ['h_e', 'nmn_e']
5903 ['atp_c', 'h_c', 'nicrnt_c']
5904 ['h_c', 'prpp_c', 'quln_c']
5905 ['no_c', 'o2s_c']
5906 ['1p3h5c_c', 'h2o_c', 'nad_c']
5907 ['h_e', 'no3_e']
5908 ['h_e', 'no3_c']
5909 ['atp_c', 'skm_c']
5910 ['cys__L_c', 'suchms_c']
5911 ['no_e']
5912 ['1p3h5c_c', 'h2o_c', 'h_c']
5913 ['sbzcoa_c']
5914 ['no3_c', 'trdrd_c']
5915 ['akg_c', 'phe__L_c']
5916 ['h_e', 'phe__L_e']
5917 ['pime_e']
5918 ['o2_e']
5919 ['cbp_c', 'orn_c']
5920 ['h2s_c', 'suchms_c']
5921 ['5aprbu_c', 'h2o_c']
5922 ['acac_c', 'succoa_c']
5923 ['4ampm_c', 'atp_c']
5924 ['frdp_c', 'ipdp_c']
5925 ['2ombzl_c', 'amet_c']
5926 ['atp_c', 'pnto__R_c']
5927 ['h_c', 'orot5p_c']
5928 ['h2o_c', 'ppi_c']
5929 ['akg_c', 'coa_c', 'fdxox_c', 'h_c']
5930 ['5aop_c']
5931 ['3ophb_c', 'h_c']
5932 ['akg_c', 'orn_c']
5933 ['h2o_c', 'suchms_c']
5934 ['4ppcys_c', 'h_c']
5935 ['ametam_c', 'ptrc_c']
5936 ['h_e', 'orn_e']
5937 ['h_e', 'orot_e']
5938 ['orot5p_c', 'ppi_c']
5939 ['akg_c', 'h_c', 'thmpp_c']
5940 ['h_c', 'o2s_c']
5941

6216 ['glcn_e']
6217 ['asn__L_e', 'h_e']
6218 ['glcur_e']
6219 ['asp__L_c', 'h_c']
6220 ['gln__L_e']
6221 ['asp__L_e', 'h_e']
6222 ['glu__L_e']
6223 ['atp_c', 'h2o_c']
6224 ['amp_c', 'pppi_c']
6225 ['gly_e']
6226 ['amet_c', 'h_c']
6227 ['agdpcbi_c', 'rdmbzi_c']
6228 ['btd_RR_c']
6229 ['btoh_e']
6230 ['glyb_e']
6231 ['adprib_c', 'h2o_c']
6232 ['glyc_e']
6233 ['ade_c', 'prpp_c']
6234 ['atp_c', 'but_c']
6235 ['dcamp_c']
6236 ['h_e']
6237 ['25aics_c']
6238 ['but_e']
6239 ['asp__L_c', 'gtp_c', 'imp_c']
6240 ['co2dam_c', 'nadh_c']
6241 ['ahcys_c', 'h2o_c']
6242 ['1ddecg3p_c', 'ddcaACP_c']
6243 ['acg5sa_c', 'nadp_c', 'pi_c']
6244 ['atp_c', 'cys__L_e', 'h2o_c']
6245 ['achms_c', 'h2s_c']
6246 ['air_c', 'co2_c']
6247 ['atp_c', 'cu2_e', 'h2o_c']
6248 ['10fthf_c', 'aicar_c']
6249 ['ala__D_c', 'atp_c']
6250 ['ala__L_c']
6251 ['akg_c', 'ala__L_c']
6252 ['atp_c', 'ca2_e', 'h2o_c']
6253 ['citr__L_e', 'h_e']
6254 ['cl_e', 'h_c']
6255 ['co2_e']
6256 ['etoh_c', 'nadp_c']
6257 ['dhptd_c']
6258 ['2dmmql8_c

6533 ['ala_B_c', 'atp_c', 'pant__R_c']
6534 ['mal__L_c', 'nad_c']
6535 ['h2o_c', 'pgp120_c']
6536 ['murein5p5p_e']
6537 ['5mta_c', 'h2o_c']
6538 ['murein5p5p_e', 'uaagmda_e']
6539 ['h2o_c', 'pgp140_c']
6540 ['h2o_c', 'pgp160_c']
6541 ['h_c', 'orn_c']
6542 ['atp_c', 'nad_c']
6543 ['2mecdp_c', 'nadh_c']
6544 ['murein5p5p_e']
6545 ['murein5p5p5p_e']
6546 ['mg2_c']
6547 ['h_e', 'mn2_e']
6548 ['mal__L_c', 'nad_c']
6549 ['h2o_c', 'pgp161_c']
6550 ['mal__L_c', 'nadp_c']
6551 ['h2o_e', 'murein5px4p_e']
6552 ['h2o_e', 'murein5px4px4p_e']
6553 ['mlthf_c', 'nadp_c']
6554 ['h2o_c', 'methf_c']
6555 ['fdxo_42_c', 'h_c', 'mlthf_c', 'nadh_c']
6556 ['atp_c', 'dnad_c', 'nh4_c']
6557 ['h2o_e', 'murein5p5p_e']
6558 ['h2o_e', 'murein5p4p_e']
6559 ['2p4c2me_c']
6560 ['2me4p_c', 'ctp_c', 'h_c']
6561 ['atp_c', 'h2o_c', 'met__L_c']
6562 ['h2o_c', 'pgp180_c']
6563 ['atp_c', 'dnad_c', 'gln__L_c', 'h2o_c']
6564 ['h2o_c', 'pgp181_c']
6565 ['h_c', 'nac_c', 'prpp_c']
6566 ['atp_c', 'phe__L_c', 'trnaphe_c']
6567 ['h_

6849 ['h2o_c', 'utp_c']
6850 ['h2_c', 'h_c', 'mqn8_c']
6851 ['h2o_c', 'itp_c']
6852 ['e4p_c', 'xu5p__D_c']
6853 ['coa_c', 'pyr_c']
6854 ['cdpdodecg_c', 'glyc3p_c']
6855 ['cdpdodec11eg_c', 'glyc3p_c']
6856 ['cdpdodec11eg_c', 'ser__L_c']
6857 ['2agpe160_c', 'atp_c', 'hdca_c']
6858 ['2agpe180_c', 'atp_c', 'ocdca_c']
6859 ['5dglcn_c', 'h_c', 'nadph_c']
6860 ['ACP_c', 'atp_c', 'mocdca_c']
6861 ['4hbz_c', 'ACP_c', 'atp_c']
6862 ['ACP_c', 'atp_c', 'hdca_c']
6863 ['4abut_c', 'akg_c']
6864 ['accoa_c']
6865 ['acald_c', 'coa_c', 'nad_c']
6866 ['adocbip_c', 'gtp_c', 'h_c']
6867 ['1hdecg3p_c', 'palmACP_c']
6868 ['co2_c', 'h_c', 'hexccoa_c']
6869 ['1hdecg3p_c', 'mstrACP_c']
6870 ['1msg3p_c', 'mstrACP_c']
6871 ['acg5sa_c', 'nadp_c', 'pi_c']
6872 ['coa_c', 'h_c', 'hexc_c']
6873 ['acgam6p_c']
6874 ['ahcys_c', 'h2o_c']
6875 ['atp_c', 'chol_e', 'h2o_c']
6876 ['uacgam_c', 'udcpp_c']
6877 ['chol_c', 'nad_c']
6878 ['chor_c']
6879 ['ahcys_c', 'h2o_c']
6880 ['acglu_c', 'atp_c']
6881 ['air_c', 'h_c']
6882 ['10

7178 ['h_c', 'malcoa_c', 'nadph_c', 'octa_c']
7179 ['h2s_e']
7180 ['h_e']
7181 ['atp_c', 'coa_c', 'ttdca_c']
7182 ['atp_c', 'coa_c', 'hdca_c']
7183 ['atp_c', 'coa_c', 'hdcea_c']
7184 ['atp_c', 'coa_c', 'ocdca_c']
7185 ['hdca_e']
7186 ['his__L_e']
7187 ['id3acald_e']
7188 ['ile__L_e']
7189 ['k_e']
7190 ['atp_c', 'coa_c', 'ocdcea_c']
7191 ['dca_c', 'h_c', 'malcoa_c', 'nadph_c']
7192 ['ddca_c', 'h_c', 'malcoa_c', 'nadph_c']
7193 ['glu__D_c']
7194 ['for_c', 'nad_c']
7195 ['h_c', 'malcoa_c', 'nadph_c', 'ttdca_c']
7196 ['akg_c', 'gln__L_c', 'h_c', 'nadph_c']
7197 ['fadh2_c', 'fe3_c']
7198 ['h_c', 'malcoa_c', 'nadph_c', 'o2_c', 'ttdca_c']
7199 ['fmcbtt_c']
7200 ['glu__L_e', 'h_e']
7201 ['h_c', 'hdca_c', 'malcoa_c', 'nadph_c']
7202 ['glx_c', 'h2o_c', 'nad_c']
7203 ['for_c', 'h_c']
7204 ['h_c', 'hdca_c', 'malcoa_c', 'nadph_c', 'o2_c']
7205 ['atp_c', 'fmn_c', 'h_c']
7206 ['atp_c', 'glyb_e', 'h2o_c']
7207 ['atp_c', 'glyc3p_e', 'h2o_c']
7208 ['2ahhmd_c', '4abz_c']
7209 ['h_c', 'malcoa_c', 'nadph_c

7475 ['atp_c', 'h_c', 'nicrnt_c']
7476 ['no2_c']
7477 ['hcys__L_c', 'mhpglu_c']
7478 ['h2o_c', 'mi1p__D_c']
7479 ['dmbzid_c', 'nicrnt_c']
7480 ['h_c', 'prpp_c', 'quln_c']
7481 ['no_c', 'o2s_c']
7482 ['h_c', 'no3_c', 'q8h2_c']
7483 ['coa_c', 'nad_c', 'pyr_c']
7484 ['na1_e']
7485 ['atp_c', 'gdp_c']
7486 ['atp_c', 'udp_c']
7487 ['h_c', 'mql8_c', 'no3_c']
7488 ['atp_c', 'cdp_c']
7489 ['atp_c', 'dtdp_c']
7490 ['no_e']
7491 ['sbzcoa_c']
7492 ['atp_c', 'dgdp_c']
7493 ['atp_c', 'dudp_c']
7494 ['h_c', 'lpam_c', 'pyr_c']
7495 ['adhlam_c', 'coa_c']
7496 ['dump_c', 'h2o_c']
7497 ['dhlam_c', 'nad_c']
7498 ['h_c', 'phthiocerol_c', 'tamocta_c']
7499 ['atp_c', 'fpram_c']
7500 ['pran_c']
7501 ['h2o_c', 'prbamp_c']
7502 ['pdima_c']
7503 ['h2o_c', 'xmp_c']
7504 ['o2_c', 'pdx5p_c']
7505 ['h2o_c', 'imp_c']
7506 ['h2o_c', 'o2_c', 'peamn_c']
7507 ['5aizc_c', 'asp__L_c', 'atp_c']
7508 ['h2o_c', 'prbatp_c']
7509 ['gtp_c', 'oaa_c']
7510 ['h2o_c', 'ump_c']
7511 ['atp_c', 'f6p_c']
7512 ['h2o2_c', 'meoh_c']
7513 [

7779 ['1hdecg3p_c', 'h2o_c']
7780 ['amet_c', 'dtbt_c', 's_c']
7781 ['indole_c', 'ser__L_c']
7782 ['3ig3p_c']
7783 ['akg_c', 'trp__L_c']
7784 ['h_e', 'trp__L_e']
7785 ['ggdp_c', 'ipdp_c']
7786 ['achms_c', 'h2s_c']
7787 ['ala_B_c', 'atp_c', 'pant__R_c']
7788 ['g3p_c', 'gln__L_c', 'ru5p__D_c']
7789 ['2h24pdn_c', 'h2o_c']
7790 ['f420_2_c', 'g6p_c']
7791 ['g3p_c', 'gln__L_c', 'r5p_c']
7792 ['h2o_c', 'pa160_c']
7793 ['dhap_c', 'gln__L_c', 'ru5p__D_c']
7794 ['dhap_c', 'gln__L_c', 'r5p_c']
7795 ['uAgla_c', 'udcpp_c']
7796 ['ni2_e']
7797 ['uGgla_c', 'udcpp_c']
7798 ['udcpp_c', 'ugmda_c']
7799 ['gtp_c', 'lac__L_c']
7800 ['paps_c', 'trdrd_c']
7801 ['h_c', 'ocdca_c', 'tmlgnc_c', 'tre_c']
7802 ['atp_c', 'hco3_c', 'pyr_c']
7803 ['amet_c', 'pre4_c']
7804 ['amet_c', 'pre3b_c']
7805 ['amet_c', 'dscl_c']
7806 ['h_c', 'nadph_c', 'pre6a_c']
7807 ['amet_c', 'pre6b_c']
7808 ['h_c', 'pre8_c']
7809 ['camp_c', 'h2o_c']
7810 ['35cgmp_c', 'h2o_c']
7811 ['coa_c', 'eiscoa_c', 'fad_c', 'nad_c']
7812 ['coa_c', 'fad_

8074 ['adp_c', 'ppap_c']
8075 ['pi_c', 'ppcoa_c']
8076 ['agalfragund_c', 'decda_tb_c']
8077 ['btn_c']
8078 ['btn_e']
8079 ['gly_c', 'h2o_c', 'nad_c']
8080 ['hepdp_c', 'ipdp_c']
8081 ['hexdp_c', 'ipdp_c']
8082 ['ipdp_c', 'pendp_c']
8083 ['cit_e']
8084 ['cl_e']
8085 ['cm_e']
8086 ['cmp_e']
8087 ['co2_e']
8088 ['cobalt2_e']
8089 ['colipa_e']
8090 ['colipap_e']
8091 ['cpgn_e']
8092 ['cpgn_un_e']
8093 ['crn_e']
8094 ['crn__D_e']
8095 ['gbbtn_e']
8096 ['csn_e']
8097 ['gdp_e']
8098 ['cu_e']
8099 ['glc__D_e']
8100 ['glcn_e']
8101 ['cu2_e']
8102 ['glcr_e']
8103 ['glcur_e']
8104 ['4crsol_c']
8105 ['glcur1p_e']
8106 ['5drib_c']
8107 ['cyan_e']
8108 ['aacald_c']
8109 ['gln__L_e']
8110 ['glu__L_e']
8111 ['cynt_e']
8112 ['gly_e']
8113 ['amob_c']
8114 ['mththf_c']
8115 ['glyald_e']
8116 ['cys__D_e']
8117 ['glyb_e']
8118 ['glyc_e']
8119 ['glyc__R_e']
8120 ['glyc2p_e']
8121 ['glyc3p_e']
8122 ['glyclt_e']
8123 ['gmp_e']
8124 ['gsn_e']
8125 ['gthox_e']
8126 ['oxam_c']
8127 ['10fthf_c', '2dmmql8_c', '2fe2

8465 ['LalaDgluMdap_e']
8466 ['3gmp_e']
8467 ['3ump_e']
8468 ['3hdecACP_c']
8469 ['fuc__L_e']
8470 ['3hddecACP_c']
8471 ['3hcddec5eACP_c']
8472 ['3hmrsACP_c']
8473 ['3hcmrs7eACP_c']
8474 ['dopa_p', 'h2o_p', 'o2_p']
8475 ['4hoxpacd_e']
8476 ['h2o_c', 'phthr_c']
8477 ['LalaDgluMdapDala_c', 'h2o_c']
8478 ['LalaDgluMdapDala_p', 'h2o_p']
8479 ['3hpalmACP_c']
8480 ['LalaDgluMdapDala_p', 'atp_c', 'h2o_c']
8481 ['fum_e']
8482 ['LalaDgluMdapDala_e']
8483 ['5dglcn_e']
8484 ['3hcpalm9eACP_c']
8485 ['3hoctaACP_c']
8486 ['3hcvac11eACP_c']
8487 ['3haACP_c']
8488 ['dad_5_c', 'h2o_c']
8489 ['5mtr_e']
8490 ['fusa_e']
8491 ['5mtr_c', 'h_c']
8492 ['ru5p__D_c']
8493 ['ACP_c', 'atp_c', 'ttdca_c']
8494 ['ACP_c', 'atp_c', 'ttdcea_c']
8495 ['ACP_c', 'atp_c', 'hdca_c']
8496 ['3hhexACP_c']
8497 ['3hoctACP_c']
8498 ['3hcinnm_c', 'h_c', 'nadh_c', 'o2_c']
8499 ['3hpppn_c', 'h_c', 'nadh_c', 'o2_c']
8500 ['3hpp_e']
8501 ['3hpp_c', 'h_c']
8502 ['3dhguln_c', 'atp_c']
8503 ['3ump_p', 'h2o_p']
8504 ['ACP_c', 'atp_c', 'h

8782 ['arg__L_p', 'orn_c']
8783 ['argsuc_c']
8784 ['asp__L_c', 'atp_c', 'citr__L_c']
8785 ['arg__L_c', 'atp_c', 'trnaarg_c']
8786 ['aso3_c', 'atp_c', 'h2o_c']
8787 ['arg__L_p', 'atp_c', 'h2o_c']
8788 ['arg__L_c', 'h_p']
8789 ['aso3_e']
8790 ['arg__L_e']
8791 ['aspsa_c', 'nadp_c', 'pi_c']
8792 ['cbi_e', 'h_p']
8793 ['ascb6p_c', 'h2o_c']
8794 ['atp_c', 'cbi_p', 'h2o_c']
8795 ['asp__L_c', 'h_c']
8796 ['atp_c', 'cbl1_p', 'h2o_c']
8797 ['ascb__L_p', 'pep_c']
8798 ['ascb__L_e']
8799 ['cbl1_e', 'h_p']
8800 ['asn__L_c', 'h2o_c']
8801 ['asp__L_c', 'cbp_c']
8802 ['asn__L_p', 'h2o_p']
8803 ['asp__L_c', 'atp_c', 'gln__L_c', 'h2o_c']
8804 ['atp_c', 'cbl1_c', 'h_c']
8805 ['cbm_c', 'h_c']
8806 ['atp_c', 'co2_c', 'nh4_c']
8807 ['atp_c', 'gln__L_c', 'h2o_c', 'hco3_c']
8808 ['atp_c', 'cdg_c', 'nh4_c']
8809 ['atp_c', 'cd2_c', 'h2o_c']
8810 ['cd2_c', 'h_p']
8811 ['asp__L_c', 'atp_c', 'nh4_c']
8812 ['asn__L_c', 'atp_c', 'trnaasn_c']
8813 ['asn__L_p', 'atp_c', 'h2o_c']
8814 ['cd2_e']
8815 ['cd2_p']
8816 ['a

9095 ['h_c', 'nadh_c', 'toctd2eACP_c']
9096 ['doxrbcn_e']
9097 ['doxrbcn_p', 'h_p']
9098 ['ala_B_e']
9099 ['atp_c', 'dpcoa_c']
9100 ['2dhp_c', 'h_c', 'nadph_c']
9101 ['h_c', 'nadph_c', 'toctd2eACP_c']
9102 ['h_c', 'nadh_c', 't3c11vaceACP_c']
9103 ['h_c', 'nadph_c', 't3c11vaceACP_c']
9104 ['but2eACP_c', 'h_c', 'nadh_c']
9105 ['2dr5p_c']
9106 ['dsbard_p', 'q8_c']
9107 ['dsbard_p', 'mqn8_c']
9108 ['but2eACP_c', 'h_c', 'nadph_c']
9109 ['h_c', 'nadh_c', 'thex2eACP_c']
9110 ['dsbcox_p', 'gthrd_p']
9111 ['h_c', 'nadph_c', 'thex2eACP_c']
9112 ['betald_c', 'h2o_c', 'nad_c']
9113 ['h_c', 'nadh_c', 'toct2eACP_c']
9114 ['h_c', 'nadph_c', 'toct2eACP_c']
9115 ['atp_c', 'eca4colipa_p', 'h2o_c']
9116 ['unagamuf_p']
9117 ['eca2und_p', 'unagamuf_p']
9118 ['dsbdox_c', 'trdrd_c']
9119 ['dsbgox_p', 'gthrd_p']
9120 ['nadp_c', 'ser__D_c']
9121 ['h_p', 'ser__D_p']
9122 ['ser__D_e']
9123 ['tartr__D_c']
9124 ['atp_c', 'dtmp_c']
9125 ['eca3und_p', 'unagamuf_p']
9126 ['unagamuf_c']
9127 ['3hbcoa_c']
9128 ['3hhcoa

9409 ['glycogen_c']
9410 ['accoa_c', 'glc__D_c']
9411 ['glc__D_p', 'h2o_p', 'q8_c']
9412 ['glcn_p', 'h_p']
9413 ['glcn_e']
9414 ['glycogen_c', 'pi_c']
9415 ['bglycogen_c', 'pi_c']
9416 ['5dh4dglc_c']
9417 ['glcr_c']
9418 ['glcr_p', 'h_p']
9419 ['g3pe_p', 'h2o_p']
9420 ['g3ps_c', 'h2o_c']
9421 ['g3ps_p', 'h2o_p']
9422 ['g3pg_c', 'h2o_c']
9423 ['g3pg_p', 'h2o_p']
9424 ['glcr_e']
9425 ['adpglc_c']
9426 ['g3pi_c', 'h2o_c']
9427 ['icolipa_c', 'udpg_c']
9428 ['g3pi_p', 'h2o_p']
9429 ['glu__D_c']
9430 ['grdp_c', 'ipdp_c']
9431 ['grxox_c', 'gthrd_c']
9432 ['atp_c', 'gsn_c']
9433 ['glcur1p_e']
9434 ['glcur_p', 'h_p']
9435 ['glcur_e']
9436 ['atp_c', 'glc__D_p', 'h2o_c']
9437 ['glc__D_p', 'pep_c']
9438 ['glc__D_p', 'h_p']
9439 ['glc__D_e']
9440 ['glc__D_e']
9441 ['bglycogen_c']
9442 ['gsn_p', 'h_p']
9443 ['gsn_e']
9444 ['gtspmd_c', 'h2o_c']
9445 ['atp_c', 'gthrd_c', 'spmd_c']
9446 ['gthox_e']
9447 ['gthox_c', 'h_c', 'nadph_c']
9448 ['gthrd_c', 'h2o2_c']
9449 ['atp_c', 'g1p_c', 'h_c']
9450 ['atp_c

9729 ['malthx_e']
9730 ['2agpe141_c', 'h2o_c']
9731 ['2agpe160_c', 'h2o_c']
9732 ['maltpt_e']
9733 ['glx_c', 'h_c', 'nadph_c']
9734 ['malttr_e']
9735 ['maltttr_e']
9736 ['2agpe161_c', 'h2o_c']
9737 ['2agpe180_c', 'h2o_c']
9738 ['2agpe181_c', 'h2o_c']
9739 ['malt_p', 'pep_c']
9740 ['glyclt_p', 'h_p']
9741 ['malt_e']
9742 ['h_p', 'mal__L_p']
9743 ['h_p', 'mal__L_p']
9744 ['2agpg120_c', 'h2o_c']
9745 ['2agpg140_c', 'h2o_c']
9746 ['2agpg141_c', 'h2o_c']
9747 ['2agpg160_c', 'h2o_c']
9748 ['2agpg161_c', 'h2o_c']
9749 ['h_p', 'mal__L_c']
9750 ['mal__L_e']
9751 ['man6p_c']
9752 ['2agpg180_c', 'h2o_c']
9753 ['man6p_p', 'pi_c']
9754 ['man6p_e']
9755 ['mana_c', 'nad_c']
9756 ['manglyc_p', 'pep_c']
9757 ['2agpg181_c', 'h2o_c']
9758 ['nadp_c', 'ser__L_c']
9759 ['h_c', 'lys__L_c']
9760 ['atp_c', 'lys__L_c', 'trnalys_c']
9761 ['manglyc_e']
9762 ['h2o_c', 'man6pglyc_c']
9763 ['glyclt_p', 'na1_p']
9764 ['man_p', 'pep_c']
9765 ['man_e']
9766 ['2mcit_c']
9767 ['h2o_p', 'murein4px4px4p_p']
9768 ['mal__L_c

10180 ['ppcoa_c', 'succ_c']
10181 ['h2o_c', 'ppgpp_c']
10182 ['pep_c', 'skm5p_c']
10183 ['atp_c', 'ppi_c']
10184 ['h2o_p', 'pgp120_p']
10185 ['atp_c', 'pi_c']
10186 ['r1p_c']
10187 ['h_c', 'ps120_c']
10188 ['h_c', 'ps140_c']
10189 ['h_c', 'ps141_c']
10190 ['2dr1p_c']
10191 ['4ppan_c', 'ctp_c', 'cys__L_c']
10192 ['h2o_c', 'pgp140_c']
10193 ['nad_c', 'pphn_c']
10194 ['h_c', 'pphn_c']
10195 ['o2_c', 'pppg9_c']
10196 ['h_c', 'ps160_c']
10197 ['h_c', 'ps161_c']
10198 ['h_c', 'ps180_c']
10199 ['h_c', 'ps181_c']
10200 ['fum_c', 'pppg9_c']
10201 ['h_c', 'nadh_c', 'o2_c', 'pppn_c']
10202 ['h_p', 'pppn_p']
10203 ['pppn_e']
10204 ['atp_c', 'h2o_c', 'pyr_c']
10205 ['h2o_p', 'ppt_p']
10206 ['ppt_e']
10207 ['3php_c', 'glu__L_c']
10208 ['pser__L_e']
10209 ['h2o_c', 'pser__L_c']
10210 ['h2o_p', 'pser__L_p']
10211 ['cdpdddecg_c', 'ser__L_c']
10212 ['cdpdtdecg_c', 'ser__L_c']
10213 ['cdpdtdec7eg_c', 'ser__L_c']
10214 ['h2o_p', 'pgp140_p']
10215 ['cdpdhdecg_c', 'ser__L_c']
10216 ['h2o_c', 'pgp141_c']
102

10484 ['trp__L_e']
10485 ['2h3oppan_c', 'h_c', 'nadh_c']
10486 ['atp_c', 'h2o_c', 'tsul_p']
10487 ['tsul_e']
10488 ['ttdca_e']
10489 ['ttdcea_e']
10490 ['frdp_c', 'ipdp_c']
10491 ['h2o_p', 'udcpdp_p']
10492 ['udcpp_p']
10493 ['udpacgal_e']
10494 ['udpg_c']
10495 ['h2o_p', 'udpgal_p']
10496 ['udpgal_e']
10497 ['h2o_c', 'nad_c', 'udpg_c']
10498 ['nad_c', 'udpglcur_c']
10499 ['udpglcur_e']
10500 ['ttrcyc_e']
10501 ['h_p', 'ttrcyc_p']
10502 ['atp_c', 'h2o_c', 'tungs_p']
10503 ['tungs_e']
10504 ['tym_e']
10505 ['h2o_p', 'udpg_p']
10506 ['udpg_e']
10507 ['amet_c', 'nadph_c', 'tyr__L_c']
10508 ['glu__L_c', 'udpLa4o_c']
10509 ['h2o_p', 'o2_p', 'tym_p']
10510 ['h2o_p', 'udpglcur_p']
10511 ['pheme_p']
10512 ['gal1p_c', 'udpg_c']
10513 ['h2o_c', 'h_c', 'urdglyc_c']
10514 ['alaala_c', 'atp_c', 'ugmd_c']
10515 ['h2o_c', 'u3aga_c']
10516 ['10fthf_c', 'udpLa4n_c']
10517 ['uLa4n_c']
10518 ['h2o_p', 'tyrp_p']
10519 ['tyrp_e']
10520 ['urea_e']
10521 ['urea_p']
10522 ['h2o_c', 'o2_c', 'urate_c']
10523 ['

In [37]:
# Keep only the reactions flagged as complete and renumber the rows from 0.
is_complete = model_df["complete"]
model_df = model_df.loc[is_complete].reset_index(drop = True)

model_df

Unnamed: 0,BiGG ID,substrates,products,substrate KEGG CIDs,product KEGG CIDs,substrate CIDs,product CIDs,complete
0,EX_doxrbcn_e,[doxrbcn_e],[],[C01661],,[C01661],[],True
1,EX_dtmp_e,[dtmp_e],[],[C00364],,[C00364],[],True
2,EX_dump_e,[dump_e],[],[C00365],,[C00365],[],True
3,EX_duri_e,[duri_e],[],[C00526],,[C00526],[],True
4,EX_eca4colipa_e,[eca4colipa_e],[],,,[MNXM91786],[],True
...,...,...,...,...,...,...,...,...
9929,DMALRED,"[fad_c, mal__L_c]","[fadh2_c, oaa_c]","[C00016, C00149]","[C01352, C00036]","[C00016, C00149]","[C01352, C00036]",True
9930,AMMQT8_2,"[2dmmq8_c, amet_c]","[ahcys_c, h_c, mqn8_c]",[C00019],"[C00021, C00080]","[MNXM2178, C00019]","[C00021, C00080, MNXM509]",True
9931,CELLBpts_1,"[cellb_e, pep_c]","[6pgg_c, pyr_p]","[C00185, C06421, C06422, C00074]",[C00022],"[C00185, C00074]","[MNXM147389, C00022]",True
9932,FFSD,"[h2o_c, suc6p_c]","[fru_c, g6p_c]","[C00001, C01328, C16688, C02591]","[C01496, C05003, C00095, C10906, C00092]","[C00001, C16688]","[C01496, C00092]",True


#### (c) If no KEGG ID is available for a metabolite but a MetaNetX ID is, we look up an InChI string using the MetaNetX ID:

Creating a list of all MetaNetX IDs so that the InChI strings can be obtained for them:

In [38]:
# Collect the MetaNetX IDs (identifiers starting with "M") of all complete reactions.
# Every metabolite is tested on its own, so MetaNetX IDs that are not the first
# metabolite of a reaction are found as well, and KEGG IDs are never collected.
MNX_IDs = []
for ind in model_df.index:
    if model_df.at[ind, "complete"]:
        metabolites = model_df.at[ind, "substrate CIDs"] + model_df.at[ind, "product CIDs"]
        MNX_IDs.extend(met for met in metabolites if met.startswith("M"))

# Write one unique ID per line; sorting keeps the file identical between runs.
with open(join("..", "..", "..", "data", "BiGG_data", "MNX_IDs.txt"), "w") as f:
    for ID in sorted(set(MNX_IDs)):
        f.write(str(ID) + "\n")

Mapping MNX IDs to InChI strings with a MetaNetX ID database downloaded from here: https://www.metanetx.org/mnxdoc/mnxref.html

In [39]:
# Load the MetaNetX compound table and index it by its "#ID" column
# (the column itself is kept as well).
chem_prop_path = join("..", "..", "..", "data", "BiGG_data", "chem_prop.tsv")
df_MNX = pd.read_csv(chem_prop_path, sep = "\t").set_index("#ID", drop = False)
df_MNX.head()

Unnamed: 0_level_0,#ID,name,reference,formula,charge,mass,InChI,InChIKey,SMILES
#ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BIOMASS,BIOMASS,BIOMASS,mnx:BIOMASS,,,,,,
MNXM01,MNXM01,PMF,mnx:PMF,H,1.0,1.00794,InChI=1S/p+1,InChIKey=GPRLSGONYQIRFK-UHFFFAOYSA-N,[H+]
MNXM02,MNXM02,OH(-),mnx:HYDROXYDE,H,-1.0,17.00734,InChI=1S/H2O/h1H2/p-1,InChIKey=XLYOFNOQVPJJNP-UHFFFAOYSA-M,[O-][H]
MNXM03,MNXM03,H3O(+),mnx:OXONIUM,H3O,1.0,19.02322,InChI=1S/H2O/h1H2/p+1,InChIKey=XLYOFNOQVPJJNP-UHFFFAOYSA-O,[OH3+]
MNXM1,MNXM1,H(+),mnx:PROTON,H,1.0,1.00794,InChI=1S/p+1,InChIKey=GPRLSGONYQIRFK-UHFFFAOYSA-N,[H+]


In [40]:
# InChI strings indexed by MetaNetX ID; looked up once instead of once per metabolite.
inchi_by_MNX_ID = df_MNX["InChI"]


def _replace_MNX_IDs_by_InChI(compound_IDs):
    """Replace, in place, every MetaNetX ID in `compound_IDs` by its InChI string.

    Entries whose first character is "M" are looked up in `inchi_by_MNX_ID`;
    all other entries are left untouched. An ID that is missing from the table
    (or has no InChI) is replaced by NaN.

    Returns True if an InChI was found for every MetaNetX ID, False otherwise.
    """
    all_found = True
    for i, compound_ID in enumerate(compound_IDs):
        if compound_ID[0] == "M":
            InChI = inchi_by_MNX_ID.get(compound_ID, np.nan)
            compound_IDs[i] = InChI
            if pd.isnull(InChI):
                all_found = False
    return all_found


for ind in model_df.index:
    # Both sides are always processed so that every MetaNetX ID gets replaced.
    substrates_ok = _replace_MNX_IDs_by_InChI(model_df.at[ind, "substrate CIDs"])
    products_ok = _replace_MNX_IDs_by_InChI(model_df.at[ind, "product CIDs"])
    if not (substrates_ok and products_ok):
        # .at writes into model_df itself (no chained assignment on a temporary Series).
        model_df.at[ind, "complete"] = False

In [41]:
# Drop the reactions that lost their "complete" flag and renumber the rows from 0.
is_complete = model_df["complete"]
model_df = model_df.loc[is_complete].reset_index(drop = True)

model_df

Unnamed: 0,BiGG ID,substrates,products,substrate KEGG CIDs,product KEGG CIDs,substrate CIDs,product CIDs,complete
0,EX_doxrbcn_e,[doxrbcn_e],[],[C01661],,[C01661],[],True
1,EX_dtmp_e,[dtmp_e],[],[C00364],,[C00364],[],True
2,EX_dump_e,[dump_e],[],[C00365],,[C00365],[],True
3,EX_duri_e,[duri_e],[],[C00526],,[C00526],[],True
4,EX_enlipa_e,[enlipa_e],[],[C21173],,[C21173],[],True
...,...,...,...,...,...,...,...,...
7691,G3PCT,"[ctp_c, glyc3p_c, h_c]","[cdpglyc_c, ppi_c]","[C00063, C00093, C00080]","[C00513, C00013]","[C00063, C00093, C00080]","[C00513, C00013]",True
7692,DMALRED,"[fad_c, mal__L_c]","[fadh2_c, oaa_c]","[C00016, C00149]","[C01352, C00036]","[C00016, C00149]","[C01352, C00036]",True
7693,AMMQT8_2,"[2dmmq8_c, amet_c]","[ahcys_c, h_c, mqn8_c]",[C00019],"[C00021, C00080]",[InChI=1S/C50H70O2/c1-38(2)19-12-20-39(3)21-13...,"[C00021, C00080, InChI=1S/C51H72O2/c1-38(2)20-...",True
7694,FFSD,"[h2o_c, suc6p_c]","[fru_c, g6p_c]","[C00001, C01328, C16688, C02591]","[C01496, C05003, C00095, C10906, C00092]","[C00001, C16688]","[C01496, C00092]",True


#### (d) Adding backward directions for all reactions:

In [42]:
# Create the reverse direction of every reaction in one step instead of appending
# row by row: swap the substrate/product columns, mark the BiGG ID with "_r" and
# put the reversed reactions below the forward ones (same order, fresh 0..n-1 index).
# NOTE(review): as before, the "substrate KEGG CIDs" / "product KEGG CIDs" columns
# are copied unchanged (not swapped) for the reverse reactions.
swapped_columns = {"substrates": "products", "products": "substrates",
                   "substrate CIDs": "product CIDs", "product CIDs": "substrate CIDs"}
reverse_df = model_df.rename(columns = swapped_columns)
reverse_df["BiGG ID"] = reverse_df["BiGG ID"] + "_r"

model_df = pd.concat([model_df, reverse_df[model_df.columns]], ignore_index = True)

## 2. Calculating reaction fingerprints for all reactions in BiGG:

In [44]:
mol_folder = join("..", "..", "..", "data", "metabolite_data", "mol-files")
def get_reaction_site_smarts(metabolites):
    """Build the SMARTS string for one side of a reaction.

    Parameters
    ----------
    metabolites : list of str
        Identifiers of the metabolites on this side of the reaction. An entry
        starting with "C" is read from the file "<ID>.mol" in `mol_folder`;
        an entry starting with "I" is parsed as an InChI string.

    Returns
    -------
    str
        The SMARTS of all metabolites joined by ".", or "" for an empty list.

    Raises
    ------
    TypeError
        If a metabolite cannot be converted into a molecule or if its
        identifier starts with neither "C" nor "I".
    """
    smarts_parts = []
    for met in metabolites:
        if met[0] == "C":
            try:
                smarts = Chem.MolToSmarts(Chem.MolFromMolFile(join(mol_folder, met + '.mol')))
            except Exception as error:
                # Missing or unreadable mol file: report it with the exception
                # type that the calling cell treats as "no fingerprint".
                raise TypeError("Could not create a molecule for %s" % met) from error

        elif met[0] == "I":
            mol = Chem.inchi.MolFromInchi(met)
            if mol is None:
                raise TypeError("Could not parse InChI string %s" % met)
            smarts = Chem.MolToSmarts(mol)

        else:
            # Without this branch the SMARTS of the previous metabolite would be
            # reused silently (or an UnboundLocalError raised for the first one).
            raise TypeError("Unsupported metabolite identifier: %s" % met)

        smarts_parts.append(smarts)
    return(".".join(smarts_parts))

def convert_fp_to_array(difference_fp_dict):
    """Expand a sparse {position: value} mapping into a dense array of length 2048.

    Positions that do not occur in `difference_fp_dict` are 0.
    """
    dense_fp = np.zeros(2048)
    for position, value in difference_fp_dict.items():
        dense_fp[position] = value
    return(dense_fp)

In [47]:
# Number of leading bits of the structural fingerprint that are kept.
# NOTE(review): the reason for this particular length is not visible in this notebook.
STRUCTURAL_FP_LENGTH = 3276

# Fingerprints are collected in lists and written to the DataFrame in one step;
# this avoids chained assignment (model_df[col][ind] = ...), which pandas does
# not guarantee to write into the original frame.
structural_fps, difference_fps = [], []

for ind in model_df.index:
    # "" marks reactions without fingerprints (incomplete or not convertible).
    structural_fp, difference_fp = "", ""
    if model_df.at[ind, "complete"]:
        try:
            left_site = get_reaction_site_smarts(model_df.at[ind, "substrate CIDs"])
            right_site = get_reaction_site_smarts(model_df.at[ind, "product CIDs"])

            rxn_forward = AllChem.ReactionFromSmarts(left_site + ">>" + right_site)

            new_difference_fp = Chem.rdChemReactions.CreateDifferenceFingerprintForReaction(rxn_forward)
            new_difference_fp = convert_fp_to_array(new_difference_fp.GetNonzeroElements())
            new_structural_fp = Chem.rdChemReactions.CreateStructuralFingerprintForReaction(rxn_forward).ToBitString()

            # Only set both values once both fingerprints were created successfully.
            structural_fp = new_structural_fp[:STRUCTURAL_FP_LENGTH]
            difference_fp = new_difference_fp
        except TypeError:
            # get_reaction_site_smarts raises TypeError for metabolites that cannot
            # be converted into a molecule; such reactions keep empty fingerprints.
            pass
    structural_fps.append(structural_fp)
    difference_fps.append(difference_fp)

# dtype=object keeps every array as a single cell value.
model_df["structural_fp"] = pd.Series(structural_fps, index = model_df.index, dtype = object)
model_df["difference_fp"] = pd.Series(difference_fps, index = model_df.index, dtype = object)

In [48]:
# Store the reactions together with their fingerprints.
fingerprint_pickle_path = join("..", "..", "..", "data", "BiGG_data",
                               "BiGG_models_with_fingerprints.pkl")
model_df.to_pickle(fingerprint_pickle_path)

## 3. Mapping our data points to BiGG reactions via reaction fingerprints:

In [49]:
# Reload the reactions with fingerprints that were stored in the previous section.
fingerprint_pickle_path = join("..", "..", "..", "data", "BiGG_data",
                               "BiGG_models_with_fingerprints.pkl")
model_df = pd.read_pickle(fingerprint_pickle_path)
model_df.head()

Unnamed: 0,BiGG ID,substrates,products,substrate KEGG CIDs,product KEGG CIDs,substrate CIDs,product CIDs,complete,structural_fp,difference_fp
0,EX_doxrbcn_e,[doxrbcn_e],[],[C01661],,[C01661],[],True,1100010100000101101100110110000001000001111000...,"[0.0, 0.0, 0.0, 0.0, -80.0, 0.0, 0.0, 0.0, 0.0..."
1,EX_dtmp_e,[dtmp_e],[],[C00364],,[C00364],[],True,1100100100000000000000000010000001000001111000...,"[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0..."
2,EX_dump_e,[dump_e],[],[C00365],,[C00365],[],True,1100100100000000000000000010000001000001111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,EX_duri_e,[duri_e],[],[C00526],,[C00526],[],True,1100100000000000000000000010000001000001001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,EX_enlipa_e,[enlipa_e],[],[C21173],,[C21173],[],True,1110110111100011101001110110011011100101111111...,"[0.0, 0.0, 0.0, 0.0, 0.0, -400.0, -290.0, 0.0,..."


In [50]:
# Load the training and test splits of the kcat data.
splits_folder = join("..", "..", "..", "data", "kcat_data", "splits")
data_train = pd.read_pickle(join(splits_folder, "train_df_kcat.pkl"))
data_test = pd.read_pickle(join(splits_folder, "test_df_kcat.pkl"))

for split_df in (data_train, data_test):
    # Use the first entry of "Uniprot IDs" as the Uniprot ID of a data point.
    split_df["Uniprot ID"] = [UIDs[0] for UIDs in split_df["Uniprot IDs"]]
    split_df.rename(columns = {"geomean_kcat" :"log10_kcat"}, inplace = True)

# Combine both splits into a single DataFrame with a fresh index.
df_kcat = pd.concat([data_train, data_test], ignore_index=True)

#### (a) Looking for exact matches via the structural reaction fingerprint:

In [51]:
# First BiGG ID for every non-empty structural fingerprint in model_df. Building
# this lookup once replaces a scan over all BiGG reactions for every data point.
first_BiGG_ID_by_fp = {}
for fp, BiGG_ID in zip(model_df["structural_fp"], model_df["BiGG ID"]):
    if fp != "":
        first_BiGG_ID_by_fp.setdefault(fp, BiGG_ID)

# Compare on the first 3276 bits, the length stored for the BiGG fingerprints.
query_fps = df_kcat["structural_fp"].str[:3276]

# Data points without an identical fingerprint keep NaN in both columns;
# exact matches get the matching BiGG ID and an accuracy of 1.0.
df_kcat["BiGG ID"] = query_fps.map(first_BiGG_ID_by_fp)
df_kcat["BiGG acc"] = np.where(df_kcat["BiGG ID"].notnull(), 1.0, np.nan)

In [52]:
# Number of data points that already have a BiGG ID assigned.
int(df_kcat["BiGG ID"].notnull().sum())

517

#### (b) For those reactions that couldn't be mapped yet: Looking for similar reactions:

In [53]:
from rdkit import DataStructs

checkpoint_path = join("..", "..", "..", "data", "kcat_data",
                         "df_kcat_with_BiGG_IDs.pkl")

# Only BiGG reactions with a structural fingerprint can be compared.
model_df = model_df.loc[model_df["structural_fp"] != ""].reset_index(drop = True)
FP_list = list(model_df["structural_fp"])
BiGG_IDs = list(model_df["BiGG ID"])
# Convert the BiGG fingerprints to bit vectors once instead of once per data point.
model_bit_vectors = [DataStructs.cDataStructs.CreateFromBitString(fp[:3276]) for fp in FP_list]

# Work on an explicit copy so that the assignments below change df_kcat itself.
df_kcat = df_kcat.loc[df_kcat["structural_fp"] != ""].copy()
# The column receives strings below, so make sure it can hold them.
df_kcat["BiGG ID"] = df_kcat["BiGG ID"].astype(object)

for ind in df_kcat.index:
    if pd.isnull(df_kcat.at[ind, "BiGG ID"]):
        fp1 = DataStructs.cDataStructs.CreateFromBitString(df_kcat.at[ind, "structural_fp"][:3276])
        # Tanimoto similarity of this data point to every BiGG reaction.
        scores = np.array(DataStructs.BulkTanimotoSimilarity(fp1, model_bit_vectors))

        # Assign the most similar BiGG reaction (the first one in case of ties).
        max_i = np.argmax(scores)
        df_kcat.at[ind, "BiGG ID"] = BiGG_IDs[max_i]
        df_kcat.at[ind, "BiGG acc"] = scores[max_i]
        print(ind, scores[max_i])

    # Save intermediate results regularly, because the search takes a long time.
    if ind % 500 == 0:
        print(ind)
        df_kcat.to_pickle(checkpoint_path)

0 0.9825418994413407
0
1 0.8601769911504424
2 0.900990099009901
3 0.8942598187311178
4 0.9660377358490566
5 0.5872340425531914
6 0.9866666666666667
7 0.9512635379061372
8 0.6005089058524173
9 0.9790209790209791
11 0.8089005235602095
12 0.7435440783615316
13 0.9035486806187443
14 0.9322709163346613
17 0.9952830188679245
18 0.9762071378586424
19 0.9692982456140351
20 0.6164383561643836
21 0.979108635097493
22 0.8409610983981693
23 0.9815770081061165
24 0.9810126582278481
25 0.9824314827828531
26 0.9662802950474183
28 0.8562367864693446
29 0.8667287977632805
30 0.9817518248175182
32 0.9806496199032481
33 0.7736318407960199
34 0.6270270270270271
35 0.6890756302521008
36 0.8825
37 0.780241935483871
38 0.8446490218642118
39 0.8644222020018199
40 0.8443496801705757
41 0.8077858880778589
42 0.7363636363636363
43 0.981263011797363
44 0.9931506849315068
45 0.9567099567099567
46 0.8839907192575406
47 0.8571428571428571
48 0.9952830188679245
49 0.9990412272291467
50 0.8568075117370892
51 0.9550949

409 0.9806818181818182
410 0.9322709163346613
411 0.8877551020408163
413 0.9817287420941673
415 0.8883647798742138
416 0.9007407407407407
417 0.9323467230443975
418 0.6628131021194605
419 0.9834413246940245
420 0.9778434268833087
421 0.9005847953216374
422 0.7020506634499397
423 0.9650943396226415
425 0.6431159420289855
426 0.8440207972270364
427 0.9371980676328503
428 0.9849765258215962
429 0.9765739385065886
430 0.8394648829431438
431 0.9418604651162791
432 0.8413705583756346
434 0.87215411558669
436 0.9258373205741627
437 0.7828877005347593
438 0.7391304347826086
439 0.9766803840877915
440 0.9592822636300897
442 0.9612565445026178
443 0.5758754863813229
445 0.9847539847539848
446 0.8568075117370892
447 0.8565121412803532
448 0.6923076923076923
449 0.9753086419753086
450 0.8698224852071006
451 0.9445531637312459
452 0.9757412398921833
454 0.8341625207296849
455 0.8070722828913156
456 0.8943820224719101
457 0.8059701492537313
458 0.7736318407960199
459 0.9854267869535045
460 0.9817251

814 0.8924731182795699
815 0.9990654205607477
816 0.9575757575757575
817 0.9603960396039604
819 0.7937685459940653
821 0.983927323549965
822 0.9670250896057347
823 0.7153284671532847
824 0.8620689655172413
825 0.9751958224543081
826 0.7771428571428571
827 0.734520780322307
828 0.8133333333333334
829 0.8746113989637305
830 0.9503610108303249
832 0.8642384105960265
834 0.9745942131263232
836 0.9895052473763118
839 0.9273743016759777
841 0.9841488628532047
842 0.898538961038961
843 0.998019801980198
844 0.8894806924101198
845 0.6746987951807228
846 0.8620689655172413
847 0.7049731182795699
848 0.9796067006554989
850 0.9654943596549436
851 0.9695501730103806
852 0.9771891096394407
853 0.9978237214363439
854 0.9824191279887482
855 0.6911111111111111
856 0.7560975609756098
858 0.9501246882793017
859 0.7342799188640974
860 0.9663461538461539
861 0.9817287420941673
862 0.83
864 0.6737588652482269
865 0.9596662030598053
866 0.9437722419928826
867 0.9391849529780564
868 0.9593062041360907
869 0.

1233 0.8757267441860465
1234 0.9825418994413407
1236 0.9789473684210527
1237 0.8870056497175142
1240 0.8926605504587156
1241 0.875
1242 0.9824314827828531
1243 0.9824314827828531
1244 0.9623915139826422
1245 0.960401891252955
1248 0.9441984056687334
1249 0.6034031413612565
1250 0.7420634920634921
1251 0.665943600867679
1252 0.9926470588235294
1253 0.9507829977628636
1254 0.9286775631500743
1256 0.9273291925465839
1257 0.9817799579537492
1258 0.775623268698061
1260 0.7388743455497382
1261 0.8046332046332046
1262 0.7552301255230126
1263 0.9397192402972749
1264 0.8881789137380192
1265 0.9574898785425101
1266 0.6421499292786421
1267 0.7770961145194274
1268 0.7923728813559322
1270 0.9750346740638003
1271 0.9409547738693468
1272 0.834307992202729
1273 0.7865497076023392
1274 0.9494949494949495
1275 0.8752293577981651
1276 0.9606227106227107
1277 0.972834067547724
1278 0.7126213592233009
1279 0.982468443197756
1280 0.6865284974093264
1281 0.9774545454545455
1282 0.9175257731958762
1283 0.9301

1633 0.93048128342246
1634 0.9778434268833087
1635 0.9036827195467422
1636 0.4827586206896552
1637 0.8450134770889488
1638 0.8746113989637305
1640 0.9966666666666667
1641 0.7555555555555555
1643 0.9252252252252252
1644 0.7045751633986929
1645 0.9212050984936269
1646 0.7617021276595745
1647 0.8475046210720887
1649 0.7779783393501805
1650 0.9561586638830898
1651 0.7521246458923513
1652 0.9743024963289281
1654 0.8828633405639913
1656 0.9853146853146854
1657 0.9130434782608695
1658 0.9733237202595529
1659 0.973457199734572
1660 0.778169014084507
1661 0.9387527839643652
1662 0.8586142322097379
1663 0.7460992907801418
1664 0.7450980392156863
1665 0.8675373134328358
1666 0.9692164179104478
1669 0.6509186351706037
1670 0.9640330188679245
1672 0.9640371229698376
1673 0.9538572458543619
1674 0.9848053181386515
1675 0.9775753328661527
1676 0.9990291262135922
1677 0.9982425307557118
1678 0.9817251461988304
1680 0.5872340425531914
1681 0.9038461538461539
1682 0.9753086419753086
1683 0.9701818181818

2032 0.983927323549965
2033 0.9817784256559767
2034 0.9751824817518249
2035 0.9778434268833087
2036 0.7957957957957958
2040 0.8960843373493976
2041 0.8929765886287625
2043 0.9800796812749004
2044 0.9990272373540856
2045 0.7873417721518987
2046 0.8835470085470085
2047 0.857025472473295
2048 0.9597069597069597
2049 0.8835470085470085
2050 0.9145129224652088
2052 0.6828543111992071
2053 0.7568134171907757
2054 0.8874538745387454
2055 0.9901380670611439
2056 0.9212050984936269
2057 0.7507645259938838
2058 0.7833935018050542
2059 0.700990099009901
2060 0.9672929714683368
2061 0.6389324960753532
2062 0.8752293577981651
2063 0.910958904109589
2064 0.9990224828934506
2065 0.8888888888888888
2066 0.7973684210526316
2067 0.7752293577981652
2069 0.5872340425531914
2070 0.978067169294037
2071 0.9914285714285714
2072 0.9703652653342523
2073 0.9931506849315068
2074 0.978067169294037
2075 0.9830188679245283
2076 0.8866090712742981
2077 0.76
2078 0.6758474576271186
2079 0.725609756097561
2080 0.948550

2416 0.9408121128699243
2417 0.8865619546247818
2418 0.9755859375
2419 0.9825418994413407
2420 0.9812138728323699
2421 0.9574007220216606
2422 0.9832402234636871
2423 0.9256578947368421
2425 0.7471264367816092
2426 0.67472240365774
2427 0.9494949494949495
2428 0.9774545454545455
2429 0.9026217228464419
2430 0.6851683348498635
2431 0.9778434268833087
2432 0.8609756097560975
2433 0.9817518248175182
2434 0.9695290858725761
2435 0.8695652173913043
2436 0.6751269035532995
2438 0.8568075117370892
2439 0.8403587443946189
2441 0.9853146853146854
2442 0.9466357308584686
2443 0.8701421800947867
2444 0.9922480620155039
2445 0.9634308510638298
2446 0.6427870461236507
2447 0.980188679245283
2448 0.9369127516778524
2449 0.9543726235741445
2450 0.9006734006734006
2451 0.9778270509977827
2452 0.848882035466461
2454 0.7974167233174712
2457 0.9097222222222222
2458 0.8722438391699092
2459 0.8077922077922078
2460 0.9087136929460581
2461 0.9763837638376384
2462 0.9138257575757576
2463 0.8177676537585421
24

2801 0.8875
2802 0.9555555555555556
2803 0.9286775631500743
2804 0.6081504702194357
2805 0.9817287420941673
2806 0.9854469854469855
2808 0.9188626907073509
2809 0.8484335309060118
2810 0.9087136929460581
2811 0.852589641434263
2812 0.9730305180979418
2813 0.908256880733945
2815 0.7262723521320495
2816 0.9463917525773196
2817 0.9347079037800687
2818 0.9951267056530214
2819 0.9786552828175027
2820 0.9286775631500743
2821 0.9674289674289674
2823 0.8560606060606061
2824 0.9825454545454545
2825 0.8568075117370892
2826 0.9876712328767123
2828 0.9754689754689755
2829 0.9751412429378531
2830 0.9786764705882353
2831 0.9875827814569537
2832 0.8840864440078585
2833 0.9818313953488372
2834 0.9767441860465116
2835 0.9874686716791979
2837 0.6056603773584905
2838 0.7590361445783133
2839 0.958092485549133
2840 0.6327372764786795
2841 0.8888888888888888
2842 0.8047337278106509
2843 0.8634920634920635
2844 0.975531914893617
2845 0.9586056644880174
2846 0.9781500364166059
2847 0.8826923076923077
2848 0.8

3194 0.7099056603773585
3195 0.9448173005219985
3196 0.7164404223227753
3197 0.9051094890510949
3199 0.9695079695079695
3200 0.933649289099526
3201 0.9369127516778524
3202 0.9083094555873925
3203 0.771505376344086
3204 0.8897058823529411
3205 0.7973684210526316
3206 0.9409547738693468
3207 0.7298728813559322
3208 0.8977635782747604
3209 0.9534662867996201
3210 0.767175572519084
3212 0.9749631811487481
3213 0.9776422764227642
3215 0.9640387275242047
3216 0.9917355371900827
3217 0.7514792899408284
3218 0.8064516129032258
3219 0.9566130160951715
3220 0.9785025945144552
3222 0.9267241379310345
3223 0.9132569558101473
3224 0.8568075117370892
3225 0.9989059080962801
3226 0.9325432999088423
3227 0.925764192139738
3228 0.979663394109397
3231 0.9672131147540983
3232 0.9674149167270094
3233 0.9962335216572504
3234 0.8891013384321224
3235 0.6952887537993921
3236 0.8601769911504424
3237 0.9807834441980784
3238 0.9780018331805683
3240 0.7148014440433214
3241 0.9524714828897338
3243 0.89130434782608

3598 0.9294425087108014
3599 0.9824314827828531
3600 0.9494949494949495
3601 0.7973684210526316
3603 0.648936170212766
3604 0.9600347523892268
3605 0.7323943661971831
3607 0.988795518207283
3608 0.8922413793103449
3609 0.9640387275242047
3611 0.8824306472919419
3612 0.7516778523489933
3613 0.9817799579537492
3614 0.9778434268833087
3615 0.9753086419753086
3617 0.8032659409020217
3618 0.7352941176470589
3619 0.7865497076023392
3620 0.910010111223458
3621 0.937137330754352
3623 0.9133663366336634
3624 0.9273504273504274
3625 0.8736842105263158
3626 0.8363636363636363
3629 0.8476190476190476
3630 0.9470712773465068
3631 0.9854166666666667
3632 0.9723207948899929
3633 0.9992181391712275
3634 0.9483344663494222
3635 0.9808541973490427
3636 0.8949044585987261
3637 0.9990281827016521
3638 0.9051987767584098
3639 0.961352657004831
3640 0.9941690962099126
3641 0.9594868332207968
3642 0.701098901098901
3643 0.9743024963289281
3644 0.8232373386295928
3645 0.9810126582278481
3646 0.968321013727560

3995 0.9775815217391305
3996 0.9770200148257969
3997 0.9808259587020649
3998 0.7480519480519481
3999 0.6931659693165969
4000 0.6333973128598849
4000
4001 0.9785763648928818
4002 0.9755244755244755
4003 0.9758694109297374
4004 0.9817799579537492
4005 0.904639175257732
4006 0.9690444145356663
4007 0.9138755980861244
4008 0.8484018264840183
4009 0.9778434268833087
4010 0.937137330754352
4011 0.978067169294037
4012 0.9770992366412213
4013 0.7428571428571429
4014 0.9615384615384616
4015 0.925764192139738
4016 0.9368505195843325
4017 0.5872340425531914
4020 0.9463364293085655
4021 0.8443496801705757
4022 0.7973684210526316
4023 0.9002541296060991
4025 0.9824314827828531
4026 0.9596662030598053
4027 0.9083094555873925
4028 0.9762071378586424
4029 0.7296538821328344
4030 0.9759377211606511
4031 0.8232373386295928
4032 0.87322695035461
4033 0.9771210676835081
4034 0.7727272727272727
4035 0.7793696275071633
4036 0.9063670411985019
4038 0.9722972972972973
4039 0.9796806966618288
4040 0.8739205526

In [54]:
# Store the final mapping of all data points to BiGG reactions.
mapped_kcat_path = join("..", "..", "..", "data", "kcat_data",
                        "df_kcat_with_BiGG_IDs.pkl")
df_kcat.to_pickle(mapped_kcat_path)