In [8]:
import pandas as pd
import os
from os.path import join
import matplotlib.pyplot as plt
import numpy as np
import json
from rdkit import Chem
from rdkit.Chem import AllChem
#from data_preprocessing import *
from BiGG_functions import *

import warnings
warnings.filterwarnings("ignore")

### 1. Creating a DataFrame with all reactions and its metabolite IDs from the universal model of BiGG:

#### (a) Creating DataFrame with all reactions from BiGG:

In [3]:
model_path = join("..", "..", "..", "data", "BiGG_data", "universal_model.json")
model_df = create_metabolic_model_df(model_path = model_path, model_name = "universal_model")

#### (b) Mapping Compound IDs for substrates and products for to all reactions:

In [3]:
metabolites_df = pd.DataFrame(columns = {"ID", "KEGG ID", "CHEBI ID", "MNX ID", 'InChI Key'})
with open(model_path) as json_file:
        model = json.load(json_file)
model = model["metabolites"]

for metabolite in model:
    metabolites_df = metabolites_df.append({"ID" : metabolite["id"] ,
                                            "KEGG ID" : find_kegg_id(annotation_list =metabolite["annotation"]),
                                           "CHEBI ID" : find_chebi_id(annotation_list =metabolite["annotation"]),
                                           "MNX ID": find_mnx_id(annotation_list = metabolite["annotation"]),
                                           'InChI Key': find_inchi_key(annotation_list =metabolite["annotation"])},
                                           ignore_index = True)
metabolites_df.index = metabolites_df["ID"]

In [4]:
model_df = adding_CIDS_to_model_df(model_df, model_path = model_path, metabolites_df = metabolites_df)

Removing all reactions for which we do not hav an ID for every substrate and every product:

In [6]:
model_df = model_df.loc[model_df["complete"]]
model_df.reset_index(inplace = True, drop = True)

model_df

Unnamed: 0,BiGG ID,substrates,products,substrate CIDs,product CIDs,complete
0,DM_4crsol_c,[4crsol_c],[],[C01468],[],True
1,DM_aacald_c,[aacald_c],[],[C06735],[],True
2,DM_amob_c,[amob_c],[],[C04425],[],True
3,BIOMASS_Ec_iJO1366_core_53p95M,"[10fthf_c, 2fe2s_c, 2ohph_c, 4fe4s_c, ala__L_c...","[adp_c, h_c, pi_c, ppi_c]","[C00234, MNXM151647, C05811, MNXM37766, C00041...","[C00008, C00080, C00009, C00013]",True
4,EX_12ppd__S_e,[12ppd__S_e],[],[C02917],[],True
...,...,...,...,...,...,...
18655,23dhbt4pp,"[23dhb_p, h_p]","[23dhb_c, h_c]","[C00196, C00080]","[C00196, C00080]",True
18656,PENAM_1,"[h2o_p, peng_p]","[6apa_p, h_p, pac_p]","[C00001, C05551]","[C02954, C00080, C00548]",True
18657,URCN_2,[urcan_c],[4izp_c],[C00785],[C03680],True
18658,GALAMptspp,"[galam_p, h_c, pep_c]","[galam6p_c, pyr_c]","[MNXM147460, C00080, C00074]","[C06377, C00022]",True


#### (c) If no KEGG ID is available for a metabolite, but a MetaNetX ID, we download an InChI string using the MetaNetX ID:

Creating a list of all MetaNetX IDs and downloading the InChiCodes for them:

In [9]:
MNX_IDs = []
for ind in model_df.index:
    if model_df["complete"][ind]:
        metabolites = model_df["substrate CIDs"][ind] + model_df["product CIDs"][ind]
        if metabolites[0][0] == "M":
            MNX_IDs = MNX_IDs + metabolites
            
f = open(join("..", "..", "..", "data", "BiGG_data", "MNX_IDs.txt"), "w") 
for ID in list(set(MNX_IDs)):
    f.write(str(ID) + "\n")
f.close()

Mapping MNX IDs to InChI strings with a MetaNetX ID database downloaded from here: https://www.metanetx.org/mnxdoc/mnxref.html

In [20]:
df_MNX = pd.read_csv(join("..", "..", "..", "data", "BiGG_data", "chem_prop.tsv"), sep = "\t")
df_MNX.index = df_MNX["#ID"]
df_MNX.head()

Unnamed: 0_level_0,#ID,name,reference,formula,charge,mass,InChI,InChIKey,SMILES
#ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
BIOMASS,BIOMASS,BIOMASS,mnx:BIOMASS,,,,,,
MNXM01,MNXM01,PMF,mnx:PMF,H,1.0,1.00794,InChI=1S/p+1,InChIKey=GPRLSGONYQIRFK-UHFFFAOYSA-N,[H+]
MNXM02,MNXM02,OH(-),mnx:HYDROXYDE,H,-1.0,17.00734,InChI=1S/H2O/h1H2/p-1,InChIKey=XLYOFNOQVPJJNP-UHFFFAOYSA-M,[O-][H]
MNXM03,MNXM03,H3O(+),mnx:OXONIUM,H3O,1.0,19.02322,InChI=1S/H2O/h1H2/p+1,InChIKey=XLYOFNOQVPJJNP-UHFFFAOYSA-O,[OH3+]
MNXM1,MNXM1,H(+),mnx:PROTON,H,1.0,1.00794,InChI=1S/p+1,InChIKey=GPRLSGONYQIRFK-UHFFFAOYSA-N,[H+]


In [35]:
for ind in model_df.index:
    substrates, products = model_df["substrate CIDs"][ind], model_df["product CIDs"][ind]
    for i, sub in enumerate(substrates):
        if sub[0] == "M":
            try:
                InChI = df_MNX["InChI"].loc[sub]
            except KeyError: 
                InChI = np.nan
            substrates[i] = InChI
            if pd.isnull(InChI):
                 model_df["complete"][ind] = False
    for i, pro in enumerate(products):
        if pro[0] == "M":
            try:
                InChI = df_MNX["InChI"].loc[pro]
            except KeyError: 
                InChI = np.nan
            products[i] = InChI
            if pd.isnull(InChI):
                 model_df["complete"][ind] = False        

In [36]:
model_df = model_df.loc[model_df["complete"]]
model_df.reset_index(inplace = True, drop = True)

model_df

Unnamed: 0,BiGG ID,substrates,products,substrate CIDs,product CIDs,complete
0,DM_4crsol_c,[4crsol_c],[],[C01468],[],True
1,DM_aacald_c,[aacald_c],[],[C06735],[],True
2,DM_amob_c,[amob_c],[],[C04425],[],True
3,EX_12ppd__S_e,[12ppd__S_e],[],[C02917],[],True
4,EX_15dap_e,[15dap_e],[],[C01672],[],True
...,...,...,...,...,...,...
10718,23dhbtex,[23dhb_e],[23dhb_p],[C00196],[C00196],True
10719,EX_23dhb_e,[23dhb_e],[],[C00196],[],True
10720,23dhbt4pp,"[23dhb_p, h_p]","[23dhb_c, h_c]","[C00196, C00080]","[C00196, C00080]",True
10721,PENAM_1,"[h2o_p, peng_p]","[6apa_p, h_p, pac_p]","[C00001, C05551]","[C02954, C00080, C00548]",True


#### (d) Adding backward directions for all reactions:

In [38]:
for ind in model_df.index:
    [substrates, products, sub_CIDs, pro_CIDs] = [model_df["substrates"][ind], model_df["products"][ind],
                                                  model_df["substrate CIDs"][ind], model_df["product CIDs"][ind]]
    model_df = model_df.append(model_df.loc[ind], ignore_index = True)
    ind2 = list(model_df.index)[-1]
    model_df["substrates"][ind2], model_df["products"][ind2] = products, substrates
    model_df["substrate CIDs"][ind2], model_df["product CIDs"][ind2] = pro_CIDs, sub_CIDs
    model_df["BiGG ID"][ind2] = model_df["BiGG ID"][ind] + "_r"

In [5]:
df_MNX = pd.read_pickle(join("..", "..", "..", "data", "BiGG_data", "df_MNX.pkl"))
model_df = pd.read_pickle(join("..", "..", "..", "data", "BiGG_data", "BiGG_model_reaction_information.pkl"))
model_df = model_df.loc[model_df["complete"]]
model_df.drop(columns = ["KEGG IDs", "ECs"], inplace = True)
model_df.rename(columns = {"substrate KEGG CIDs" : "substrate CIDs",
                           "product KEGG CIDs" : "product CIDs"}, inplace = True)
model_df

Unnamed: 0,BiGG ID,substrates,products,substrate CIDs,product CIDs,complete
0,2AGPEAT120,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]","[MNXM34808, MNXM3, MNXM162258]","[MNXM14, MNXM2858, MNXM11]",True
1,2AGPEAT140,"[2agpe140_c, atp_c, ttdca_c]","[amp_c, pe140_c, ppi_c]","[MNXM34809, MNXM3, MNXM162239]","[MNXM14, MNXM2859, MNXM11]",True
3,2AGPEAT160,"[2agpe160_c, atp_c, hdca_c]","[amp_c, pe160_c, ppi_c]","[MNXM34810, MNXM3, MNXM108]","[MNXM14, MNXM32178, MNXM11]",True
5,2AGPEAT180,"[2agpe180_c, atp_c, ocdca_c]","[amp_c, pe180_c, ppi_c]","[MNXM34811, MNXM3, MNXM236]","[MNXM14, MNXM31536, MNXM11]",True
6,2AGPEAT181,"[2agpe181_c, atp_c, ocdcea_c]","[amp_c, pe181_c, ppi_c]","[MNXM3449, MNXM3, MNXM306]","[MNXM14, MNXM2150, MNXM11]",True
...,...,...,...,...,...,...
15954,RAFH_r,"[fru_c, melib_c]","[h2o_c, raffin_c]","[C01496, C05003, C00095, C10906]","[C00001, C01328, C00492]",True
15955,DMALRED_r,"[fadh2_c, oaa_c]","[fad_c, mal__L_c]","[C01352, C00036]","[C00016, C00149]",True
15956,AMMQT8_2_r,"[ahcys_c, h_c, mqn8_c]","[2dmmq8_c, amet_c]","[MNXM19, MNXM1, MNXM509]","[MNXM2178, MNXM16]",True
15958,FFSD_r,"[fru_c, g6p_c]","[h2o_c, suc6p_c]","[C01496, C05003, C00095, C10906, C00092]","[C00001, C01328, C16688, C02591]",True


## 2. Calculation reaction fingerprints for all reactions in BiGG:

In [9]:
mol_folder = "C:\\Users\\alexk\\substrateprediction-main\\data\\mol-files"
def get_reaction_site_smarts(metabolites):
    reaction_site = ""
    for met in metabolites:
        if met[0] == "C":
            KEGG_ID = met
            try:
                Smarts = Chem.MolToSmarts(Chem.MolFromMolFile(join(mol_folder,  KEGG_ID + '.mol')))
            except:
                raise TypeError
                
        elif met[0] == "M":
            InchiCode = list(df_MNX["InChi Code"].loc[df_MNX["MNX ID"] == met])[0]
            mol = Chem.inchi.MolFromInchi(InchiCode)
            if mol is not None:
                Smarts = Chem.MolToSmarts(mol)
            else:
                raise TypeError
        reaction_site = reaction_site + "." + Smarts
    return(reaction_site[1:])

def convert_fp_to_array(difference_fp_dict):
    fp = np.zeros(2048)
    for key in difference_fp_dict.keys():
        fp[key] = difference_fp_dict[key]
    return(fp)

In [10]:
model_df["structural_fp"] = ""
model_df["difference_fp"] = ""

for ind in model_df.index:
    if model_df["complete"][ind]:
        try:
            substrates = model_df["substrate CIDs"][ind]
            products = model_df["product CIDs"][ind]
            left_site = get_reaction_site_smarts(substrates)
            right_site = get_reaction_site_smarts(products)

            rxn_forward = AllChem.ReactionFromSmarts(left_site + ">>" + right_site)

            difference_fp = Chem.rdChemReactions.CreateDifferenceFingerprintForReaction(rxn_forward)
            difference_fp = convert_fp_to_array(difference_fp.GetNonzeroElements())
            structural_fp = Chem.rdChemReactions.CreateStructuralFingerprintForReaction(rxn_forward).ToBitString()

            model_df["structural_fp"][ind] = structural_fp[:3276]
            model_df["difference_fp"][ind] = difference_fp
        except TypeError: 
            pass

In [15]:
model_df = model_df.loc[model_df["structural_fp"] != ""]
model_df.to_pickle(join("..", "..", "..", "data", "BiGG_data",
                             "bigg_models_with_fingerprints.pkl"))

## 3. Mapping our data point to BiGG reactions via reaction fingerprints:¶

In [16]:
model_df = pd.read_pickle(join("..", "..", "..", "data", "BiGG_data",
                             "bigg_models_with_fingerprints.pkl"))
model_df.head()

Unnamed: 0,BiGG ID,substrates,products,substrate CIDs,product CIDs,complete,structural_fp,difference_fp
0,2AGPEAT120,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]","[MNXM34808, MNXM3, MNXM162258]","[MNXM14, MNXM2858, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2AGPEAT140,"[2agpe140_c, atp_c, ttdca_c]","[amp_c, pe140_c, ppi_c]","[MNXM34809, MNXM3, MNXM162239]","[MNXM14, MNXM2859, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,2AGPEAT160,"[2agpe160_c, atp_c, hdca_c]","[amp_c, pe160_c, ppi_c]","[MNXM34810, MNXM3, MNXM108]","[MNXM14, MNXM32178, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5,2AGPEAT180,"[2agpe180_c, atp_c, ocdca_c]","[amp_c, pe180_c, ppi_c]","[MNXM34811, MNXM3, MNXM236]","[MNXM14, MNXM31536, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
6,2AGPEAT181,"[2agpe181_c, atp_c, ocdcea_c]","[amp_c, pe181_c, ppi_c]","[MNXM3449, MNXM3, MNXM306]","[MNXM14, MNXM2150, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 40.0, 0.0,..."


In [17]:
data_train = pd.read_pickle(join("..", "..", "..", "data", "kcat_data", "splits", "train_df_kcat_ts.pkl"))
data_test = pd.read_pickle(join("..", "..", "..", "data", "kcat_data", "splits", "test_df_kcat_ts.pkl"))

data_train["Uniprot ID"] = [UID[0] for UID in data_train["Uniprot IDs"]]
data_test["Uniprot ID"] = [UID[0] for UID in data_test["Uniprot IDs"]]

data_train.rename(columns = {"geomean_kcat" :"log10_kcat"}, inplace = True)
data_test.rename(columns = {"geomean_kcat" :"log10_kcat"}, inplace = True)

df_kcat = pd.concat([data_train, data_test], ignore_index=True)

#### (a) Looking for exact matches via the structural reaction fingerprint:

In [19]:
df_kcat["BiGG acc"] = np.nan
df_kcat["BiGG ID"] = np.nan

for ind in df_kcat.index:
    FP = df_kcat["structural_fp"][ind][:3276]
    if FP != "":
        help_df = model_df.loc[model_df["structural_fp"]  == FP]
        if len(help_df) > 0:
            df_kcat["BiGG ID"][ind] = list(help_df["BiGG ID"])[0]
            df_kcat["BiGG acc"][ind] = 1.0

In [20]:
len(df_kcat.loc[~pd.isnull(df_kcat["BiGG ID"])])

562

#### (b) For thhose reactions that couldn't be mapped yet: Looking for similar reactions:

In [21]:
import time
from rdkit import DataStructs

model_df = model_df.loc[model_df["structural_fp"] != ""]
model_df.reset_index(drop = True, inplace = True)
FP_list = list(model_df["structural_fp"])

df_kcat = df_kcat.loc[df_kcat["structural_fp"] != ""]

for ind in df_kcat.index:
    if pd.isnull(df_kcat["BiGG ID"][ind]):
        scores = np.zeros(len(FP_list))
        fp1 = df_kcat["structural_fp"][ind]
        fp1 = DataStructs.cDataStructs.CreateFromBitString(fp1[:3276])
        for i, fp in enumerate(FP_list):
            fp2 =  DataStructs.cDataStructs.CreateFromBitString(fp[:3276])
            scores[i] = DataStructs.cDataStructs.TanimotoSimilarity(fp1, fp2)

        max_i = np.argmax(scores)
        df_kcat["BiGG ID"][ind] = model_df["BiGG ID"][max_i]
        df_kcat["BiGG acc"][ind] = scores[max_i]
        print(ind, scores[max_i])

    if ind % 500 == 0:
        print(ind)
        time.sleep(1)
        df_kcat.to_pickle(join("..", "..", "..", "data", "kcat_data",
                         "df_kcat_with_BiGG_IDs.pkl"))

0 0.9832635983263598
0
1 0.8601769911504424
2 0.900990099009901
3 0.8942598187311178
4 0.9714285714285714
5 0.5872340425531914
6 0.9732582688247713
7 0.9512635379061372
8 0.5576470588235294
9 0.9846261355695318
11 0.7202216066481995
12 0.7435440783615316
13 0.9035486806187443
14 0.9322709163346613
17 0.9952830188679245
18 0.9832285115303984
19 0.9803779069767442
20 0.6164383561643836
21 0.9929922915206727
22 0.851063829787234
23 0.9970193740685543
24 0.988061797752809
25 0.9887719298245614
26 0.9662802950474183
28 0.8586497890295358
29 0.8667287977632805
30 0.9859882005899705
32 0.9772413793103448
33 0.6828703703703703
34 0.6277128547579299
35 0.6867671691792295
36 0.8825
37 0.8695652173913043
38 0.8446490218642118
39 0.8644222020018199
40 0.8443496801705757
41 0.8077858880778589
42 0.7363636363636363
43 0.9860529986052998
44 0.9931506849315068
45 0.9567099567099567
46 0.8839907192575406
47 0.8571428571428571
48 0.9952830188679245
49 0.9990412272291467
50 0.8568075117370892
51 0.955094

416 0.9007407407407407
417 0.9323467230443975
418 0.6871401151631478
419 0.9834413246940245
421 0.9005847953216374
422 0.7020506634499397
423 0.9650943396226415
425 0.6431159420289855
426 0.8440207972270364
427 0.9371980676328503
428 0.9877934272300469
429 0.9896373056994818
430 0.844632768361582
431 0.9419124218051832
432 0.8413705583756346
434 0.87215411558669
436 0.9258373205741627
437 0.7828877005347593
438 0.5526315789473685
439 0.9766803840877915
440 0.9552134359692093
442 0.9612565445026178
443 0.5758754863813229
445 0.9724517906336089
446 0.8568075117370892
447 0.8565121412803532
448 0.6766467065868264
449 0.9754335260115607
450 0.8698224852071006
451 0.9539776462853385
452 0.8825065274151436
454 0.8341625207296849
455 0.8070722828913156
456 0.8943820224719101
457 0.8059701492537313
458 0.6828703703703703
459 0.9888501742160278
460 0.9889135254988913
461 0.6985111662531017
462 0.9765708200212992
464 0.9992520568436799
465 0.9817384952520087
466 0.9530201342281879
467 0.98905109

830 0.9503610108303249
832 0.8642384105960265
834 0.9872430900070872
836 0.9895052473763118
839 0.9273743016759777
840 0.9940119760479041
841 0.9841488628532047
842 0.898538961038961
843 0.998019801980198
844 0.8894806924101198
845 0.6329113924050633
846 0.8566978193146417
847 0.7049731182795699
848 0.9837997054491899
850 0.9654943596549436
851 0.9723756906077348
852 0.9918276374442794
853 0.9978237214363439
854 0.9894662921348315
855 0.639511201629328
856 0.7560975609756098
858 0.9501246882793017
859 0.7533401849948612
860 0.9724896836313618
861 0.9901685393258427
862 0.8661971830985915
864 0.6737588652482269
865 0.980309423347398
866 0.9547062986553433
867 0.9391849529780564
868 0.9593062041360907
869 0.977961432506887
870 0.9838116261957321
871 0.991499227202473
872 0.881283422459893
873 0.9887719298245614
874 0.8601769911504424
875 0.9926470588235294
876 0.8970398970398971
877 0.8622950819672132
878 0.9867060561299852
879 0.8601769911504424
880 0.7606157112526539
881 0.985345429169

1249 0.5613682092555332
1250 0.7420634920634921
1251 0.665943600867679
1252 0.9926470588235294
1253 0.9507829977628636
1254 0.9286775631500743
1256 0.94
1257 0.9860041987403779
1258 0.8404255319148937
1260 0.6587458745874587
1261 0.8046332046332046
1262 0.7552301255230126
1263 0.9397192402972749
1264 0.9029793735676088
1265 0.9643577673167452
1266 0.6403385049365303
1267 0.7770961145194274
1268 0.759825327510917
1270 0.988834612700628
1271 0.9409547738693468
1272 0.834307992202729
1273 0.7940379403794038
1274 0.9494949494949495
1275 0.8752293577981651
1276 0.9606227106227107
1277 0.9903560830860534
1278 0.7126213592233009
1279 0.9866946778711485
1280 0.6837837837837838
1281 0.9897735573411249
1282 0.9175257731958762
1283 0.9301587301587302
1284 0.9730538922155688
1285 0.9905882352941177
1286 0.7365439093484419
1287 0.8566978193146417
1289 0.6577708006279435
1290 0.9142857142857143
1291 0.7593360995850622
1292 0.9264705882352942
1293 0.9987714987714987
1294 0.8875
1295 0.996666666666666

1645 0.9212050984936269
1646 0.7617021276595745
1647 0.8475046210720887
1649 0.7779783393501805
1650 0.9561586638830898
1651 0.7521246458923513
1652 0.9846715328467154
1654 0.8828633405639913
1656 0.9964887640449438
1657 0.9130434782608695
1658 0.9752186588921283
1659 0.973457199734572
1660 0.778169014084507
1661 0.9387527839643652
1662 0.8586142322097379
1663 0.7460992907801418
1664 0.7450980392156863
1665 0.8643122676579925
1666 0.9692164179104478
1669 0.6475195822454308
1670 0.9640330188679245
1672 0.9640371229698376
1673 0.973741794310722
1674 0.9914853358561968
1675 0.9846046186144157
1676 0.9990291262135922
1677 0.9982425307557118
1678 0.9889135254988913
1680 0.5872340425531914
1681 0.9038461538461539
1682 0.9754335260115607
1683 0.9866666666666667
1684 0.8431845597104946
1685 0.659877800407332
1686 0.9990224828934506
1687 0.8779956427015251
1688 0.7327586206896551
1690 0.7868131868131868
1692 0.8287937743190662
1693 0.9697594501718213
1695 0.9876712328767123
1696 0.9866666666666

2052 0.7239685658153242
2053 0.7568134171907757
2054 0.8874538745387454
2055 0.9901380670611439
2056 0.9212050984936269
2057 0.7507645259938838
2058 0.7833935018050542
2059 0.7140077821011673
2060 0.9674681753889675
2061 0.6389324960753532
2062 0.8752293577981651
2063 0.9055690072639225
2064 0.9990224828934506
2065 0.8888888888888888
2066 0.7365439093484419
2067 0.7752293577981652
2068 0.8231511254019293
2069 0.5872340425531914
2071 0.9962013295346629
2072 0.9585062240663901
2073 0.9931506849315068
2075 0.9877704609595485
2076 0.8866090712742981
2077 0.76
2078 0.7521434138737334
2079 0.725609756097561
2080 0.9485507246376812
2084 0.9990224828934506
2085 0.9557788944723619
2086 0.9880868955851436
2087 0.9402985074626866
2088 0.925764192139738
2089 0.9819004524886877
2090 0.9991273996509599
2091 0.7320954907161804
2092 0.7920723226703755
2093 0.9894662921348315
2094 0.7355242566510172
2096 0.9877934272300469
2097 0.9284140969162996
2098 0.9656862745098039
2099 0.8931116389548693
2100 0.9

2441 0.9964887640449438
2442 0.9444444444444444
2443 0.8701421800947867
2444 0.9922480620155039
2445 0.9634308510638298
2446 0.6489874638379942
2447 0.980188679245283
2448 0.943029490616622
2449 0.9543726235741445
2450 0.9006734006734006
2451 0.9985041136873598
2452 0.8815201192250373
2454 0.7339322736696614
2457 0.9097222222222222
2458 0.8820039551746869
2459 0.7108433734939759
2460 0.9087136929460581
2461 0.9903201787043932
2462 0.9138257575757576
2463 0.8177676537585421
2464 0.8640483383685801
2465 0.7365439093484419
2466 0.813200498132005
2467 0.9971751412429378
2468 0.7226502311248074
2469 0.9943342776203966
2470 0.9445506692160612
2471 0.957042957042957
2472 0.994016454749439
2473 0.8821170809943866
2474 0.9322709163346613
2475 0.8601769911504424
2476 0.9157458563535912
2477 0.9914853358561968
2478 0.9286775631500743
2479 0.9457794208256316
2480 0.8374384236453202
2481 0.9922915206727401
2482 0.6766595289079229
2483 0.6814404432132964
2484 0.9877934272300469
2485 0.80272108843537

2833 0.9941605839416059
2834 0.9859985261606485
2835 0.9874686716791979
2837 0.6056603773584905
2838 0.8155172413793104
2839 0.9744525547445255
2840 0.6310013717421125
2841 0.8888888888888888
2842 0.8047337278106509
2843 0.8478260869565217
2844 0.975531914893617
2845 0.9689978370583994
2846 0.9852507374631269
2847 0.8826923076923077
2848 0.8703416149068323
2849 0.9041394335511983
2850 0.9962602842183994
2851 0.9992520568436799
2852 0.9970171513795675
2853 0.9852724594992637
2854 0.9990636704119851
2856 0.9918276374442794
2857 0.8568075117370892
2858 0.9916666666666667
2859 0.9642601858470335
2860 0.766329346826127
2861 0.9428571428571428
2862 0.838486386709737
2863 0.9832635983263598
2864 0.9867872044506258
2865 0.9917910447761195
2866 0.8883647798742138
2867 0.94
2868 0.9523281596452328
2869 0.8396793587174348
2870 0.8883647798742138
2871 0.981021897810219
2872 0.9514084507042253
2874 0.8568075117370892
2875 0.8674832962138085
2876 0.8601769911504424
2878 0.9887719298245614
2879 0.856

3232 0.9766423357664233
3233 0.9962335216572504
3234 0.8891013384321224
3235 0.6952887537993921
3236 0.8601769911504424
3238 0.9843173431734318
3240 0.6933911159263272
3241 0.9522900763358778
3243 0.9068767908309455
3244 0.7029702970297029
3245 0.9961202715809894
3246 0.7281553398058253
3247 0.9760479041916168
3249 0.9285714285714286
3250 0.603448275862069
3251 0.8692737430167597
3253 0.9187817258883249
3254 0.7108433734939759
3255 0.88
3256 0.8315412186379928
3257 0.9908707865168539
3258 0.9166666666666666
3259 0.6842105263157895
3260 0.7936507936507936
3261 0.6326530612244898
3262 0.9409547738693468
3263 0.9860724233983287
3265 0.645748987854251
3267 0.9881533101045297
3269 0.9861014593467686
3270 0.917
3271 0.9542619542619543
3272 0.9070227497527201
3273 0.8835470085470085
3275 0.9873861247372109
3276 0.9825418994413407
3277 0.9623853211009175
3278 0.9947916666666666
3279 0.9992526158445441
3280 0.9524539877300614
3281 0.8334771354616048
3282 0.8746113989637305
3283 0.99627699180938

3644 0.8232373386295928
3645 0.988061797752809
3646 0.9683210137275607
3648 0.9826989619377162
3649 0.8746113989637305
3650 0.9970193740685543
3652 0.9747474747474747
3653 0.9192692987625221
3654 0.8539778449144008
3655 0.9877704609595485
3656 0.9754512635379061
3658 0.9846046186144157
3659 0.9853044086773968
3660 0.9357142857142857
3661 0.9566130160951715
3662 0.5748218527315915
3664 0.9559193954659949
3666 0.9594868332207968
3668 0.9860917941585535
3669 0.6914893617021277
3670 0.6905311778290993
3671 0.8489208633093526
3672 0.9977611940298508
3673 0.7543859649122807
3674 0.8565121412803532
3675 0.7336726039016115
3678 0.9989165763813651
3681 0.929364278506559
3682 0.8249648052557484
3683 0.9992526158445441
3684 0.9296875
3685 0.9654002713704206
3686 0.7634961439588689
3687 0.8046332046332046
3688 0.9549783549783549
3689 0.6794258373205742
3690 0.9986928104575163
3691 0.9947955390334573
3692 0.8676470588235294
3693 0.9354838709677419
3695 0.9437984496124031
3696 0.9818840579710145
369

4048 0.8548009367681498
4049 0.9372738238841978
4050 0.696513470681458
4051 0.9709302325581395
4052 0.9990291262135922
4053 0.8015873015873016
4054 0.630048465266559
4055 0.9642857142857143
4057 0.9706723891273248
4058 0.6394686907020873
4059 0.725609756097561
4060 0.9141004862236629
4061 0.8859903381642512
4062 0.856140350877193
4064 0.8825065274151436
4065 0.986100950987564
4066 0.74235807860262
4067 0.997134670487106
4069 0.896774193548387
4070 0.744832501781896
4071 0.8784977908689249
4072 0.8683602771362586
4073 0.7321428571428571
4074 0.9026548672566371
4075 0.8132530120481928
4076 0.8897338403041825
4078 0.9143835616438356
4079 0.7752293577981652
4081 0.7873015873015873
4082 0.9222222222222223
4083 0.7151675485008818
4084 0.87322695035461
4085 0.9733237202595529
4086 0.9578231292517007
4087 0.7219858156028369
4088 0.7440758293838863
4089 0.9610921501706484
4090 0.9040055248618785
4092 0.6842105263157895
4093 0.9289617486338798
4094 0.9640591966173362
4095 0.8046332046332046
4096

In [23]:
df_kcat.to_pickle(join("..", "..", "..", "data", "kcat_data",
                         "df_kcat_with_BiGG_IDs.pkl"))

## 1. Creating a DataFrame with all enzymatic reactions in 6 genome scale models and their reaction fingerprints:

### (a) Get substrates and products of all reactions and their KEGG CIDs:

In [2]:
def array_column_to_strings(df, column):
    df[column] = [str(list(df[column][ind])) for ind in df.index]
    return(df)

def string_column_to_array(df, column):
    df[column] = [np.array(eval(df[column][ind])) for ind in df.index]
    return(df)

In [3]:
models = ['iSDY_1059', 'iJN678', 'STM_v1_0', 'iSB619', 'iJN746', 'iML1515']

for i in range(len(models)):
    model_name = models[i]
    print(i,model_name)
    model_df = create_metabolic_model_df(model_name)
    model_df = adding_KEGG_CIDS_to_model_df(model_df, model_name)
    
    if i == 0:
        df_all_models = model_df
    else:
        df_all_models = df_all_models.append(model_df).reset_index(drop = True)

0 iSDY_1059


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/

1 iJN678
2 STM_v1_0
3 iSB619
4 iJN746
5 iML1515


In [4]:
droplist = []
for ind in df_all_models.index:
    if df_all_models["products"][ind] == []:
        droplist.append(ind)
    
    if df_all_models["BiGG ID"][ind][-4:] in ["tipp", "t2pp"]:
        droplist.append(ind)
        
    if df_all_models["BiGG ID"][ind][-3:] in ["tpp", "tex"]:
        droplist.append(ind)
    
    if df_all_models["BiGG ID"][ind][:7] == "BIOMASS":
        droplist.append(ind)
df_all_models.drop(droplist, inplace = True)
df_all_models.reset_index(drop = True, inplace = True)
df_all_models

Unnamed: 0,BiGG ID,KEGG IDs,ECs,substrates,products,substrate KEGG CIDs,product KEGG CIDs
0,23PDE2pp,[R03538],[3.1.4.16],"[23cump_p, h2o_p]","[3ump_p, h_p]","[C02355, C00001, C01328]","[C01368, C00080]"
1,23PDE4pp,[R03929],[3.1.4.16],"[23ccmp_p, h2o_p]","[3cmp_p, h_p]","[C02354, C00001, C01328]","[C05822, C00080]"
2,23PDE7pp,[R03537],[3.1.4.16],"[23camp_p, h2o_p]","[3amp_p, h_p]","[C02353, C00001, C01328]","[C01367, C00080]"
3,23PDE9pp,[R05135],[3.1.4.16],"[23cgmp_p, h2o_p]","[3gmp_p, h_p]","[C06194, C00001, C01328]","[C06193, C00080]"
4,2AGPEAT120,,,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]",[C00002],"[C00020, C00013]"
...,...,...,...,...,...,...,...
7529,MPTS,,,"[cpmp_c, cu2_c, moadcosh_c]","[h_c, moadcoo_c, mpt_c]",[C00070],"[C00080, C05924]"
7530,MOCOS,,,"[h_c, mobd_c, mptamp_c]","[amp_c, cu2_c, h2o_c, moco_c]","[C00080, C06232, C19848]","[C00020, C00070, C00001, C01328]"
7531,BMOGDS2,,,"[bmoco1gdp_c, gtp_c, h_c]","[bmocogdp_c, ppi_c]","[C00044, C00080]",[C00013]
7532,FESD2s,,,"[4fe4s_c, h_c, no_c]","[3fe4s_c, fe3_c, h2o_c, n2o_c]","[C00080, C00533]","[C14819, C00001, C01328, C00887]"


Check if available KEGG IDs are complete:

In [5]:
df_all_models["complete"] = False

for ind in df_all_models.index:
    try:
        if len(df_all_models["substrate KEGG CIDs"][ind]) >= len(df_all_models["substrates"][ind]):
            if len(df_all_models["product KEGG CIDs"][ind]) >= len(df_all_models["products"][ind]):
                df_all_models["complete"][ind] = True
    except TypeError:
        pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


### For all those metabolites, for which we couldn't get a Compound ID, we will try to get it manually:

Mapping all metabolites to InChiKeys if not all KEGG CIDs are available:

In [7]:
df_incomplete_reactions = df_all_models.loc[~df_all_models["complete"]]
df_incomplete_reactions

Unnamed: 0,BiGG ID,KEGG IDs,ECs,substrates,products,substrate KEGG CIDs,product KEGG CIDs,complete
4,2AGPEAT120,,,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]",[C00002],"[C00020, C00013]",False
5,2AGPEAT140,,,"[2agpe140_c, atp_c, ttdca_c]","[amp_c, pe140_c, ppi_c]",[C00002],"[C00020, C00013]",False
6,2AGPEAT141,,,"[2agpe141_c, atp_c, ttdcea_c]","[amp_c, pe141_c, ppi_c]",[C00002],"[C00020, C00013]",False
7,2AGPEAT160,,[2.3.1.40],"[2agpe160_c, atp_c, hdca_c]","[amp_c, pe160_c, ppi_c]","[C00002, C00249]","[C00020, C00013]",False
8,2AGPEAT161,,,"[2agpe161_c, atp_c, hdcea_c]","[amp_c, pe161_c, ppi_c]",[C00002],"[C00020, C00013]",False
...,...,...,...,...,...,...,...,...
7528,BMOGDS1,,,"[bmoco_c, gtp_c, h_c]","[bmoco1gdp_c, ppi_c]","[C00044, C00080]",[C00013],False
7529,MPTS,,,"[cpmp_c, cu2_c, moadcosh_c]","[h_c, moadcoo_c, mpt_c]",[C00070],"[C00080, C05924]",False
7531,BMOGDS2,,,"[bmoco1gdp_c, gtp_c, h_c]","[bmocogdp_c, ppi_c]","[C00044, C00080]",[C00013],False
7532,FESD2s,,,"[4fe4s_c, h_c, no_c]","[3fe4s_c, fe3_c, h2o_c, n2o_c]","[C00080, C00533]","[C14819, C00001, C01328, C00887]",False


In [9]:
df_BiGG_metabolites = pd.read_csv(join(datasets_dir, "metabolite_data",  "bigg_models_metabolites.csv"), sep = "\t")
df_BiGG_metabolites["MNX ID"] = np.nan
df_BiGG_metabolites["KEGG ID"] = np.nan
df_BiGG_metabolites["InChiKey"] = np.nan
df_BiGG_metabolites["CHEBI ID"] = np.nan

databases = ["MNX ID", "KEGG ID", "InChiKey", "CHEBI ID"]
identifiers = ["http://identifiers.org/metanetx.chemical/","http://identifiers.org/kegg.compound/",
                "http://identifiers.org/chebi/", "https://identifiers.org/inchikey/"]

def find_compound_IDs(links):
    IDs = []
    for i, database in enumerate(databases):
        identifier = identifiers[i]
        start = links.find(identifier)
        if start != -1:
            ID = links[start +len(identifier): ]
            end = ID.find(";")
            if end != -1:
                ID = ID[:end]
            IDs.append(ID)
        else:
            IDs.append(np.nan)
    return(IDs)

droplist = []
for ind in df_BiGG_metabolites.index:
    links = df_BiGG_metabolites["database_links"][ind]
    if not pd.isnull(links):
        IDs = find_compound_IDs(links = links)
        df_BiGG_metabolites["MNX ID"][ind] = IDs[0]
        df_BiGG_metabolites["KEGG ID"][ind] = IDs[1]
        df_BiGG_metabolites["InChiKey"][ind] = IDs[3]
        df_BiGG_metabolites["CHEBI ID"][ind] = IDs[2]
        if IDs == [np.nan, np.nan, np.nan, np.nan]:
            droplist.append(ind)
    else:
        droplist.append(ind)


### Drop all lines with no identifier at all:
df_BiGG_metabolites.drop(droplist, inplace = True)
df_BiGG_metabolites

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/

Unnamed: 0,bigg_id,universal_bigg_id,name,model_list,database_links,old_bigg_ids,MNX ID,KEGG ID,InChiKey,CHEBI ID
0,12dgr120_c,12dgr120,"1,2-Diacyl-sn-glycerol (didodecanoyl, n-C12:0)",iEC1364_W; iEC1349_Crooks; iEC1356_Bl21DE3; iM...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr120; 12dgr120[c]; 12dgr120_c; _12dgr120_c,MNXM4939,,,
1,12dgr140_c,12dgr140,"1,2-Diacyl-sn-glycerol (ditetradecanoyl, n-C14:0)",iECNA114_1301; iECSE_1348; iECO111_1330; iECOK...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr140; 12dgr140[c]; 12dgr140_c; _12dgr140_c,MNXM146479,,,
2,12dgr180_c,12dgr180,"1,2-Diacyl-sn-glycerol (dioctadecanoyl, n-C18:0)",iECB_1328; iECDH10B_1368; iEcE24377_1341; iECD...,MetaNetX (MNX) Chemical: http://identifiers.or...,12dgr180; 12dgr180[c]; 12dgr180_c; _12dgr180_c,MNXM4217,,,
3,14glucan_c,14glucan,"1,4-alpha-D-glucan",iSFxv_1172; iUTI89_1310; iSSON_1240; iSbBS512_...,BioCyc: http://identifiers.org/biocyc/META:1-4...,14glucan; 14glucan_c,MNXM2905,,,
4,15dap_c,15dap,"1,5-Diaminopentane",iECUMN_1333; iLF82_1304; iETEC_1333; iECSF_132...,KEGG Compound: http://identifiers.org/kegg.com...,15dap; 15dap[c]; 15dap_c,MNXM943,C01672,VHRGRCVQAFMJIZ-UHFFFAOYSA-P,CHEBI:13928
...,...,...,...,...,...,...,...,...,...,...
15553,mcbtt_e,mcbtt,Mycobactin T,iJN1463,BioCyc: http://identifiers.org/biocyc/META:CPD...,mcbtt; mcbtt_e,MNXM62700,,WTCKJYQWPPSOES-UHFFFAOYSA-N,
15564,ppi_p,ppi,Diphosphate,iJN1463,Reactome Compound: http://identifiers.org/reac...,ppi; ppi_p,MNXM11,C00013,XPPKVPWEQAFLFU-UHFFFAOYSA-K,CHEBI:13420
15567,pqq_p,pqq,Pyrroloquinoline-quinone,iJN1463,KEGG Compound: http://identifiers.org/kegg.com...,pqq; pqq_p,MNXM601,C00113,MMXZSJMASHPLLR-UHFFFAOYSA-K,CHEBI:14986
15580,vacc_p,vacc,Vaccenic acid,iJN1463,MetaNetX (MNX) Chemical: http://identifiers.or...,vacc; vacc_p,MNXM92713,,,


In [10]:
mets_with_ID = list(df_BiGG_metabolites["bigg_id"])

def get_MNX_IDs(metabolites):
    IDs = []
    complete = True
    for met in metabolites:
        if met not in mets_with_ID:
            complete = False
        else:
            IDs.append(list(df_BiGG_metabolites["MNX ID"].loc[df_BiGG_metabolites["bigg_id"] == met])[0])
    return(complete, IDs)

for ind in df_incomplete_reactions.index:
    subs_complete, subs_IDs = get_MNX_IDs(metabolites = df_incomplete_reactions["substrates"][ind])
    pros_complete, pros_IDs = get_MNX_IDs(metabolites = df_incomplete_reactions["products"][ind])
    if subs_complete and pros_complete:
        df_all_models["complete"][ind] = True
        df_all_models["substrate KEGG CIDs"][ind] = subs_IDs
        df_all_models["product KEGG CIDs"][ind] = pros_IDs
        
df_all_models.loc[df_all_models["complete"]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,BiGG ID,KEGG IDs,ECs,substrates,products,substrate KEGG CIDs,product KEGG CIDs,complete
0,23PDE2pp,[R03538],[3.1.4.16],"[23cump_p, h2o_p]","[3ump_p, h_p]","[C02355, C00001, C01328]","[C01368, C00080]",True
1,23PDE4pp,[R03929],[3.1.4.16],"[23ccmp_p, h2o_p]","[3cmp_p, h_p]","[C02354, C00001, C01328]","[C05822, C00080]",True
2,23PDE7pp,[R03537],[3.1.4.16],"[23camp_p, h2o_p]","[3amp_p, h_p]","[C02353, C00001, C01328]","[C01367, C00080]",True
3,23PDE9pp,[R05135],[3.1.4.16],"[23cgmp_p, h2o_p]","[3gmp_p, h_p]","[C06194, C00001, C01328]","[C06193, C00080]",True
4,2AGPEAT120,,,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]","[MNXM34808, MNXM3, MNXM162258]","[MNXM14, MNXM2858, MNXM11]",True
...,...,...,...,...,...,...,...,...
7529,MPTS,,,"[cpmp_c, cu2_c, moadcosh_c]","[h_c, moadcoo_c, mpt_c]","[MNXM725870, MNXM632, MNXM148354]","[MNXM1, MNXM148353, MNXM1193]",True
7530,MOCOS,,,"[h_c, mobd_c, mptamp_c]","[amp_c, cu2_c, h2o_c, moco_c]","[C00080, C06232, C19848]","[C00020, C00070, C00001, C01328]",True
7531,BMOGDS2,,,"[bmoco1gdp_c, gtp_c, h_c]","[bmocogdp_c, ppi_c]","[MNXM148110, MNXM51, MNXM1]","[MNXM147054, MNXM11]",True
7532,FESD2s,,,"[4fe4s_c, h_c, no_c]","[3fe4s_c, fe3_c, h2o_c, n2o_c]","[MNXM37766, MNXM1, MNXM228]","[MNXM147396, MNXM196, MNXM2, MNXM579]",True


Get a list of all MetaNetX IDs and download the InChiCodes for them:

In [11]:
MNX_IDs = []
for ind in df_all_models.index:
    if df_all_models["complete"][ind]:
        metabolites = df_all_models["substrate KEGG CIDs"][ind] + df_all_models["product KEGG CIDs"][ind]
        if metabolites[0][0] == "M":
            MNX_IDs = MNX_IDs + metabolites
            
f = open(join(datasets_dir, "MNX_IDs_Brenda.txt"), "w") 
for ID in list(set(MNX_IDs)):
    f.write(str(ID) + "\n")
f.close()

https://www.metanetx.org/cgi-bin/mnxweb/id_mapper

In [12]:
for i in range(9):
    if i == 0:
        df_MNX_mapping = pd.read_csv(join(datasets_dir,"metabolite_data", "MNX_mapping", "id_mapper(" +str(i)+ ")"), sep = "\t")
    else:
        df_new = pd.read_csv(join(datasets_dir,"metabolite_data", "MNX_mapping", "id_mapper(" +str(i)+ ")"), sep = "\t")
        df_MNX_mapping = pd.concat([df_MNX_mapping, df_new], ignore_index=True)
        
df_MNX_mapping = df_MNX_mapping.loc[~pd.isnull(df_MNX_mapping["InChI"])]
df_MNX_mapping

Unnamed: 0,#query,mnx_id,reference,xrefs,InChIkey,InChI,SMILES,name
0,MNXM232,MNXM232,chebi:61683,CHEBI:61683 ... bigg.metabolite:q8 ... biggM:M...,ICFIZJQGJAJRSU-SGHXUWJISA-N,InChI=1S/C49H74O4/c1-36(2)20-13-21-37(3)22-14-...,COC1=C(OC)C(=O)C(C\C=C(/C)CC\C=C(/C)CC\C=C(/C)...,ubiquinone-8
2,MNXM90891,MNXM90891,biggM:uLa4n,bigg.metabolite:uLa4n ... biggM:M_uLa4n ... bi...,BAFPKKRTAQMYMS-UHFFFAOYSA-N,InChI=1S/C60H100NO7P/c1-46(2)23-13-24-47(3)25-...,CC(C)=CCCC(C)=CCCC(C)=CCCC(C)=CCCC(C)=CCCC(C)=...,Undecaprenyl phosphate-4-amino-4-deoxy-L-arabi...
3,MNXM54308,MNXM731265,chebi:58731,CHEBI:48005 ... CHEBI:58731 ... bigg.metabolit...,WKGTVHGVLRCTCF-ZETCQYMHSA-O,InChI=1S/C9H19N3O3/c10-5-1-2-6-12-8(13)4-3-7(1...,[NH3+]CCCCNC(=O)CC[C@H]([NH3+])C([O-])=O,gamma-L-glutamylputrescine
6,MNXM5266,MNXM5266,biggM:pgp120,bigg.metabolite:pgp120 ... biggM:M_pgp120 ... ...,LNEADDPQSRWQGI-UHFFFAOYSA-K,InChI=1S/C30H60O13P2/c1-3-5-7-9-11-13-15-17-19...,CCCCCCCCCCCC(=O)OCC(COP([O-])(=O)OCC(O)COP([O-...,"Phosphatidylglycerophosphate (didodecanoyl, n-..."
7,MNXM1105,MNXM1101279,biggM:agdpcbi,bigg.metabolite:agdpcbi ... biggM:M_agdpcbi .....,IQTYKHRKNGVJEO-QRRCNOTJSA-N,InChI=1S/C58H86N16O18P2.C10H12N5O3.Co/c1-25(91...,C[C@H](CNC(=O)CC[C@]1(C)[C@H](CC(N)=O)C2[NH+]=...,Adenosine-GDP-cobinamide
...,...,...,...,...,...,...,...,...
1401,MNXM4232,MNXM1103432,chebi:72682,CHEBI:62840 ... CHEBI:72682 ... SLM:000000650 ...,STTKJLVEXMKLNA-CQSZACIVSA-L,InChI=1S/C15H31O7P/c1-2-3-4-5-6-7-8-9-10-11-15...,CCCCCCCCCCCC(=O)OC[C@@H](O)COP([O-])([O-])=O,1-dodecanoyl-sn-glycerol 3-phosphate
1402,MNXM4232,MNXM1103433,hmdb:HMDB0062319,deprecated:MNXM4232 ... hmdb:HMDB0062319 ... h...,STTKJLVEXMKLNA-AWEZNQCLSA-L,InChI=1S/C15H31O7P/c1-2-3-4-5-6-7-8-9-10-11-15...,CCCCCCCCCCCC(=O)OC[C@H](O)COP([O-])([O-])=O,1-dodecanoyl-glycero-3-phosphate
1404,MNXM9,MNXM9,chebi:43474,CHEBI:18367 ... CHEBI:26020 ... CHEBI:26078 .....,NBIIXXVUZAFLBC-UHFFFAOYSA-L,"InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-2",OP([O-])([O-])=O,phosphate
1409,MNXM6,MNXM738702,chebi:57783,CHEBI:16474 ... CHEBI:57783 ... bigg.metabolit...,ACFIXJIJDZMPPO-NNYOXOHSSA-J,InChI=1S/C21H30N7O17P3/c22-17-12-19(25-7-24-17...,NC(=O)C1=CN(C=CC1)[C@@H]1O[C@H](COP([O-])(=O)O...,NADPH


In [13]:
df_MNX = pd.DataFrame({"MNX ID" : list(set(MNX_IDs))})
df_MNX["InChi Code"] = ""
for ind in df_MNX.index:
    ID = df_MNX["MNX ID"][ind]
    help_df = df_MNX_mapping.loc[df_MNX_mapping["#query"] == ID]
    if len(help_df) > 0:
        df_MNX["InChi Code"][ind] = list(help_df["InChI"])[0]

df_MNX = df_MNX.loc[df_MNX["InChi Code"] != ""]
df_MNX

Unnamed: 0,MNX ID,InChi Code
1,MNXM40229,InChI=1S/C60H117O11P/c1-4-7-10-13-16-19-22-25-...
2,MNXM960,InChI=1S/C6H10NO4PS/c1-5-6(13-4-7-5)2-3-11-12(...
3,MNXM6543,InChI=1S/C127H203N9O52P2/c1-60(2)35-25-36-61(3...
4,MNXM1451,"InChI=1S/C28H50N8O18P3S/c1-28(2,23(41)26(42)31..."
5,MNXM49,InChI=1S/C10H15N5O13P2S/c11-8-5-9(13-2-12-8)15...
...,...,...
815,MNXM4926,InChI=1S/C33H58N7O18P3S/c1-4-5-6-7-8-9-10-11-2...
817,MNXM4503,InChI=1S/C81H150O17P2/c1-5-9-13-17-21-25-29-33...
818,MNXM73204,"InChI=1S/C5H12O3S/c1-2-3-4-5-9(6,7)8/h2-5H2,1H..."
820,MNXM162837,InChI=1S/C25H45N6O8.Fe/c1-21(32)29(37)18-9-3-6...


Update if reaction information is complete or not:

In [14]:
MNX_list = list(df_MNX["MNX ID"])

for ind in df_all_models.index:
    if df_all_models["complete"][ind]:
        metabolites = df_all_models["substrate KEGG CIDs"][ind] + df_all_models["product KEGG CIDs"][ind]
        if metabolites[0][0] == "M":
            for met in metabolites:
                if met not in MNX_list:
                    df_all_models["complete"][ind] = False
                    break            
                    
df_all_models.loc[df_all_models["complete"]]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0,BiGG ID,KEGG IDs,ECs,substrates,products,substrate KEGG CIDs,product KEGG CIDs,complete
0,23PDE2pp,[R03538],[3.1.4.16],"[23cump_p, h2o_p]","[3ump_p, h_p]","[C02355, C00001, C01328]","[C01368, C00080]",True
1,23PDE4pp,[R03929],[3.1.4.16],"[23ccmp_p, h2o_p]","[3cmp_p, h_p]","[C02354, C00001, C01328]","[C05822, C00080]",True
2,23PDE7pp,[R03537],[3.1.4.16],"[23camp_p, h2o_p]","[3amp_p, h_p]","[C02353, C00001, C01328]","[C01367, C00080]",True
3,23PDE9pp,[R05135],[3.1.4.16],"[23cgmp_p, h2o_p]","[3gmp_p, h_p]","[C06194, C00001, C01328]","[C06193, C00080]",True
4,2AGPEAT120,,,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]","[MNXM34808, MNXM3, MNXM162258]","[MNXM14, MNXM2858, MNXM11]",True
...,...,...,...,...,...,...,...,...
7523,VALt3pp,,,"[h_p, val__L_c]","[h_c, val__L_p]","[C00080, C16436, C00183]","[C00080, C16436, C00183]",True
7524,PACCOAE,,,"[h_c, nadph_c, o2_c, phaccoa_c]","[h2o_c, nadp_c, rephaccoa_c]","[C00080, C00005, C00007, C00582]","[C00001, C01328, C00006]",True
7526,HADPCOADH3,[R06941],"[1.1.1.35, 1.1.1.157]","[3hadpcoa_c, nad_c]","[h_c, nadh_c, oxadpcoa_c]","[C14145, C00003]","[C00080, C00004, C02232]",True
7527,PYROX,,,"[h_c, nadh_c, o2_c, ura_c]","[nad_c, uracp_c]","[C00080, C00004, C00007, C00106]","[C00003, C20231]",True


#### Adding reactions in backward direction

In [15]:
for ind in df_all_models.index:
    substrates, products = df_all_models["substrates"][ind], df_all_models["products"][ind]
    sub_CIDs, pro_CIDs = df_all_models["substrate KEGG CIDs"][ind], df_all_models["product KEGG CIDs"][ind]
    df_all_models = df_all_models.append(df_all_models.loc[ind], ignore_index = True)
    ind2 = list(df_all_models.index)[-1]
    df_all_models["substrates"][ind2], df_all_models["products"][ind2] = products, substrates
    df_all_models["substrate KEGG CIDs"][ind2], df_all_models["product KEGG CIDs"][ind2] = pro_CIDs, sub_CIDs
    df_all_models["BiGG ID"][ind2] =df_all_models["BiGG ID"][ind] + "_r"

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## 2. Calculating reaction Fingerprints:

In [16]:
mol_folder = "C:\\Users\\alexk\\mol-files\\mol-files"
def get_reaction_site_smarts(metabolites):
    reaction_site = ""
    for met in metabolites:
        if met[0] == "C":
            KEGG_ID = met
            try:
                Smarts = Chem.MolToSmarts(Chem.MolFromMolFile(join(mol_folder,  KEGG_ID + '.mol')))
            except:
                raise TypeError
                
        elif met[0] == "M":
            InchiCode = list(df_MNX["InChi Code"].loc[df_MNX["MNX ID"] == met])[0]
            mol = Chem.inchi.MolFromInchi(InchiCode)
            if mol is not None:
                Smarts = Chem.MolToSmarts(mol)
            else:
                raise TypeError
        reaction_site = reaction_site + "." + Smarts
    return(reaction_site[1:])

def convert_fp_to_array(difference_fp_dict):
    fp = np.zeros(2048)
    for key in difference_fp_dict.keys():
        fp[key] = difference_fp_dict[key]
    return(fp)

In [17]:
df_all_models["structural_fp"] = ""
df_all_models["difference_fp"] = ""

for ind in df_all_models.index:
    if df_all_models["complete"][ind]:
        try:
            substrates = df_all_models["substrate KEGG CIDs"][ind]
            products = df_all_models["product KEGG CIDs"][ind]
            left_site = get_reaction_site_smarts(substrates)
            right_site = get_reaction_site_smarts(products)

            rxn_forward = AllChem.ReactionFromSmarts(left_site + ">>" + right_site)

            difference_fp = Chem.rdChemReactions.CreateDifferenceFingerprintForReaction(rxn_forward)
            difference_fp = convert_fp_to_array(difference_fp.GetNonzeroElements())
            structural_fp = Chem.rdChemReactions.CreateStructuralFingerprintForReaction(rxn_forward).ToBitString()

            df_all_models["structural_fp"][ind] = structural_fp
            df_all_models["difference_fp"][ind] = difference_fp
        except TypeError: 
            pass

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [18]:
df_all_models.to_pickle(join(datasets_dir, "reaction_data", "6_bigg_models_with_fingerprints.pkl"))

In [21]:
df_all_models.head()

Unnamed: 0,BiGG ID,KEGG IDs,ECs,substrates,products,substrate KEGG CIDs,product KEGG CIDs,complete,structural_fp,difference_fp
0,23PDE2pp,[R03538],[3.1.4.16],"[23cump_p, h2o_p]","[3ump_p, h_p]","[C02355, C00001, C01328]","[C01368, C00080]",True,1100100100000000000000100010000001000001111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,23PDE4pp,[R03929],[3.1.4.16],"[23ccmp_p, h2o_p]","[3cmp_p, h_p]","[C02354, C00001, C01328]","[C05822, C00080]",True,1100100100000000000000100010000001000001111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,23PDE7pp,[R03537],[3.1.4.16],"[23camp_p, h2o_p]","[3amp_p, h_p]","[C02353, C00001, C01328]","[C01367, C00080]",True,1100111100000000000000100010010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,23PDE9pp,[R05135],[3.1.4.16],"[23cgmp_p, h2o_p]","[3gmp_p, h_p]","[C06194, C00001, C01328]","[C06193, C00080]",True,1100111100000000000000110010010001000001111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,2AGPEAT120,,,"[2agpe120_c, atp_c, ddca_c]","[amp_c, pe120_c, ppi_c]","[MNXM34808, MNXM3, MNXM162258]","[MNXM14, MNXM2858, MNXM11]",True,1100110100000000000000110110010001000001111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


### Mapping Brenda reactions to BiGG reactions via reaction Fingerprints:

In [7]:
df_all_models = pd.read_pickle(join(datasets_dir, "reaction_data", "6_bigg_models_with_fingerprints.pkl"))

In [8]:
brenda_df = pd.read_pickle(join(datasets_dir , "brenda_data" , "brenda_df_kcat_with_KM_curated_brenda.pkl"))
brenda_df.loc[brenda_df["structural fp"] != ""]

Unnamed: 0,index,ID,EC,kcat,ORGANISM,PMID,kcat_new,correct reaction ID,comment,new,...,structural fp,difference fp,small_difference fp,reaction ID,KM_f,KM_b,KM_f_min,KM_b_min,KM_f_max,KM_b_max
0,0.0,1152.0,1.1.1.49,1120,Pseudomonas fluorescens,1257.0,-,1878,,False,...,1100111100000001001000110110010001000101111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1878,-0.630582,-1.252412,-1.643362,-1.656518,0.285491,-0.848306
1,1.0,1146.0,1.1.1.49,1380,Pseudomonas fluorescens,1257.0,-,1879,,False,...,1100111100000001001000110110010001000101111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",1879,-0.799228,-1.237316,-1.064580,-1.701240,-0.533876,-0.773392
2,3.0,444.0,1.1.1.203,26.7,Pseudomonas syringae,2471.0,27,768,,False,...,1100111100000001001000110110010001000101111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",768,-0.233049,-0.985119,-0.622818,-1.294502,0.512625,-0.675736
3,5.0,6139.0,2.5.1.54,122,Escherichia coli,9387.0,-,12737,,False,...,1100000000000000000000000000000001000000111000...,"[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0...","[0.0, 0.0, 0.0, -1.0, 0.0, 0.0, 0.0, 1.0, 0.0,...",12737,-0.045926,-0.160133,-0.590532,-0.643110,0.867673,0.322845
4,7.0,3933.0,1.5.1.3,4,Lactobacillus casei,10886.0,-,7897,,False,...,1101111100000001001000110110010001001111111100...,"[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",7897,-1.877653,-1.717308,-1.963772,-1.910283,-1.791535,-1.524333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10529,,13171.0,4.2.3.25,0.044,Actinidia arguta,100528958.0,,29385,,False,...,1100000100000000000000000000000001000000111000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",29385,-1.325613,-0.911095,-2.335188,-0.940920,-0.308836,-0.869615
10530,,7697.0,3.1.1.101,2.3,Thermomonospora curvata,254050803.0,,15373,,False,...,1100110100000000000000110010000001000011101000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",15373,-0.446542,-0.528029,-0.907334,-0.852626,0.014251,-0.203432
10531,,7698.0,3.1.1.101,12.4,Thermomonospora curvata,254050803.0,,15373,,False,...,1100110100000000000000110010000001000011101000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...",15373,-0.446542,-0.528029,-0.907334,-0.852626,0.014251,-0.203432
10532,,11354.0,3.5.4.10,0.48,Thermococcus kodakarensis,316224277.0,,26450,,False,...,1100100100000000000000000010000001000001111000...,"[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0...","[0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, -1.0, 0.0,...",26450,-1.270451,-0.694950,-1.299281,-1.378661,-1.241621,0.187731


In [9]:
df_BiGG_models = pd.read_pickle(join(datasets_dir, "reaction_data", "6_bigg_models_with_fingerprints.pkl"))


brenda_df["BiGG acc"] = np.nan
brenda_df["BiGG ID"] = np.nan

for ind in brenda_df.index:
    FP = brenda_df["structural fp"][ind]
    if FP != "":
        help_df = df_BiGG_models.loc[df_BiGG_models["structural_fp"]  == FP]
        if len(help_df) > 0:
            brenda_df["BiGG ID"][ind] = list(help_df["BiGG ID"])[0]
            brenda_df["BiGG acc"][ind] = 1.0
            
brenda_df.loc[~pd.isnull(brenda_df["BiGG ID"])]

Unnamed: 0,index,ID,EC,kcat,ORGANISM,PMID,kcat_new,correct reaction ID,comment,new,...,small_difference fp,reaction ID,KM_f,KM_b,KM_f_min,KM_b_min,KM_f_max,KM_b_max,BiGG acc,BiGG ID


#### Those reactions that couldn't be mapped yet: Looking for similar reactions:

In [13]:
len(brenda_df["structural fp"][ind])

3276

In [20]:
import time
from rdkit import DataStructs

df_BiGG_models = df_BiGG_models.loc[df_BiGG_models["structural_fp"] != ""]
df_BiGG_models.reset_index(drop = True, inplace = True)
FP_list = list(df_BiGG_models["structural_fp"])

brenda_df = brenda_df.loc[brenda_df["structural fp"] != ""]

for ind in brenda_df.index:
    if pd.isnull(brenda_df["BiGG ID"][ind]):
        scores = np.zeros(len(FP_list))
        fp1 = brenda_df["structural fp"][ind]
        fp1 = DataStructs.cDataStructs.CreateFromBitString(fp1[:3276])
        for i, fp in enumerate(FP_list):
            fp2 =  DataStructs.cDataStructs.CreateFromBitString(fp[:3276])
            scores[i] = DataStructs.cDataStructs.TanimotoSimilarity(fp1, fp2)

        max_i = np.argmax(scores)
        brenda_df["BiGG ID"][ind] = df_BiGG_models["BiGG ID"][max_i]
        brenda_df["BiGG acc"][ind] = scores[max_i]
        print(ind, scores[max_i])

    if ind % 500 == 0:
        print(ind)
        time.sleep(1)
        brenda_df.to_pickle(join(datasets_dir,"brenda_data", "brenda_kcat_with_BiGG_IDs_V2.pkl"))

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


17 0.6362725450901804
21 0.8
22 0.9361421988150099
23 0.650875386199794
24 0.7747368421052632
25 0.9987012987012988
26 0.9987012987012988
27 0.9047058823529411
29 0.701098901098901
30 0.7142857142857143
31 0.7142857142857143
32 0.7142857142857143
33 0.7142857142857143
35 0.784452296819788
36 0.9487179487179487
37 0.9631901840490797
41 0.8
42 0.8604651162790697
43 1.0
44 0.9867938371239912
45 0.994535519125683
46 0.7119565217391305
47 0.9647283126787417
48 1.0
49 1.0
50 0.5909980430528375
51 0.7790432801822323
52 0.6550522648083623
53 0.7011173184357542
54 0.9155701754385965
55 0.8746113989637305
56 0.725609756097561
57 0.725609756097561
58 0.8746113989637305
59 0.9155701754385965
60 0.686055182699478
62 0.6911764705882353
64 1.0
65 0.8470790378006873
66 0.953998584571833
67 0.953998584571833
68 0.8605914302957152
69 0.8605914302957152
71 0.8746113989637305
72 0.9155701754385965
73 0.8746113989637305
74 0.6467013888888888
75 0.8746113989637305
76 0.9155701754385965
77 0.6467013888888888

573 0.6851683348498635
574 0.8746113989637305
575 0.9155701754385965
576 0.9083665338645418
577 0.9768451519536903
578 0.9985326485693323
579 0.9985326485693323
580 0.9985326485693323
581 0.8093525179856115
582 0.8093525179856115
583 0.9988425925925926
584 0.9783845278725825
585 1.0
586 0.9990636704119851
587 0.9992857142857143
588 0.9993031358885017
589 0.9408919935518538
590 1.0
591 0.8719081272084805
592 0.8746113989637305
593 0.6467013888888888
594 0.9155701754385965
595 0.7981481481481482
596 0.7981481481481482
599 1.0
600 0.7981651376146789
601 1.0
602 0.725609756097561
603 0.9155701754385965
604 0.8746113989637305
605 1.0
606 0.9614305750350631
607 0.9371155160628845
608 1.0
609 1.0
610 0.9846827133479212
611 0.9985326485693323
612 0.9769452449567724
613 1.0
614 0.9561586638830898
615 1.0
616 0.6914893617021277
617 0.7026431718061674
618 0.6069958847736625
619 0.994535519125683
620 1.0
621 0.9803788903924222
622 0.8746113989637305
623 0.6851683348498635
624 0.9155701754385965
62

1054 0.983125458547322
1055 0.766329346826127
1056 0.766329346826127
1057 1.0
1058 0.9168831168831169
1059 0.9990393852065321
1060 0.680373831775701
1061 0.680373831775701
1062 0.680373831775701
1063 0.680373831775701
1064 0.680373831775701
1065 0.680373831775701
1066 0.680373831775701
1067 0.8842257597684515
1073 0.6503667481662592
1074 0.6503667481662592
1075 0.6503667481662592
1076 0.6503667481662592
1077 1.0
1081 0.8610406091370558
1082 1.0
1083 0.744874715261959
1084 1.0
1090 0.8801089918256131
1091 0.7436159346271706
1092 0.8801089918256131
1093 0.7436159346271706
1094 1.0
1096 0.711139896373057
1097 0.7232142857142857
1098 0.7232142857142857
1101 0.9992576095025983
1102 0.9285714285714286
1103 0.9285714285714286
1104 0.9853963838664812
1105 0.9853963838664812
1106 0.8663793103448276
1110 0.8785388127853881
1111 0.7181818181818181
1112 0.8406139315230224
1113 0.9992619926199262
1114 0.9875091844232182
1115 0.9992619926199262
1118 0.9985239852398524
1120 0.9985239852398524
1121 0.

1538 0.9650205761316872
1539 0.9988974641675854
1540 0.9988974641675854
1541 0.9745934959349594
1542 0.9989177489177489
1543 0.9989711934156379
1544 1.0
1545 0.9794326241134752
1546 1.0
1547 0.744874715261959
1549 0.9713983050847458
1550 0.7827004219409283
1551 0.8409785932721713
1552 0.9003623188405797
1553 0.9898107714701602
1554 0.8111298482293423
1555 0.8111298482293423
1556 0.7925801011804384
1557 0.8138392857142858
1558 0.6434977578475336
1559 0.7525083612040134
1560 0.9985326485693323
1561 1.0
1562 1.0
1563 1.0
1564 1.0
1565 1.0
1567 0.9945945945945946
1568 0.9945945945945946
1569 0.984427894380501
1570 0.9901892081289418
1571 0.9901892081289418
1572 0.9944095038434662
1573 0.9605710401087696
1574 0.984427894380501
1575 0.9806629834254144
1576 0.9908835904628331
1577 0.9895615866388309
1578 0.9875173370319001
1579 0.801980198019802
1580 0.9864253393665159
1581 0.9267241379310345
1592 0.6617466174661747
1593 0.9990592662276576
1594 1.0
1595 0.6666666666666666
1596 0.6666666666666

2026 0.9987012987012988
2027 0.9987012987012988
2028 0.5816326530612245
2029 1.0
2030 0.8512861736334405
2032 0.7202216066481995
2034 0.7365439093484419
2036 0.6088516746411483
2037 0.9683720930232558
2040 0.946360153256705
2041 0.9909533750869868
2042 0.9909533750869868
2043 0.9771784232365145
2044 0.8997912317327766
2045 0.7046178343949044
2049 0.9824945295404814
2050 0.9718614718614719
2052 1.0
2053 1.0
2054 1.0
2055 0.936145952109464
2056 0.6914893617021277
2057 0.9383259911894273
2058 1.0
2059 0.9362637362637363
2060 0.7928348909657321
2061 1.0
2062 0.9325842696629213
2063 0.7454819277108434
2064 0.7890222984562607
2065 0.720569210866753
2066 0.8512861736334405
2069 0.6298342541436464
2070 0.631004366812227
2071 0.6239669421487604
2072 0.5176470588235295
2073 1.0
2074 1.0
2075 0.954653937947494
2078 0.6160714285714286
2079 0.9918200408997955
2080 0.9650205761316872
2081 0.8894806924101198
2082 0.9182389937106918
2083 0.8894806924101198
2084 0.9182389937106918
2085 0.62056737588652

2546 1.0
2547 1.0
2550 0.8655959425190194
2551 0.8885658914728682
2552 0.8805418719211823
2553 0.9660326086956522
2554 0.9951923076923077
2555 0.9366515837104072
2557 1.0
2560 0.9637010676156583
2562 0.9911176905995559
2563 0.9911176905995559
2564 0.9875091844232182
2565 0.9875091844232182
2574 0.9672935404742437
2575 1.0
2576 1.0
2579 0.9662222222222222
2580 0.9992181391712275
2582 0.9427312775330396
2583 0.5915492957746479
2584 0.5352112676056338
2585 0.9987613542526838
2586 0.9987613542526838
2587 0.9987613542526838
2588 1.0
2589 1.0
2595 0.8482142857142857
2596 0.9947916666666666
2597 0.9977611940298508
2601 0.7544316996871742
2602 0.9683720930232558
2603 0.6794258373205742
2604 0.998025666337611
2605 0.9683720930232558
2606 0.9683720930232558
2607 0.725609756097561
2608 0.8746113989637305
2609 0.9155701754385965
2610 0.6938020351526365
2611 0.6711956521739131
2612 0.826036866359447
2613 0.9874476987447699
2614 0.9881367759944173
2615 0.9881367759944173
2616 0.9874476987447699
2617

3097 0.6914893617021277
3098 0.9993011879804332
3099 0.9993011879804332
3100 0.8928571428571429
3101 0.7591549295774648
3102 0.7591549295774648
3103 1.0
3104 1.0
3105 1.0
3106 0.8601694915254238
3107 0.996268656716418
3108 0.8598901098901099
3109 0.9964028776978417
3110 0.9968553459119497
3111 0.801980198019802
3112 0.6472945891783567
3113 0.833810888252149
3114 0.6982758620689655
3115 0.7653061224489796
3116 0.6944971537001897
3117 0.8364779874213837
3118 0.8364779874213837
3119 1.0
3120 1.0
3121 1.0
3122 1.0
3123 1.0
3125 0.9859055673009162
3126 0.9872971065631616
3127 0.9759316770186336
3128 0.9759316770186336
3129 0.9507202426080363
3130 0.9507202426080363
3131 0.8111298482293423
3132 0.8802816901408451
3133 0.65832531280077
3134 0.9155701754385965
3135 0.7174541947926711
3136 0.8746113989637305
3137 0.725609756097561
3138 0.8746113989637305
3139 0.945360824742268
3140 0.7119341563786008
3141 0.7194513715710723
3142 1.0
3143 0.9970326409495549
3144 0.9875091844232182
3145 0.8317757

3588 1.0
3589 0.8746113989637305
3591 0.6851683348498635
3592 0.9155701754385965
3593 0.725609756097561
3595 0.9757785467128027
3596 0.9915611814345991
3597 0.7712328767123288
3598 0.7712328767123288
3602 0.665943600867679
3603 0.9986043265875785
3604 1.0
3605 0.9908835904628331
3615 0.9683720930232558
3616 0.9683720930232558
3617 0.6794258373205742
3618 0.6794258373205742
3619 0.6794258373205742
3620 0.6794258373205742
3621 0.9964763918252291
3622 0.9992932862190813
3623 0.951417004048583
3624 0.9899665551839465
3625 0.9992912827781715
3626 0.9961202715809894
3627 0.9961202715809894
3628 0.9991416309012876
3629 0.9991416309012876
3630 0.9991416309012876
3631 0.9991416309012876
3632 0.9991416309012876
3633 0.9991416309012876
3634 0.9291101055806938
3635 0.9291101055806938
3636 0.9588014981273408
3637 0.6268656716417911
3638 0.9588014981273408
3639 0.9444807315480078
3641 1.0
3642 1.0
3643 0.7606060606060606
3646 0.9874213836477987
3647 0.9771151178918169
3648 0.9933431952662722
3649 0.

4064 1.0
4065 1.0
4066 0.7432432432432432
4067 0.9334600760456274
4068 0.9951923076923077
4069 0.9951923076923077
4070 0.8545887961859356
4071 0.8545887961859356
4072 0.946360153256705
4073 0.9833822091886608
4074 0.9735294117647059
4075 1.0
4076 1.0
4077 1.0
4078 0.5741176470588235
4079 0.6245210727969349
4080 0.5962732919254659
4081 0.6434634974533107
4082 0.5741176470588235
4083 0.6183953033268101
4084 0.6403669724770642
4085 0.7689295039164491
4086 0.6507042253521127
4087 0.5893223819301848
4092 0.6403669724770642
4093 0.9992932862190813
4096 0.8746113989637305
4097 0.9155701754385965
4098 0.6851683348498635
4099 0.7174541947926711
4100 0.725609756097561
4101 0.7700831024930748
4102 0.6427870461236507
4103 0.6427870461236507
4104 1.0
4105 1.0
4106 1.0
4107 1.0
4114 0.9989270386266095
4115 0.9987437185929648
4116 0.8253768844221105
4117 0.9962335216572504
4118 0.8625827814569537
4119 0.8625827814569537
4120 0.9962335216572504
4121 1.0
4122 0.8253768844221105
4123 0.9987437185929648


4534 1.0
4535 0.9831501831501831
4536 0.9992542878448919
4537 1.0
4538 0.9970282317979198
4540 0.8215488215488216
4541 0.9966666666666667
4542 1.0
4546 0.8617886178861789
4547 0.8617886178861789
4548 0.8617886178861789
4549 0.7797619047619048
4550 0.8181818181818182
4551 0.7555555555555555
4552 1.0
4553 0.8477508650519031
4554 0.8625827814569537
4555 0.9057301293900185
4556 0.8477508650519031
4557 0.8625827814569537
4558 0.8477508650519031
4559 0.9991273996509599
4560 0.744874715261959
4561 0.7925801011804384
4562 0.8111298482293423
4563 0.8111298482293423
4564 0.7403189066059226
4565 0.744874715261959
4566 1.0
4567 0.989067055393586
4568 0.9883381924198251
4569 0.9970326409495549
4570 0.9977711738484398
4571 1.0
4572 0.9985141158989599
4573 0.9985141158989599
4574 0.9985141158989599
4575 0.9985141158989599
4577 0.9970326409495549
4578 1.0
4579 1.0
4580 1.0
4581 1.0
4582 1.0
4583 1.0
4584 1.0
4585 0.9977989728539985
4586 0.9977989728539985
4587 1.0
4588 1.0
4589 0.6914893617021277
4590

5020 1.0
5022 0.9640718562874252
5023 1.0
5024 0.9356110381077529
5025 0.8085969180859692
5026 0.8085969180859692
5027 0.9356110381077529
5028 0.9703328509406657
5029 0.9703328509406657
5030 0.701098901098901
5031 1.0
5032 0.8874538745387454
5034 0.9641509433962264
5035 0.8178160919540229
5036 0.8178160919540229
5037 0.8675373134328358
5038 0.8675373134328358
5039 0.8675373134328358
5040 0.8675373134328358
5041 0.8675373134328358
5042 0.8675373134328358
5043 0.8675373134328358
5044 0.8675373134328358
5045 0.8675373134328358
5046 0.8675373134328358
5047 0.8675373134328358
5048 0.8675373134328358
5051 0.6431095406360424
5052 1.0
5053 1.0
5054 0.9674721189591078
5055 0.9674721189591078
5056 0.9674721189591078
5057 0.9674721189591078
5058 0.6739606126914661
5060 0.7424749163879598
5061 0.7424749163879598
5063 0.9678638941398866
5064 1.0
5066 0.8744075829383886
5067 0.9974489795918368
5068 0.8744075829383886
5069 0.9974489795918368
5070 0.8744075829383886
5071 0.751412429378531
5072 0.99808

5479 0.9951923076923077
5480 0.9951923076923077
5481 0.6155717761557178
5482 1.0
5483 0.7057710501419111
5484 0.7915904936014625
5485 0.7915904936014625
5486 0.6939721792890263
5487 0.9990234375
5488 0.9990234375
5489 1.0
5491 0.7
5495 0.6914893617021277
5496 0.8151515151515152
5497 0.67
5498 0.9974025974025974
5499 0.9626436781609196
5500 0.6500541711809318
5500
5501 0.7406523468575974
5502 0.6855225311601151
5503 0.6669912366114897
5504 0.7226502311248074
5505 0.7483870967741936
5506 0.6607387140902873
5507 0.6342465753424658
5508 1.0
5509 1.0
5510 1.0
5511 1.0
5512 1.0
5513 1.0
5514 1.0
5516 0.6771978021978022
5517 0.7010526315789474
5519 0.9833795013850416
5520 0.7693798449612403
5521 0.7693798449612403
5522 0.6172839506172839
5523 0.8130252100840336
5524 0.6811320754716981
5526 0.9809236947791165
5529 0.8595166163141994
5530 0.986870897155361
5531 0.725609756097561
5532 0.8746113989637305
5533 0.6851683348498635
5534 0.7634228187919463
5535 0.7347972972972973
5536 0.76013513513513

5966 0.744874715261959
5967 0.8111298482293423
5968 1.0
5969 1.0
5970 0.8426483233018057
5971 0.8426483233018057
5972 0.9707317073170731
5973 0.9707317073170731
5974 0.9707317073170731
5975 0.9707317073170731
5976 0.9707317073170731
5985 0.9935851746258019
5987 0.830945558739255
5988 0.830945558739255
5989 0.6847457627118644
5990 0.9844606946983546
5991 0.9960159362549801
5993 1.0
5995 0.8177257525083612
5996 0.9613259668508287
5997 0.6292134831460674
5998 0.6750313676286073
5999 0.6750313676286073
6000 0.665943600867679
6000
6001 0.8791666666666667
6002 0.8791666666666667
6003 0.9393939393939394
6004 0.9393939393939394
6005 1.0
6006 1.0
6007 0.997212543554007
6011 0.9989384288747346
6012 0.9989384288747346
6013 0.9916666666666667
6014 0.9916666666666667
6015 1.0
6016 1.0
6017 1.0
6018 0.9871152469577666
6019 0.9871152469577666
6020 1.0
6021 0.9155701754385965
6022 0.7244258872651357
6023 0.6680584551148225
6024 0.8069620253164557
6025 0.65625
6027 0.65625
6028 0.7681159420289855
6029 

6462 0.6769436997319035
6463 0.9822595704948646
6464 0.9822595704948646
6473 0.9931506849315068
6474 0.9931506849315068
6475 0.751412429378531
6476 0.751412429378531
6477 0.9931506849315068
6478 0.751412429378531
6479 0.7142857142857143
6480 0.7142857142857143
6482 0.992816091954023
6483 0.992816091954023
6484 0.9004366812227074
6485 0.9004366812227074
6486 0.664364640883978
6487 0.7703984819734345
6488 0.7703984819734345
6489 0.8854166666666666
6490 0.7660550458715596
6491 0.7660550458715596
6492 0.7660550458715596
6493 1.0
6494 1.0
6495 0.9686520376175548
6496 0.9686520376175548
6497 0.9686520376175548
6498 0.9686520376175548
6499 0.9979181124219292
6500 0.9978070175438597
6500
6501 0.9979181124219292
6502 0.9978070175438597
6503 0.7181818181818181
6504 0.7181818181818181
6506 0.7181818181818181
6507 1.0
6508 0.8027210884353742
6509 0.8172231985940246
6517 0.9986043265875785
6518 0.986703988803359
6519 0.9853249475890985
6521 0.8894806924101198
6522 0.8894806924101198
6524 0.99633162

6925 0.9957716701902748
6926 0.9957716701902748
6927 0.9957716701902748
6928 0.9758812615955473
6929 0.9990592662276576
6930 0.9758812615955473
6931 0.9758812615955473
6932 0.9990592662276576
6933 0.9990592662276576
6934 0.9990215264187867
6935 0.904639175257732
6936 0.9957983193277311
6937 0.9759316770186336
6939 0.9986928104575163
6940 0.8446601941747572
6941 0.875
6942 0.9444444444444444
6943 0.8907103825136612
6944 1.0
6945 0.8907103825136612
6946 0.9444444444444444
6947 0.9444444444444444
6948 0.9444444444444444
6949 0.8446601941747572
6950 1.0
6951 0.9983792544570502
6952 0.8111298482293423
6953 0.9452296819787986
6954 0.9401888772298006
6955 0.9401888772298006
6956 1.0
6957 0.9945945945945946
6958 0.8057409879839786
6959 0.8057409879839786
6960 1.0
6961 1.0
6962 0.9173497267759563
6963 0.9173497267759563
6964 0.9173497267759563
6965 0.9733044733044733
6966 0.9611032531824611
6967 0.9173497267759563
6968 0.9173497267759563
6969 0.9733044733044733
6970 0.9611032531824611
6971 0.91

7382 1.0
7383 0.881283422459893
7384 1.0
7385 0.9175170068027211
7387 0.9941747572815534
7388 0.9983792544570502
7389 0.65625
7391 0.9839055793991416
7392 0.9818577648766328
7393 0.9818577648766328
7394 0.9932614555256065
7397 1.0
7399 0.5608591885441527
7400 0.9751895244658856
7401 1.0
7402 0.9789702683103698
7403 1.0
7404 1.0
7405 0.9751895244658856
7406 1.0
7407 0.9789702683103698
7408 0.752258064516129
7409 1.0
7410 1.0
7412 0.8705962059620597
7413 0.881664499349805
7414 0.8688193743693239
7415 1.0
7416 1.0
7417 1.0
7418 0.8033980582524272
7419 0.8033980582524272
7420 0.8033980582524272
7421 1.0
7422 0.953998584571833
7423 0.9904761904761905
7424 0.9904761904761905
7425 0.9904761904761905
7426 0.9722991689750693
7427 0.8423383525243578
7428 0.8423383525243578
7429 0.8423383525243578
7430 0.8423383525243578
7431 1.0
7433 0.8319327731092437
7434 0.8319327731092437
7435 0.8319327731092437
7436 0.8319327731092437
7437 0.8319327731092437
7438 0.7752293577981652
7439 0.9989615784008308
7

7853 1.0
7854 0.9992932862190813
7855 1.0
7856 0.8547418967587035
7857 0.797085201793722
7858 0.7733188720173536
7859 0.7777777777777778
7860 0.8472
7863 0.9598393574297188
7864 1.0
7865 0.9416666666666667
7866 1.0
7867 0.9828326180257511
7868 1.0
7869 0.9625468164794008
7870 0.9457364341085271
7871 0.9598393574297188
7872 0.9606299212598425
7873 1.0
7874 1.0
7875 0.8691588785046729
7876 0.9145299145299145
7877 0.8691588785046729
7878 1.0
7880 0.988795518207283
7881 0.688783570300158
7882 0.688783570300158
7883 0.688783570300158
7884 0.688783570300158
7885 0.688783570300158
7886 0.688783570300158
7887 0.688783570300158
7888 0.688783570300158
7889 0.688783570300158
7890 0.751412429378531
7892 0.8687648456057007
7893 0.8474470734744707
7894 0.7959401709401709
7895 0.979381443298969
7896 0.979381443298969
7897 0.979381443298969
7898 0.7959401709401709
7899 0.7959401709401709
7900 0.9871794871794872
7901 0.6303501945525292
7902 0.6854460093896714
7903 0.6854460093896714
7905 0.685446009389

8328 0.925764192139738
8329 0.925764192139738
8330 0.925764192139738
8331 1.0
8332 0.77
8333 0.77
8334 0.9474734042553191
8335 0.9645776566757494
8336 0.9425363276089829
8337 0.8019559902200489
8338 0.7769607843137255
8339 0.7514792899408284
8340 0.8102409638554217
8341 0.7894736842105263
8342 0.8025974025974026
8343 0.992816091954023
8344 0.648936170212766
8345 0.9744952178533475
8346 0.975531914893617
8347 0.9992721979621543
8348 0.9992721979621543
8349 0.9990253411306043
8350 0.8785276073619632
8351 0.7700831024930748
8352 0.7516778523489933
8353 0.555858310626703
8354 0.9222222222222223
8355 0.9053857350800583
8356 0.8111298482293423
8357 0.7264150943396226
8358 0.9838591342626559
8359 1.0
8360 0.7591549295774648
8361 0.7591549295774648
8362 1.0
8363 1.0
8364 0.8642384105960265
8365 0.8642384105960265
8366 1.0
8367 1.0
8368 1.0
8369 1.0
8370 1.0
8372 0.9993108201240524
8373 1.0
8374 1.0
8375 1.0
8377 1.0
8378 1.0
8379 1.0
8381 1.0
8383 0.9993108201240524
8387 1.0
8388 1.0
8389 1.0


8795 0.776595744680851
8796 1.0
8797 0.81146408839779
8798 0.6855225311601151
8799 0.7406523468575974
8800 0.7864238410596026
8801 0.7264150943396226
8803 0.8111298482293423
8804 0.8229755178907722
8806 0.7048903878583473
8810 1.0
8811 1.0
8812 1.0
8813 1.0
8815 1.0
8817 1.0
8818 0.9381084840055632
8819 0.9381084840055632
8820 0.9841040462427746
8821 0.9842249657064472
8822 0.9992927864214993
8823 0.9618008185538881
8824 0.9076923076923077
8825 0.9881201956673655
8826 0.9835390946502057
8829 0.9901380670611439
8830 0.9881889763779528
8831 0.9920948616600791
8832 0.9990636704119851
8833 0.6985111662531017
8834 1.0
8835 0.9990592662276576
8836 0.8785276073619632
8837 0.7752293577981652
8838 0.7735849056603774
8839 0.8057553956834532
8840 0.7243816254416962
8842 0.9990636704119851
8843 0.9990636704119851
8844 0.9990636704119851
8845 0.9990636704119851
8846 0.8319327731092437
8847 0.9371155160628845
8848 1.0
8849 1.0
8850 1.0
8851 1.0
8852 0.8825065274151436
8853 0.9875827814569537
8854 0.

9265 0.9974025974025974
9266 0.9974025974025974
9267 0.9974025974025974
9268 0.9974025974025974
9269 0.9974025974025974
9270 0.9974025974025974
9271 0.9974025974025974
9272 0.9974025974025974
9273 0.9974025974025974
9274 0.9974025974025974
9275 0.9974025974025974
9276 0.9974025974025974
9277 0.9974025974025974
9278 0.9974025974025974
9279 0.9974025974025974
9280 0.9974025974025974
9281 0.9974025974025974
9282 0.9974025974025974
9283 0.9974025974025974
9284 0.9974025974025974
9285 0.9974025974025974
9286 0.9974025974025974
9287 0.9974025974025974
9288 0.9974025974025974
9289 0.9974025974025974
9290 0.9974025974025974
9291 0.9974025974025974
9292 0.9974025974025974
9293 0.9974025974025974
9294 0.9974025974025974
9295 0.9974025974025974
9296 0.9974025974025974
9297 0.9974025974025974
9298 0.9974025974025974
9299 0.9974025974025974
9300 0.9974025974025974
9301 0.9974025974025974
9302 0.9974025974025974
9303 0.9974025974025974
9304 0.9974025974025974
9305 0.9974025974025974
9306 0.997402597

9627 0.8111298482293423
9628 0.8111298482293423
9629 0.7925801011804384
9630 0.7925801011804384
9631 0.8111298482293423
9632 0.7925801011804384
9633 0.8111298482293423
9634 0.8111298482293423
9635 0.8111298482293423
9636 0.7925801011804384
9637 0.7925801011804384
9638 0.7925801011804384
9639 0.7647058823529411
9640 0.7647058823529411
9641 0.8450980392156863
9642 0.958041958041958
9643 0.8825065274151436
9644 0.8825065274151436
9645 0.8853333333333333
9646 0.7398601398601399
9647 0.7398601398601399
9648 0.9354838709677419
9649 0.9354838709677419
9650 0.958041958041958
9651 0.8853333333333333
9652 1.0
9653 1.0
9654 0.9992972593113141
9655 0.9948415622697127
9656 0.9948415622697127
9657 1.0
9658 0.5834710743801653
9659 0.8608247422680413
9660 0.5867768595041323
9661 0.6867671691792295
9662 0.6246105919003115
9663 0.8608247422680413
9664 0.6104294478527608
9665 0.6867671691792295
9666 0.6867671691792295
9667 0.6151419558359621
9668 0.7718223583460949
9669 0.8608247422680413
9670 1.0
9671 0

10041 0.9588014981273408
10042 0.9565217391304348
10043 0.9565217391304348
10044 0.8064516129032258
10045 1.0
10046 0.6088516746411483
10047 0.6088516746411483
10048 0.6949602122015915
10049 0.6754385964912281
10050 0.7603978300180831
10051 0.7603978300180831
10052 0.6754385964912281
10053 0.9952830188679245
10054 0.9952830188679245
10057 0.9861399861399861
10058 0.7200791295746786
10059 0.7957957957957958
10060 0.8267716535433071
10061 1.0
10062 0.8164021164021164
10064 0.9049951028403526
10065 0.9163179916317992
10067 0.8245901639344262
10068 0.9957983193277311
10069 0.9941860465116279
10070 1.0
10071 0.6926147704590818
10072 0.8540983606557377
10073 0.7416107382550335
10074 0.7235494880546075
10075 1.0
10076 1.0
10077 0.9582027168234065
10078 0.9582027168234065
10079 0.9582027168234065
10080 0.9592050209205021
10081 0.9592050209205021
10083 0.8111298482293423
10084 0.5585585585585585
10085 0.6828703703703703
10086 0.6794258373205742
10087 0.683083511777302
10088 0.6830808080808081
1

10445 0.986870897155361
10446 0.953998584571833
10447 0.953998584571833
10448 0.953998584571833
10450 0.7427258805513017
10453 0.8984771573604061
10457 0.7537537537537538
10461 0.6947890818858561
10462 0.9421965317919075
10463 0.8984771573604061
10467 0.996875
10468 0.7113636363636363
10469 0.9613492621222769
10470 0.8205128205128205
10471 0.8205128205128205
10484 0.8695208970438328
10485 0.6592082616179001
10486 0.6592082616179001
10487 0.6592082616179001
10488 0.8695208970438328
10489 0.8695208970438328
10490 0.9612159329140462
10491 0.9612159329140462
10492 0.980188679245283
10493 0.9428571428571428
10494 0.980188679245283
10495 0.9707317073170731
10496 0.9991803278688525
10497 0.9991803278688525
10498 0.996875
10499 0.953998584571833
10500 0.953998584571833
10500
10501 0.9596662030598053
10502 0.953998584571833
10503 0.953998584571833
10504 1.0
10505 1.0
10506 1.0
10507 0.7194719471947195
10508 0.9794326241134752
10509 0.9808781869688386
10510 0.9561586638830898
10511 1.0
10512 1.0

In [21]:
brenda_df.to_pickle(join(datasets_dir,"brenda_data", "brenda_df_kcat_with_BIGG_ID_curated_brenda.pkl"))

In [22]:
len(brenda_df.loc[brenda_df["BiGG acc"] > 0.9]), len(brenda_df.loc[brenda_df["BiGG acc"] > 0.8]), len(brenda_df)

(5035, 6441, 8712)

In [8]:
brenda_df

Unnamed: 0,Sequence,EC,ORGANISM,Unirep,ESM1b,structural fp,difference fp,reaction ID,log10_kcat,PMID,ID,curated,KM_f,KM_b,KM_f_min,KM_b_min,KM_f_max,KM_b_max,BiGG acc,BiGG ID
0,AHNIVLYTGAKMPILGLGTWKSPPGKVTEAVKVAIDLGYRHIDCAH...,1.1.1.21,Bos taurus,"[0.025203342000000004, 0.30913870000000004, 0....","[-0.10840322, 0.16640322, 0.059083, 0.06476542...",1100111100000001001000110110010001000111111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",844,1.462398,25528584.0,472.0,False,-0.532609,-0.204926,-1.770209,-1.193613,0.679357,0.719127,0.994358,MEHLER
1,AHNIVLYTGAKMPILGLGTWKSPPGKVTEAVKVAIDLGYRHIDCAH...,1.1.1.21,Bos taurus,"[0.025203342000000004, 0.30913870000000004, 0....","[-0.10840322, 0.16640322, 0.059083, 0.06476542...",1100111100000001001000110110010001000111111100...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",847,1.477121,25528584.0,475.0,False,-0.491944,-0.679612,-1.770209,-1.193613,0.740868,-0.229058,0.994358,MEHLER
2,ANPYERGPNPTDALLEARSGPFSVSEENVSRLSASGFGGGTIYYPR...,3.1.1.102,Thermobifida fusca,"[0.005025189, 0.08573241, 0.08789253, -0.04244...","[0.08781557, 0.11973653, 0.0125365695, 0.13011...",1100000000000000000000000000000001000000001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",15374,1.428135,0.0,7699.0,False,-0.365211,-0.007884,-0.751146,-0.528487,0.020723,0.512719,0.623907,PPBNGS_r
3,ANPYERGPNPTDALLEARSGPFSVSEENVSRLSASGFGGGTIYYPR...,3.1.1.102,Thermobifida fusca,"[0.005025189, 0.08573241, 0.08789253, -0.04244...","[0.08781557, 0.11973653, 0.0125365695, 0.13011...",1100000000000000100100100000000001000000001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",15375,-0.508638,0.0,7700.0,False,-0.475021,-0.007884,-0.970766,-0.528487,0.020723,0.512719,0.603933,PPBNGS_r
4,ANPYERGPNPTDALLEARSGPFSVSEENVSRLSASGFGGGTIYYPR...,3.1.1.74,Thermobifida fusca,"[0.005025189, 0.08573241, 0.08789253, -0.04244...","[0.08781557, 0.11973653, 0.0125365695, 0.13011...",1100000000000000000000000000000001000001001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",16149,1.596597,24055682.0,8031.0,False,-0.342578,-0.064454,-0.725704,-0.370010,0.023953,0.248708,0.736544,4ABZt
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8216,WIDAHGDINTPLNSASGNMHGMPLSFLVKELQDQIPWLDDFEGIKP...,3.5.3.1,Schistosoma mansoni,"[0.0083276825, 0.04219356, 0.0381751, -0.02205...","[0.17181623, 0.081409834, -0.013364498, 0.0748...",1000000000000000000000000000000001001001001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",26286,2.729974,18723022.0,11256.0,False,0.779477,-0.201707,0.760935,-0.971490,0.798018,0.568076,0.770083,CXSAMS_r
8217,WLNDPNGLVYYAGEYHLFYQYHPYGLQWGPMHWGHAVSKDLVTWEH...,3.2.1.65,Bacillus subtilis,"[0.007648978000000001, 0.10721281, 0.07180191,...","[-0.061256662, 0.15615267, -0.22104761, 0.1482...",1100110100000000000000100010000001000000001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",20098,2.216430,26600431.0,9671.0,False,0.426639,0.351805,0.105799,0.313139,0.747478,0.390472,0.854098,LACZ
8218,YFFSLFLVTIFLYKWLAKKTPSKNLPPSPPRLPIIGNLHQIGPDLH...,1.14.14.148,Pastinaca sativa,"[0.031550586, 0.00068745576, 0.15153226, -0.03...","[-0.089798585, 0.1455369, -0.03858189, 0.11403...",1100110100000101001000110110000001000001111000...,"[0.0, 0.0, 0.0, 0.0, -10.0, 0.0, 0.0, 0.0, 0.0...",5135,-0.108048,19098286.0,2430.5,False,-2.043195,-1.547098,-2.690260,-2.576391,-1.222442,-0.392845,0.642787,ENTERES
8219,YLPAQQIDVQSSLLSDPSKVAGKTYDYIIAGGGLTGLTVAAKLTEN...,1.1.3.4,Penicillium amagasakiense,"[0.012606015, 0.23902997, 0.054749900000000004...","[0.22781767, 0.22576056, 0.015778482, 0.103559...",1100000000000000000000000000000001000000001000...,"[0.0, 0.0, 0.0, 0.0, 0.0, 30.0, 10.0, 10.0, 0....",2561,3.301681,10749686.0,1527.0,True,-0.079757,-0.310286,-0.776331,-0.720533,0.616817,0.099962,0.765775,DHACOAH
