#### Libraries

In [1]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
from requests_html import HTMLSession

from tqdm.notebook import tqdm

In [2]:
# Load the four reaction tables from their Excel workbooks.
# header=1 means the column names are taken from the second row of the sheet.
camel_df = pd.read_excel(
    '../Data/1-s2.0-S1096717618302258-mmc1.xlsx',
    header=1,
)
hefzi_df = pd.read_excel('../Data/hefzi_final.xlsx')
fouladiha_df = pd.read_excel(
    '../Data/10529_2020_3021_MOESM1_ESM.xlsx',
    sheet_name='Supplementary Table 10',
    header=1,
)
iCHO2291 = pd.read_excel('../Data/iCHO2291_final.xlsx')

In [3]:
# Standardization of the column names so the tables share one vocabulary
camel_df.rename(
    columns={
        'Reaction ID': 'Reaction',
        'Initial reaction in model': 'Reaction Formula',
        'Reaction name': 'Reaction Name',
        'Justification': 'Curation Notes',
    },
    inplace=True,
)
fouladiha_df.rename(
    columns={
        'Abbreviation': 'Reaction',
        'Description': 'Reaction Name',
        'Reaction': 'Reaction Formula',
    },
    inplace=True,
)

# Provenance tags: every table gets the same four leading columns and only
# the column naming its own source is marked with 'X' (the rest stay NaN).
tag_order = ['cam', 'hef', 'fou', 'yeo']
tagged_frames = [
    ('cam', camel_df),
    ('hef', hefzi_df),
    ('fou', fouladiha_df),
    ('yeo', iCHO2291),
]
for own_tag, frame in tagged_frames:
    for position, tag in enumerate(tag_order):
        marker = 'X' if tag == own_tag else np.nan
        frame.insert(loc=position, column=tag, value=marker)


fouladiha_df

Unnamed: 0,cam,hef,fou,yeo,Reaction,Reaction Name,Reaction Formula,GPR,Genes,Protein,Subsystem,Reversible,Lower bound,Upper bound,Objective
0,,,X,,COKECBESr,Carboxylesterase (cocaine) (er),coke[r] + h2o[r] => bz[r] + egme[r] + h[r],(100756666) or (100767789),,,ALKALOID SYNTHESIS,0,0.0,1000,0
1,,,X,,EGMESTr,ecgonine methyl esterase (ER),h2o[r] + egme[r] => h[r] + ecgon[r] + meoh[r],(100771815) or (100751803) or (100752322) or (...,,,ALKALOID SYNTHESIS,0,0.0,1000,0
2,,,X,,NMPTRCOX,N-Methylputrescine:oxygen oxidoreductase (deam...,nmptrc[c] + o2[c] => nh4[c] + 1mpyr[c] + h2o2...,(100771382) or (100762635) or (100762926) or (...,,,ALKALOID SYNTHESIS,0,0.0,1000,0
3,,,X,,PRO1x,L-ProlineNAD+ 5-oxidoreductase,pro_L[c] + nad[c] => 2 h[c] + 1pyr5c[c] + nad...,(100773901),,,ARGININE AND PROLINE METABOLISM,0,0.0,1000,0
4,,,X,,DHDDH,Dihydrodiol dehydrogenase,nadp[c] + dhnpthld[c] => npthld[c] + nadph[c],(100753544),,,CYP METABOLISM,0,0.0,1000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7431,,,X,,SameRxns163,SameRxns163,nadph[c] => nadph[n],,,,"TRANSPORT, NUCLEAR",0,0.0,1000,0
7432,,,X,,SameRxns164,SameRxns164,nadp[n] => nadp[c],,,,"TRANSPORT, NUCLEAR",0,0.0,1000,0
7433,,,X,,SameRxns165,SameRxns165,atp[g] <=> atp[e],,,,"TRANSPORT, GOLGI APPARATUS",1,-1000.0,1000,0
7434,,,X,,SameRxns166,SameRxns166,retinol[e] => retinol[r],,,,"TRANSPORT, ENDOPLASMIC RETICULAR",0,0.0,1000,0


In [4]:
# Every column name that appears in any of the four tables, in order of
# first appearance (hefzi, fouladiha, iCHO2291, camel).
cols = hefzi_df.columns.to_list()+fouladiha_df.columns.to_list()+iCHO2291.columns.to_list()+camel_df.columns.to_list()

# Eliminate repeated values in the 'cols' list. dict keys keep insertion
# order, so the first occurrence of each name is retained in a single pass
# (the previous cols.index(...) scan was quadratic).
cols = list(dict.fromkeys(cols))

cols

['cam',
 'hef',
 'fou',
 'yeo',
 'Reaction',
 'Reaction Name',
 'Reaction Formula',
 'GPR',
 'Subsystem',
 'Lower bound',
 'Upper bound',
 'Curation Notes',
 'References',
 'Genes',
 'Protein',
 'Reversible',
 'Objective',
 'Proteins',
 'EC Number',
 'Mol wt',
 'kcat_forward',
 'kcat_backward',
 'Reaction ID Camels Models']

In [5]:
def add_col(df, columns=None):
    '''
    Return a copy of df extended with the columns it is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to extend. It is not modified.
    columns : iterable of str, optional
        Column names that must be present in the result. When omitted, the
        notebook-level list `cols` (built in an earlier cell) is used, which
        is the original behaviour of this function.

    Returns
    -------
    pandas.DataFrame
        The original columns in their original order, followed by the
        missing ones in the order given; the added columns are filled with NaN.
    '''
    if columns is None:
        # Fall back to the notebook-wide list of all known column names
        columns = cols
    missing = [col for col in columns if col not in df.columns]
    return df.reindex(columns=df.columns.tolist() + missing)

In [6]:
# Give every dataset the complete, shared set of columns
hefzi_df, fouladiha_df, iCHO2291, camel_df = [
    add_col(frame)
    for frame in (hefzi_df, fouladiha_df, iCHO2291, camel_df)
]

iCHO2291

Unnamed: 0,cam,hef,fou,yeo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Lower bound,...,Mol wt,kcat_forward,kcat_backward,Curation Notes,References,Genes,Protein,Reversible,Objective,Reaction ID Camels Models
0,,,,X,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,Transport,0.0,...,,,,,,,,,,
1,,,,X,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] --> 10fthf5glu[c],,Transport,0.0,...,,,,,,,,,,
2,,,,X,10FTHF6GLUtl,"6-glutamyl-10FTHF transport, lysosomal",10fthf6glu[c] --> 10fthf6glu[l],,Transport,0.0,...,,,,,,,,,,
3,,,,X,10FTHF6GLUtm,"6-glutamyl-10FTHF transport, mitochondrial",10fthf6glu[m] --> 10fthf6glu[c],,Transport,0.0,...,,,,,,,,,,
4,,,,X,10FTHF7GLUtl,"7-glutamyl-10FTHF transport, lysosomal",10fthf7glu[c] --> 10fthf7glu[l],,Transport,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6231,,,,X,igg_hc,igg_hc,24.0 ala_L[c] + 8.0 arg_L[c] + 19.0 asn_L[c] +...,,Miscellaneous,0.0,...,,,,,,,,,,
6232,,,,X,igg_lc,igg_lc,15.0 ala_L[c] + 6.0 arg_L[c] + 7.0 asn_L[c] + ...,,Miscellaneous,0.0,...,,,,,,,,,,
6233,,,,X,peplys_synthesis,peplys synthesis,lys_L[n] --> peplys[n],,Miscellaneous,0.0,...,,,,,,,,,,
6234,,,,X,q10h2tc,transport of ubiquinol into cytosol,q10h2[m] <=> q10h2[c],,Transport,-1000.0,...,,,,,,,,,,


In [7]:
# Put the columns of all datasets in the same order.
# Only the names listed here are kept; e.g. 'Proteins', which is present in
# `cols`, is not in this list and is therefore dropped from every table.
column_order = [
    'cam', 'hef', 'fou', 'yeo',
    'Reaction', 'Reaction Name', 'Reaction Formula',
    'GPR', 'Subsystem', 'Genes', 'Protein', 'EC Number',
    'Mol wt', 'kcat_forward', 'kcat_backward',
    'Reversible', 'Lower bound', 'Upper bound', 'Objective',
    'Curation Notes', 'References', 'Reaction ID Camels Models',
]
fouladiha_df = fouladiha_df[column_order]
iCHO2291 = iCHO2291[column_order]
hefzi_df = hefzi_df[column_order]
camel_df = camel_df[column_order]

# Remove leading/trailing whitespace from the reaction IDs of this table
camel_df['Reaction'] = camel_df['Reaction'].str.strip()

In [8]:
# Stack the four harmonised tables into one frame with a fresh 0..n-1 index
all_dfs = pd.concat(
    [camel_df, hefzi_df, fouladiha_df, iCHO2291],
    ignore_index=True,
)
# Drop the '_cho' marker from reaction IDs so variants share one ID
all_dfs['Reaction'] = all_dfs['Reaction'].str.replace('_cho', '')

all_dfs #20940 rows/reactions (many of them repeated)

Unnamed: 0,cam,hef,fou,yeo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Genes,...,Mol wt,kcat_forward,kcat_backward,Reversible,Lower bound,Upper bound,Objective,Curation Notes,References,Reaction ID Camels Models
0,X,,,,GLCt1r,glucose transport (uniport),glc_D_e --> glc_D_c,,,,...,,,,,0.0,,,Unidrectional transporters,,
1,X,,,,GLCt2r,D-glucose transport in via proton symport,glc_D_e + h_e --> glc_D_c + h_c,,,,...,,,,,0.0,,,Unidrectional transporters,,
2,X,,,,GLCt2_2,D-glucose transport in via proton symport,glc_D_e + 2.0 h_e --> glc_D_c + 2.0 h_c,,,,...,,,,,0.0,,,Unidrectional transporters,,
3,X,,,,RE1342C,RE1342,nad_c + sbt_D_c --> glc_D_c + h_c + nadh_c,,,,...,,,,,0.0,,,Unidrectional transporters,,
4,X,,,,FRUt4,D-fructose transport via sodium cotransport,fru_e + na1_e --> fru_c + na1_c,,,,...,,,,,0.0,,,Unidrectional transporters,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20935,,,,X,igg_hc,igg_hc,24.0 ala_L[c] + 8.0 arg_L[c] + 19.0 asn_L[c] +...,,Miscellaneous,,...,,,,,0.0,1000,,,,
20936,,,,X,igg_lc,igg_lc,15.0 ala_L[c] + 6.0 arg_L[c] + 7.0 asn_L[c] + ...,,Miscellaneous,,...,,,,,0.0,1000,,,,
20937,,,,X,peplys_synthesis,peplys synthesis,lys_L[n] --> peplys[n],,Miscellaneous,,...,,,,,0.0,1000,,,,
20938,,,,X,q10h2tc,transport of ubiquinol into cytosol,q10h2[m] <=> q10h2[c],,Transport,,...,,,,,-1000.0,1000,,,,


In [65]:
#def f(x):
#    d = {}
#    
#    l = []
#    for x in x['GPR']:
#        l.append(x)
#    
#    if len(set(l)) == 1:
#        print('no')
#    elif len(set(l)) > 1:
#        print(l)
#        print(l)
#        if x == x[x+1]:
#            print(x)
#    d['GPR'] = x['GPR'].apply(lambda x: x if len(x.unique()>1))
#    d['a_max'] = x['a'].max()
#    d['b_mean'] = x['b'].mean()
#    d['c_d_prodsum'] = (x['c'] * x['d']).sum()  
#    return pd.Series(d, index=['GPR'])#, 'a_max', 'b_mean', 'c_d_prodsum'])

# all_dfs1 = all_dfs.groupby(['Reaction'], group_keys=True).apply(lambda x: f(x))

no
no
no
no
no
no
no
no
no
no
no
no
no
no
['100762926 or 100771382 or 100762635 or 100763954', '(100762926) or (100771382) or (100762635) or (100763954)', '100762926 or 100771382 or 100762635 or 100763954']
no
no
no
no
no
no
no
no
no
[nan, nan, '100751280 or 100751853 or 100773278']
[nan, nan, '100751280 or 100751853 or 100773278']
[nan, nan, '100751280 or 100751853 or 100773278']
[nan, nan, '100751280 or 100751853 or 100773278']
no
no
no
no
no
no
['100751280 and 100751853 and 100773278', '(100751280 and 100751853 and 100773278)', '100751280 or 100751853 or 100773278']
['100751280 and 100751853 and 100773278', '(100751280 and 100751853 and 100773278)', '100751280 or 100751853 or 100773278']
no
no
no
no
[nan, nan, '100751280 or 100751853 or 100773278']
[nan, nan, '100763450 or 100772030']
no
['100760414 and 100751853 and 100773278', '(100760414 and 100751853 and 100773278)', '100751280 or 100751853 or 100773278']
['100760414 and 100751853 and 100773278', '(100760414 and 100751853 and 10

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100758717', '(100758717)', '100758717']
['100754791', '(100754791)', '100754791']
['100758717', '(100758717)', '100758717']
no
no
no
no
no
['100757472', '(100757472)', '100757472']
['100753284', '(100753284)', '100753284']
no
['100762944', '(100762944)', '100762944']
['100768251', '(100768251)', '100768251']
no
[nan, nan, '100754527']
no
no
['100761966 or 100754792', '(100761966) or (100754792)', '100761966 or 100754792']
['100761511', '(100761511)', '100761511']
['100773795 or 100774089 or 100753604 or 100767281 or 100762692 or 100772598 or 100751558 or 100760137 or 100758981', '(100773795) or (100774089) or (100753604) or (100767281) or (100762692) or (100772598) or (100751558) or (100760137) or (100758981)', '100773795 or 100774089 o

In [67]:
# Regroup the merged table by reaction ID and return every group unchanged
# (identity apply). In the output shown for this cell, rows sharing a
# reaction ID sit together and 'Reaction' is the outer index level.
# The next cell repeats this statement and also exports the result to Excel.
all_dfs1 = all_dfs.groupby(['Reaction'], group_keys=True).apply(lambda x: x)
all_dfs1

Unnamed: 0_level_0,Unnamed: 1_level_0,cam,hef,fou,yeo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Genes,...,Mol wt,kcat_forward,kcat_backward,Reversible,Lower bound,Upper bound,Objective,Curation Notes,References,Reaction ID Camels Models
Reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10FTHF5GLUtl,605,,X,,,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu_c --> 10fthf5glu_l,,"TRANSPORT, LYSOSOMAL",,...,,,,,0.0,1000.0,,,,
10FTHF5GLUtl,10778,,,X,,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] => 10fthf5glu[l],,"TRANSPORT, LYSOSOMAL",,...,,,,0.0,0.0,1000,0.0,,,
10FTHF5GLUtl,14704,,,,X,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,Transport,,...,,,,,0.0,1000,,,,
10FTHF5GLUtm,606,,X,,,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu_m --> 10fthf5glu_c,,"TRANSPORT, MITOCHONDRIAL",,...,,,,,0.0,1000.0,,,,
10FTHF5GLUtm,10779,,,X,,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] => 10fthf5glu[c],,"TRANSPORT, MITOCHONDRIAL",,...,,,,0.0,0.0,1000,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
r2534,7263,,X,,,r2534,Major Facilitator(MFS) TCDB:2.A.1.44.1,thr_L_e <=> thr_L_c,,"TRANSPORT, EXTRACELLULAR",,...,,,,,-1000.0,1000.0,,,,
r2535,7264,,X,,,r2535,Major Facilitator(MFS) TCDB:2.A.1.44.1,hom_L_e <=> hom_L_c,,"TRANSPORT, EXTRACELLULAR",,...,,,,,-1000.0,1000.0,,,,
r2537,7265,,X,,,r2537,Utilized transport,lnlncgcoa_c <=> lnlncgcoa_r,,"TRANSPORT, ENDOPLASMIC RETICULAR",,...,,,,,-1000.0,1000.0,,,,
r2538,7266,,X,,,r2538,Utilized transport,dlnlcgcoa_c <=> dlnlcgcoa_r,,"TRANSPORT, ENDOPLASMIC RETICULAR",,...,,,,,-1000.0,1000.0,,,,


In [10]:
# Arrange the merged table by reaction: the identity apply keeps every row
# and only regroups them under their 'Reaction' value
grouped_by_reaction = all_dfs.groupby(['Reaction'], group_keys=True)
all_dfs1 = grouped_by_reaction.apply(lambda group: group)

# Export the grouped table to an Excel workbook
all_dfs1.to_excel('../Data/all_dfs1.xlsx')

all_dfs1

Unnamed: 0_level_0,Unnamed: 1_level_0,cam,hef,fou,yeo,Reaction,Reaction Name,Reaction Formula,GPR,Subsystem,Genes,...,Mol wt,kcat_forward,kcat_backward,Reversible,Lower bound,Upper bound,Objective,Curation Notes,References,Reaction ID Camels Models
Reaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10FTHF5GLUtl,605,,X,,,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu_c --> 10fthf5glu_l,,"TRANSPORT, LYSOSOMAL",,...,,,,,0.0,1000.0,,,,
10FTHF5GLUtl,10778,,,X,,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] => 10fthf5glu[l],,"TRANSPORT, LYSOSOMAL",,...,,,,0.0,0.0,1000,0.0,,,
10FTHF5GLUtl,14704,,,,X,10FTHF5GLUtl,"5-glutamyl-10FTHF transport, lysosomal",10fthf5glu[c] --> 10fthf5glu[l],,Transport,,...,,,,,0.0,1000,,,,
10FTHF5GLUtm,606,,X,,,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu_m --> 10fthf5glu_c,,"TRANSPORT, MITOCHONDRIAL",,...,,,,,0.0,1000.0,,,,
10FTHF5GLUtm,10779,,,X,,10FTHF5GLUtm,"5-glutamyl-10FTHF transport, mitochondrial",10fthf5glu[m] => 10fthf5glu[c],,"TRANSPORT, MITOCHONDRIAL",,...,,,,0.0,0.0,1000,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
r2534,7263,,X,,,r2534,Major Facilitator(MFS) TCDB:2.A.1.44.1,thr_L_e <=> thr_L_c,,"TRANSPORT, EXTRACELLULAR",,...,,,,,-1000.0,1000.0,,,,
r2535,7264,,X,,,r2535,Major Facilitator(MFS) TCDB:2.A.1.44.1,hom_L_e <=> hom_L_c,,"TRANSPORT, EXTRACELLULAR",,...,,,,,-1000.0,1000.0,,,,
r2537,7265,,X,,,r2537,Utilized transport,lnlncgcoa_c <=> lnlncgcoa_r,,"TRANSPORT, ENDOPLASMIC RETICULAR",,...,,,,,-1000.0,1000.0,,,,
r2538,7266,,X,,,r2538,Utilized transport,dlnlcgcoa_c <=> dlnlcgcoa_r,,"TRANSPORT, ENDOPLASMIC RETICULAR",,...,,,,,-1000.0,1000.0,,,,


In [None]:
#def f(x):
#    d = {}
#    d['EC Number_m'] = x['EC Number'].first()
#    d['a_max'] = x['a'].max()
#    d['b_mean'] = x['b'].mean()
#    d['c_d_prodsum'] = (x['c'] * x['d']).sum()
#    return pd.Series(d, index=['EC Number_m', 'a_max', 'b_mean', 'c_d_prodsum'])

# One row per reaction ID: keep the first available entry of every column
# within each group, then export the condensed table
by_reaction = all_dfs.groupby('Reaction')
all_dfs2 = by_reaction.first()
all_dfs2.to_excel('../Data/all_dfs2.xlsx')
all_dfs2

In [None]:
# These functions fetch information from the BiGG database

def get_rxninfo(rxn, timeout=None):
    '''
    Download the BiGG page of a reaction and return it parsed.

    The iCHOv1 model page is requested first. If it does not answer with
    HTTP 200, the universal BiGG reaction page is requested instead. A
    message is printed for each request that does not answer with 200.

    Parameters
    ----------
    rxn : str
        Reaction identifier; it is appended to the BiGG URLs as is.
    timeout : float or tuple, optional
        Forwarded to ``session.get``. The default ``None`` keeps the
        previous behaviour (no time limit).

    Returns
    -------
    BeautifulSoup
        Parsed HTML of the last response received. This is returned even
        when both requests failed, so the caller may get an error page.
    '''
    # The context manager closes the session when the block ends; previously
    # a new session was opened on every call and never closed.
    with HTMLSession() as session:
        # Download the model-specific page
        response = session.get('http://bigg.ucsd.edu/models/iCHOv1/reactions/'+rxn, timeout=timeout)
        if response.status_code != 200:
            print('Status code:', response.status_code,f'Failed to fetch info on {rxn} from iCHOv1 model')
            # Fall back to the universal reaction page
            response = session.get('http://bigg.ucsd.edu/universal/reactions/'+rxn, timeout=timeout)
            if response.status_code != 200:
                print('Status code:', response.status_code,f'Failed to fetch info on {rxn} from BiGG')

    # Parse using beautiful soup (the body was already read, so the closed
    # session does not matter here)
    rxn_doc = BeautifulSoup(response.text,'html.parser')

    return rxn_doc


def bigg_attributes(rxn):
    
    '''
    Extract the attributes of a reaction from its parsed BiGG page.

    Parameters
    ----------
    rxn : parsed page
        Object returned by the get_rxninfo function. It must provide
        ``rxn.title.get_text()`` and ``rxn.find_all("p")``.

    Returns
    -------
    tuple of (str, str, str)
        Reaction description, reaction formula and subsystem. The subsystem
        is an empty string when the page title does not end with 'iCHOv1'.

    Raises
    ------
    IndexError
        If the page has fewer <p> elements than the positions read below.
    '''
    # Pages whose title ends with 'iCHOv1' are read with different <p> positions
    is_model_page = rxn.title.get_text().endswith('iCHOv1')

    # Collect the <p> elements once instead of searching the page per field
    paragraphs = rxn.find_all("p")

    # Reaction description: first <p> in both cases
    rxn_d = paragraphs[0].get_text()

    # NOTE(review): the positions below are fixed indices; presumably they
    # match the current BiGG page layout - verify if the site changes.
    if is_model_page:
        # Reaction formula
        form = paragraphs[2].get_text()

        # Subsystem
        subsystem = paragraphs[5].get_text()
    else:
        # Reaction formula
        form = paragraphs[1].get_text()

        # Subsystem is not read from this kind of page
        subsystem = ''
    
    return rxn_d, form, subsystem


In [None]:
#all_dfs3 = pd.DataFrame(columns = ['Reactions', 'Reaction Description', 'Reaction Formula', 'Subsystem'])
att = []
# For every reaction ID, fetch its BiGG page and use it to fill in a missing
# formula and/or name in all_dfs2. Existing values are never overwritten.
for r in tqdm(all_dfs2.index):
    try:
        rxn = get_rxninfo(r)
        rxn_d, form, subsystem = bigg_attributes(rxn)
        print(rxn_d, form, subsystem)
        # pd.isna recognises both NaN and None; the former `== None` test is
        # never true for NaN, so missing cells were never filled.
        # .loc[row, column] writes into all_dfs2 itself, whereas the chained
        # form all_dfs2[column][i] = ... may write to a temporary copy.
        if pd.isna(all_dfs2.loc[r, 'Reaction Formula']):
            all_dfs2.loc[r, 'Reaction Formula'] = form
        if pd.isna(all_dfs2.loc[r, 'Reaction Name']):
            all_dfs2.loc[r, 'Reaction Name'] = rxn_d
    except Exception as exc:
        # `Exception` (not a bare except) so that interrupting the kernel
        # still stops the cell; the error itself is shown for diagnosis.
        print(f'Could not find info for {r}: {exc!r}')
        # NOTE(review): the loop stops at the first failing reaction, so the
        # remaining IDs are not processed - confirm this is intended
        # (`continue` would skip only the failing one).
        break
        
all_dfs2.to_excel('../Data/all_dfs2.xlsx')

In [None]:
# Scratch check: list every <p> element of the page currently bound to `rxn`
# (relies on `rxn` having been assigned by an earlier cell).
rxn.find_all("p")

In [None]:
# Scratch check: text of the third <p> element, the position bigg_attributes
# reads as the formula when the page title ends with 'iCHOv1'.
rxn.find_all("p")[2].get_text()

In [None]:
# Scratch check: list the <div class="col-lg-8"> elements of the page bound to `rxn`
rxn.find_all("div", class_="col-lg-8")

In [None]:
# Scratch check: fetch a single reaction and show whether its page title ends
# with 'iCHOv1', which is the condition bigg_attributes branches on.
rxn = get_rxninfo('5G2OXPTtx')
print(rxn.title.get_text().endswith('iCHOv1'))


In [None]:
# Scratch check: text of the fourth <p> element of the page bound to `rxn`
# (bigg_attributes does not read this position).
rxn.find_all("p")[3].get_text()

In [None]:
# Report every reaction ID ending in '_cho' together with its position, and
# also the preceding ID when that one is the same name without the suffix.
# NOTE(review): '_cho' is removed from all_dfs['Reaction'] before all_dfs2 is
# built, so presumably nothing matches when the notebook is run top to bottom.
cho_suffix = '_cho'
for i,r in enumerate(tqdm((all_dfs2.index))):
    if r.endswith(cho_suffix):
        print(r,i)
        # Strip only the trailing suffix; split('_')[0] would cut the ID at
        # its first underscore and truncate IDs that contain other underscores.
        base_id = r[:-len(cho_suffix)]
        # i > 0: index[i-1] with i == 0 would wrap around to the last label
        if i > 0 and all_dfs2.index[i-1] == base_id:
            print(all_dfs2.index[i-1],i-1)