In [1]:
import pandas as pd
from collections import OrderedDict

#### Load kcat sources

In [2]:
# Load every kcat source table used to compile kapp values.
# All inputs live in the same precursor directory.
precursor_dir = '../input/precursor_files/'

df_kcat0 = pd.read_excel(precursor_dir + 'PARAMS_kapp_manual_curations.xlsx')
df_kcat1 = pd.read_excel(precursor_dir + 'PARAMS_kapp_Chen2021_kcat_invivo.xlsx')

# The four in-vitro tables are sheets of one workbook. Passing a list of sheet
# names makes pandas open the file once and return a {sheet name: DataFrame}
# dict, instead of re-opening and re-parsing the workbook for every sheet.
invitro_sheets = pd.read_excel(
    precursor_dir + 'PARAMS_kapp_Chen2021_kcat_invitro.xlsx',
    sheet_name=['Literature', 'SA', 'SABIORK20210203', 'BRENDA20210203'],
)
df_kcat2 = invitro_sheets['Literature']
df_kcat3 = invitro_sheets['SA']
df_kcat4 = invitro_sheets['SABIORK20210203']
df_kcat5 = invitro_sheets['BRENDA20210203']

df_kcat6 = pd.read_excel(precursor_dir + 'PARAMS_kapp_GECKO_kcat_invitro.xlsx')

In [3]:
# Map a source label to its kcat table. The insertion order matters: the
# compilation loop walks this mapping in order and never overwrites a value
# that an earlier source already provided, so earlier entries take priority.
df_kcat_dict = OrderedDict([
    ('manual_curation', df_kcat0),
    ('Chen2021_invivo', df_kcat1),
    ('Chen2021_invitro_literature', df_kcat2),
    ('Chen2021_invitro_SA', df_kcat3),
    ('Chen2021_invitro_SABIORK', df_kcat4),
    ('Chen2021_invitro_BRENDA', df_kcat5),
    ('Oftadeh2021_invitro_GECKO', df_kcat6),
])

#### Load enzyme dataframe

In [4]:
# Read the enzyme table and label its rows by the values of the 'id' column
# (the 'id' column itself is kept as a regular column as well).
enzyme_table_path = '../input/ENZYME_stoich_curation.xlsx'
df_enz = pd.read_excel(enzyme_table_path)
df_enz.index = df_enz['id'].tolist()

In [5]:
# Build the kapp table: one row per enzyme id, filled from the kcat sources in
# the order they appear in df_kcat_dict. The first source that provides a value
# for an id wins; later sources never overwrite it.
cols = ['id', 'kapp (1/s)', 'source']
enzyme_ids = df_enz.id.to_list()
df_kapp = pd.DataFrame(index=enzyme_ids, columns=cols)
# Positional assignment: row labels and 'id' values come from the same list.
df_kapp['id'] = enzyme_ids

for tag, df_kcat in df_kcat_dict.items():
    # This source is loaded but not used when compiling values.
    # NOTE(review): the reason for excluding it is not recorded here - confirm.
    if tag == 'Chen2021_invivo':
        continue

    for i in df_kcat.index:
        rxn_match = df_kcat.loc[i, 'rxn_match']

        # An empty spreadsheet cell is read as NaN; treat it like an explicit
        # 'skip' instead of failing on NaN.split().
        if pd.isnull(rxn_match):
            continue

        rxn_match = rxn_match.strip()
        if rxn_match == 'skip' or rxn_match == '':
            continue

        # 'rxn_match' holds one id or a comma-separated list of ids.
        for rxn in rxn_match.split(','):
            # Tolerate "A, B" style lists and trailing commas.
            rxn = rxn.strip()
            if rxn == '':
                continue

            # Fail loudly, and say where the bad id came from, rather than
            # surfacing a bare KeyError from the lookup below.
            if rxn not in df_kapp.index:
                raise KeyError(
                    "rxn_match id %r (source %r, row %r) is not an id in the enzyme table"
                    % (rxn, tag, i)
                )

            # Keep the value from the higher-priority source.
            if pd.notnull(df_kapp.loc[rxn, 'kapp (1/s)']):
                continue

            df_kapp.loc[rxn, 'kapp (1/s)'] = df_kcat.loc[i, 'kcat (1/s)']
            df_kapp.loc[rxn, 'source'] = tag

In [6]:
# Report how many rows did / did not receive a kapp value from the sources.
kapp_is_missing = df_kapp['kapp (1/s)'].isnull()
n_unassigned = int(kapp_is_missing.sum())
n_assigned = int(len(df_kapp) - n_unassigned)
print('kapp assigned:', n_assigned)
print('kapp unassigned', n_unassigned)

kapp assigned: 1445
kapp unassigned 1820


In [7]:
# Write the source-derived values to disk (row labels are not written; the
# 'id' column carries them). The last cell of the notebook writes to this same
# path again after the remaining entries have been processed.
df_kapp.to_excel(excel_writer='../input/PARAMS_kapp_compiled_frompy.xlsx',
                 index=False)

#### Processing the rest of entries
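- Entries whose id ends in `-SPONT` (spontaneous): set source to `SPONT`; no kapp value is assigned
- Entries whose id ends in `-UNKNOWN`: set source to `UNKNOWN`; no kapp value is assigned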
- Transport reactions => Assign an arbitrarily large kcat, 1e9 1/s
- For enzyme-associated reactions without kcat assignment, assign median kcat = 70.9 1/s

In [8]:
# Label the rows of the kapp table by the values of its 'id' column so the
# cells below can select entries by id.
df_kapp.index = df_kapp['id'].tolist()

In [9]:
# Spontaneous and unknown tag
# Ids are classified purely by their suffix. Only the 'source' column is
# written here; the 'kapp (1/s)' column is left untouched.
suffix_to_source = (('-SPONT', 'SPONT'), ('-UNKNOWN', 'UNKNOWN'))
for id_suffix, source_tag in suffix_to_source:
    has_suffix = [entry_id.endswith(id_suffix) for entry_id in df_kapp.index]
    df_kapp.loc[has_suffix, 'source'] = source_tag

In [10]:
# Transport reactions
# Every id containing a lowercase 't' is labelled TRANSPORT and given a very
# large kapp; this overwrites any source/kapp the row already had.
# NOTE(review): this is a plain substring test, so ANY id with a 't' anywhere
# in it matches - confirm the id naming scheme guarantees that only transport
# reactions contain a lowercase 't'.
is_transport = ['t' in entry_id for entry_id in df_kapp.index]
df_kapp.loc[is_transport, 'source'] = 'TRANSPORT'
df_kapp.loc[is_transport, 'kapp (1/s)'] = 1e9

In [11]:
# Fallback: every row that still has no source label at this point receives
# the median kcat value (70.9 1/s) and is labelled accordingly.
source_is_missing = df_kapp['source'].isnull().to_numpy()
unsourced_ids = df_kapp.index[source_is_missing]
for entry_id in unsourced_ids:
    df_kapp.loc[entry_id, 'source'] = 'median_kcat'
    df_kapp.loc[entry_id, 'kapp (1/s)'] = 70.9

In [12]:
# Write the fully processed kapp table to disk, replacing the file written
# earlier in the notebook at the same path. Row labels are not written; the
# 'id' column carries them.
df_kapp.to_excel(excel_writer='../input/PARAMS_kapp_compiled_frompy.xlsx',
                 index=False)