# Enzyme Module Extraction For Sabio-RK Database


In [22]:
# installs the necessary package to run the automation
# https://github.com/helium/helium-python

from helium import *

import pandas as pd

from time import sleep

In [23]:
# Excel file with the UniProt_IDs of the enzymes that are to be extracted from Sabio-RK 
# Make sure the file name matches and is in the same file path as this notebook
ID_table = pd.read_excel("EnzymeModulesList.xlsx", sep='|')
ID_table

Unnamed: 0,Enzymes,UniProt_ID
0,Fructose-bisphosphatase (FBP),P19112
1,Glycogen synthase (ggn -> glygn1) (GLGNS1),P67999


In [24]:
list_of_ids = []
list_of_urls = []

n = len(ID_table.UniProt_ID)

# Loop that goes through each enzyme using it's ID one by one
for i in range(n):
    ids = ID_table.loc[i, 'UniProt_ID']
    url = 'http://sabiork.h-its.org/newSearch?q=UniProtKB_AC:'+ str(ids)
    
# Opens the url mentioned above into Chrome browser (these browsers can be changed... look at documentation)
    browser = start_chrome(url)
    click(CheckBox("Mutant"))

# Makes sure all of the entries are displayed 
    entries = Text(to_right_of="Total number of kinetic law entries found:").value
    if int(entries) > 15:
        select(ComboBox("15"), "100")
        
    
    
#Loop that checks if total number of kinetic entries that are found are all accounted for (if more than 100)
    left_over = int(entries) - 100
    while (left_over > 0):
        click(CheckBox(below = "Add to export cart?"))
        click("Next")
        left_over = left_over - 100

# Adds the entries on the last page to the cart
    click(CheckBox(below = "Add to export cart?"))


# Automation to export the datasheets as .tsv
    click("Entries to export")
    click(Button("write spreadsheet"))
    click("Add all")

# makes sure there are no repeats in the data sets
    click(CheckBox("Export Distinct Rows Only"))

# This line of code can be changed to export a excel file using the ID name
    click(S("#csvExport"))

    sleep(10)
    kill_browser()

# This creates a list of the IDs and the urls that come out from the excel sheet that was inputted
    list_of_ids =  list_of_ids+[str(ids)]
    list_of_urls = list_of_urls + [url]  

In [25]:
print(list_of_ids)
print(list_of_urls)

['P19112', 'P67999']
['http://sabiork.h-its.org/newSearch?q=UniProtKB_AC:P19112', 'http://sabiork.h-its.org/newSearch?q=UniProtKB_AC:P67999']


## Putting the excel files into Pandas Dataframes for Manipulation

In [26]:
## Combining Dataframes for Enzymes
import pandas as pd
n = len(ID_table.UniProt_ID)

df = pd.read_csv("sabioExport.tsv", sep='\t')

for i in range(n):
    if i == 0:
        df = df.append(pd.read_csv("sabioExport.tsv", sep='\t'))
    else:
        df = df.append(pd.read_csv("sabioExport ("+str(i)+").tsv", sep='\t'))
    
df

Unnamed: 0,EntryID,Reaction,Buffer,ECNumber,CellularLocation,UniProtKB_AC,Tissue,Enzyme Variant,Enzymename,Organism,...,parameter.endValue,parameter.standardDeviation,parameter.unit,Pathway,Product,PubMedID,Publication,Rate Equation,SabioReactionID,Substrate
0,196,"H2O + D-Fructose 1,6-bisphosphate = Phosphate ...","[ 50 mM HEPES, 10 mM Sodium phosphate, 10 mM M...",3.1.3.11,"[unknown, unknown, unknown, unknown, unknown]",Q9Z1N1;P19112,kidney,wildtype,fructose-bisphosphatase,Rattus norvegicus,...,,6.0E-7,M,Calvin cycle,Phosphate;D-Fructose 6-phosphate,8396135,"Sola MM, Salto R, Oliver FJ, Gutiérrez M, Varg...",unknown,1117,"H2O;D-Fructose 1,6-bisphosphate"
1,196,"H2O + D-Fructose 1,6-bisphosphate = Phosphate ...","[ 50 mM HEPES, 10 mM Sodium phosphate, 10 mM M...",3.1.3.11,"[unknown, unknown, unknown, unknown, unknown]",Q9Z1N1;P19112,kidney,wildtype,fructose-bisphosphatase,Rattus norvegicus,...,,-,M*s^(-1)*g^(-1),Calvin cycle,Phosphate;D-Fructose 6-phosphate,8396135,"Sola MM, Salto R, Oliver FJ, Gutiérrez M, Varg...",unknown,1117,"H2O;D-Fructose 1,6-bisphosphate"
2,201,"H2O + D-Fructose 1,6-bisphosphate = Phosphate ...","[ 50 mM HEPES, 10 mM Sodium phosphate, 10 mM M...",3.1.3.11,"[unknown, unknown, unknown, unknown, unknown, ...",Q9Z1N1;P19112,kidney,wildtype,fructose-bisphosphatase,Rattus norvegicus,...,,9.0E-7,M,Calvin cycle,Phosphate;D-Fructose 6-phosphate,8396135,"Sola MM, Salto R, Oliver FJ, Gutiérrez M, Varg...",unknown,1117,"D-Fructose 1,6-bisphosphate;H2O"
3,201,"H2O + D-Fructose 1,6-bisphosphate = Phosphate ...","[ 50 mM HEPES, 10 mM Sodium phosphate, 10 mM M...",3.1.3.11,"[unknown, unknown, unknown, unknown, unknown, ...",Q9Z1N1;P19112,kidney,wildtype,fructose-bisphosphatase,Rattus norvegicus,...,0.00005,,M,Calvin cycle,Phosphate;D-Fructose 6-phosphate,8396135,"Sola MM, Salto R, Oliver FJ, Gutiérrez M, Varg...",unknown,1117,"D-Fructose 1,6-bisphosphate;H2O"
4,201,"H2O + D-Fructose 1,6-bisphosphate = Phosphate ...","[ 50 mM HEPES, 10 mM Sodium phosphate, 10 mM M...",3.1.3.11,"[unknown, unknown, unknown, unknown, unknown, ...",Q9Z1N1;P19112,kidney,wildtype,fructose-bisphosphatase,Rattus norvegicus,...,,1.33333E-7,M*s^(-1)*g^(-1),Calvin cycle,Phosphate;D-Fructose 6-phosphate,8396135,"Sola MM, Salto R, Oliver FJ, Gutiérrez M, Varg...",unknown,1117,"D-Fructose 1,6-bisphosphate;H2O"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,53378,ATP + Arg-Pro-Arg-Thr-Ser-Ser-Phe = ADP + Arg-...,"[50 mM Tris-HCl, 0.1 mM EGTA, 0.1 % (v/v) 2-me...",2.7.11.1,"[unknown, unknown, unknown, unknown, unknown]",P67999,liver,wildtype,non-specific serine/threonine protein kinase,Rattus norvegicus,...,,-,M,,ADP;Arg-Pro-Arg-Thr-Ser-Ser-phosphate-Phe,8985174,"Alessi DR, Caudwell FB, Andjelkovic M, Hemming...",Vmax*S/(Km+S),13734,ATP;Arg-Pro-Arg-Thr-Ser-Ser-Phe
8,53379,ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...,"[50 mM Tris-HCl, 0.1 mM EGTA, 0.1 % (v/v) 2-me...",2.7.11.1,"[unknown, unknown, unknown, unknown, unknown]",P67999,liver,wildtype,non-specific serine/threonine protein kinase,Rattus norvegicus,...,,-,M,,ADP;Arg-Pro-Arg-Thr-Ser-Thr-phosphate-Phe,8985174,"Alessi DR, Caudwell FB, Andjelkovic M, Hemming...",Vmax*S/(Km+S),13735,ATP;Arg-Pro-Arg-Thr-Ser-Thr-Phe
9,53379,ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...,"[50 mM Tris-HCl, 0.1 mM EGTA, 0.1 % (v/v) 2-me...",2.7.11.1,"[unknown, unknown, unknown, unknown, unknown]",P67999,liver,wildtype,non-specific serine/threonine protein kinase,Rattus norvegicus,...,,-,M,,ADP;Arg-Pro-Arg-Thr-Ser-Thr-phosphate-Phe,8985174,"Alessi DR, Caudwell FB, Andjelkovic M, Hemming...",Vmax*S/(Km+S),13735,ATP;Arg-Pro-Arg-Thr-Ser-Thr-Phe
10,53379,ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...,"[50 mM Tris-HCl, 0.1 mM EGTA, 0.1 % (v/v) 2-me...",2.7.11.1,"[unknown, unknown, unknown, unknown, unknown]",P67999,liver,wildtype,non-specific serine/threonine protein kinase,Rattus norvegicus,...,,-,-,,ADP;Arg-Pro-Arg-Thr-Ser-Thr-phosphate-Phe,8985174,"Alessi DR, Caudwell FB, Andjelkovic M, Hemming...",Vmax*S/(Km+S),13735,ATP;Arg-Pro-Arg-Thr-Ser-Thr-Phe


In [27]:
## Output Dataframe for Enzymes Separately
import pandas as pd

dataframe_list=[]

for i in range(n):
    if i == 0:
        pd.read_csv("sabioExport.tsv", sep='\t')
    else:
        dataframe_list = dataframe_list+ [pd.read_csv("sabioExport ("+str(i)+").tsv", sep='\t')]

dataframe_list

[    EntryID                                           Reaction  \
 0     53377  ATP + Gly-Arg-Pro-Arg-Thr-Ser-Ser-Phe-Ala-Glu-...   
 1     53377  ATP + Gly-Arg-Pro-Arg-Thr-Ser-Ser-Phe-Ala-Glu-...   
 2     53377  ATP + Gly-Arg-Pro-Arg-Thr-Ser-Ser-Phe-Ala-Glu-...   
 3     53377  ATP + Gly-Arg-Pro-Arg-Thr-Ser-Ser-Phe-Ala-Glu-...   
 4     53378  ATP + Arg-Pro-Arg-Thr-Ser-Ser-Phe = ADP + Arg-...   
 5     53378  ATP + Arg-Pro-Arg-Thr-Ser-Ser-Phe = ADP + Arg-...   
 6     53378  ATP + Arg-Pro-Arg-Thr-Ser-Ser-Phe = ADP + Arg-...   
 7     53378  ATP + Arg-Pro-Arg-Thr-Ser-Ser-Phe = ADP + Arg-...   
 8     53379  ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...   
 9     53379  ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...   
 10    53379  ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...   
 11    53379  ATP + Arg-Pro-Arg-Thr-Ser-Thr-Phe = ADP + Arg-...   
 
                                                Buffer  ECNumber  \
 0   [50 mM Tris-HCl, 0.1 mM EGTA, 0.1 % (v/v) 2-me...  2.7

### Extra Note:

Sometimes the "Entries to export" doesn't take in all of the entries (especially if number of entries are over 200)

(Here is the code that we tried for that section, it would go in the for loop that goes through all of the UniprotIDs)

<code>if (entries == Text(to_left_of = Image(alt="show selected Entries")).value):
    print("you are all good")
else:
    print("you might need to redo this")
</code>