Because of BRENDA's license, the database cannot be redistributed here: you have to download it yourself as a text file after accepting the usage conditions [here](https://www.brenda-enzymes.org/download.php).

# Load data

In [1]:
from brendapyrser import BRENDA
# Path to the downloaded BRENDA text file; adjust it to wherever the licensed
# download was saved.
dataFile = 'data/brenda_2023_1.txt'
# Build the BRENDA object from that file; the cells below iterate over
# `brenda.reactions`.
brenda = BRENDA(dataFile)

# Create Reaction Database

In [4]:
# Start every per-reaction accumulator, plus the `data` list of row dicts,
# as its own independent empty list.
(
    reaction_ec_numbers,
    reaction_ec_annotation,
    reaction_names,
    reaction_systematic_name,
    reaction_str,
    reaction_type,
    reaction_substrates,
    reaction_products,
    data,
) = ([] for _ in range(9))

In [6]:
from utils import (
    extract_ec_annotation,
    extract_just_ec_number,
    extract_products,
    extract_substrates,
    merge_substrates,
)
import json

# Walk every BRENDA reaction once and fill two views of it:
#   * the parallel `reaction_*` lists (one entry per reaction), and
#   * `data`, a list of row dicts whose keys match the columns of the
#     `reactions` table created further down.
for r in brenda.reactions:
    # Evaluate each helper once per reaction and reuse the result for both
    # views instead of calling it twice.
    ec_number = extract_just_ec_number(r.ec_number)
    ec_annotation = extract_ec_annotation(r.ec_number)
    products = extract_products(r.substratesAndProducts)

    reaction_ec_numbers.append(ec_number)
    reaction_ec_annotation.append(ec_annotation)
    reaction_names.append(r.name)
    reaction_systematic_name.append(r.systematic_name)
    reaction_str.append(r.reaction_str)
    reaction_type.append(r.reaction_type)
    # extract_substrates is intentionally called separately here and for the
    # row below: the row stores the *unmerged* substrates, and merge_substrates
    # may modify the list it is given (NOTE(review): not verifiable from this
    # notebook - confirm in utils before sharing one list between the two).
    reaction_substrates.append(
        merge_substrates(extract_substrates(r.substratesAndProducts), r)
    )
    reaction_products.append(products)

    item = {
        'ec_number': ec_number,
        'ec_annotation': ec_annotation,
        'name': r.name,
        'systematic_name': r.systematic_name,
        'str': r.reaction_str,
        'type': r.reaction_type,
        'substrates': json.dumps(extract_substrates(r.substratesAndProducts)),
        'products': json.dumps(products),
    }
    # Collapse every falsy value (empty string, None, ...) to None so that
    # missing fields are represented uniformly in the row.
    item = {key: (value or None) for key, value in item.items()}
    # ec_number is the table's primary key, so rows without one are dropped.
    if item['ec_number']:
        data.append(item)

In [8]:
# with mysql running
import pymysql
# NOTE(review): credentials are hard-coded for a local development server;
# do not reuse this configuration against a shared or production instance.
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'charset':'utf8mb4'
}
# Create the `Reaction` database and its `reactions` table, insert every row
# collected in `data`, then read back EC 1.1.1.1 into `rows` as a sanity check.
try:
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()
except pymysql.Error as e:
    print("Database connection error:", e)
else:
    # Only reached when the connection succeeded, so `cursor` is always
    # defined here (a failed connect no longer ends in a NameError).
    try:
        cursor.execute("CREATE DATABASE IF NOT EXISTS Reaction")
        cursor.execute("USE Reaction")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS reactions (
                ec_number VARCHAR(20) PRIMARY KEY, 
                ec_annotation TEXT,
                name VARCHAR(100),
                systematic_name VARCHAR(100),
                str TEXT,
                type VARCHAR(100),
                substrates JSON,
                products JSON
            ) CHARACTER SET utf8mb4;""")
        for item in data:
            # Best effort per row: a failing insert is reported and skipped
            # so that the remaining rows are still written.
            try:
                insert_query = f"INSERT INTO reactions ({', '.join(item.keys())}) VALUES ({', '.join(['%s'] * len(item))})"
                cursor.execute(insert_query, tuple(item.values()))
            except pymysql.Error as e:
                print(e)
        connection.commit()
        query = "SELECT * FROM reactions WHERE ec_number = %s"
        # Query parameters are passed as a real 1-tuple (note the trailing comma).
        cursor.execute(query, ('1.1.1.1',))
        rows = cursor.fetchall()
    except pymysql.Error as e:
        print(e)
    finally:
        # Release the cursor and the connection on the error path as well.
        cursor.close()
        connection.close()

In [16]:
# Look up a single EC number in the `reactions` table and keep the result in
# `rows` for display in the next cell. Relies on `pymysql` and `db_config`
# from the cell that created the database.
try:
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()
except pymysql.Error as e:
    print("Database connection error:", e)
else:
    # Only reached when the connection succeeded, so `cursor` is always
    # defined here.
    try:
        cursor.execute("USE Reaction")
        query = "SELECT * FROM reactions WHERE ec_number = %s"
        # Query parameters are passed as a real 1-tuple (note the trailing comma).
        cursor.execute(query, ('1.1.2.10',))
        rows = cursor.fetchall()
    except pymysql.Error as e:
        print(e)
    finally:
        # Close the cursor and the connection whether or not the query worked.
        cursor.close()
        connection.close()

In [17]:
rows

(('1.1.2.10',
  None,
  'Lanthanide-dependent methanol dehydrogenase',
  'methanol:cytochrome cL oxidoreductase',
  None,
  None,
  '["2 oxidized cytochrome cL", "oxidized cytochrome c XoxG", "2 oxidized cytochrome cGJ", "2 cytochrome cGJ", "methanol"]',
  '["formaldehyde", "2 reducedcytochrome cGJ", "reducedcytochrome c XoxG", "2 reducedcytochrome cL"]'),)

# Create Enzyme Database

## For $k_{m}$

In [5]:
# Slow cell: the recorded run took about 71 min on an Intel i5-13400
# (wsl2: ubuntu20.04).
from utils import parse_km, extract_just_ec_number
from tqdm import tqdm
import json
# Flatten the K_m records of every reaction into `data_km`, one row dict per
# (reaction, substrate, record); the keys match the columns of the `km` table.
i = 1  # running row id, used as the table's primary key
data_km = []
for r in tqdm(brenda.reactions):
    # Parse once per reaction and reuse the result for all its substrates
    # rather than re-parsing the reaction for each substrate.
    km_by_substrate = parse_km(r)
    ec_number = extract_just_ec_number(r.ec_number)
    for substrate, records in km_by_substrate.items():
        for record in records:
            item = {
                'id': str(i),
                'ec_number': ec_number,
                'k_m': record.get('value'),
                'species': json.dumps(record.get('species')),
                'meta': record.get('meta'),
                'refs': json.dumps(record.get('refs')),
                'substrate': substrate,
            }
            # Collapse every falsy value (empty string, None, 0, ...) to None.
            item = {key: (value or None) for key, value in item.items()}
            # Rows without an EC number are skipped and do not consume an id.
            if item['ec_number']:
                data_km.append(item)
                i += 1

100%|██████████| 7832/7832 [1:11:00<00:00,  1.84it/s]   


In [12]:
# with mysql running
import pymysql
from tqdm import tqdm
# NOTE(review): credentials are hard-coded for a local development server;
# do not reuse this configuration against a shared or production instance.
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'charset':'utf8mb4'
}
# Create the `KM` database and its `km` table, insert every row collected in
# `data_km`, then read back EC 1.1.1.1 into `rows` as a sanity check.
try:
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()
except pymysql.Error as e:
    print("Database connection error:", e)
else:
    # Only reached when the connection succeeded, so `cursor` is always
    # defined here (a failed connect no longer ends in a NameError).
    try:
        cursor.execute("CREATE DATABASE IF NOT EXISTS KM")
        cursor.execute("USE KM")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS km (
                id INT PRIMARY KEY,
                ec_number VARCHAR(20), 
                k_m FLOAT,
                species JSON,
                meta TEXT,
                refs JSON,
                substrate TEXT
            ) CHARACTER SET utf8mb4;""")
        for item in tqdm(data_km):
            # Best effort per row: a failing insert is reported and skipped
            # so that the remaining rows are still written.
            try:
                insert_query = f"INSERT INTO km ({', '.join(item.keys())}) VALUES ({', '.join(['%s'] * len(item))})"
                cursor.execute(insert_query, tuple(item.values()))
            except pymysql.Error as e:
                print(e)
        connection.commit()
        query = "SELECT * FROM km WHERE ec_number = %s"
        # Query parameters are passed as a real 1-tuple (note the trailing comma).
        cursor.execute(query, ('1.1.1.1',))
        rows = cursor.fetchall()
    except pymysql.Error as e:
        print(e)
    finally:
        # Release the cursor and the connection on the error path as well.
        cursor.close()
        connection.close()

100%|██████████| 181460/181460 [01:04<00:00, 2793.35it/s]


## For $k_{cat}$

In [5]:
# Slow cell: the recorded run took about 19 min on an Intel i5-13400
# (wsl2: ubuntu20.04).
from utils import parse_kcat, extract_just_ec_number
from tqdm import tqdm
import json
# Flatten the k_cat records of every reaction into `data_kcat`, one row dict
# per (reaction, substrate, record); the keys match the columns of the `kcat`
# table.
i = 1  # running row id, used as the table's primary key
data_kcat = []
for r in tqdm(brenda.reactions):
    # Parse once per reaction and reuse the result for all its substrates
    # rather than re-parsing the reaction for each substrate.
    kcat_by_substrate = parse_kcat(r)
    ec_number = extract_just_ec_number(r.ec_number)
    for substrate, records in kcat_by_substrate.items():
        for record in records:
            item = {
                'id': str(i),
                'ec_number': ec_number,
                'k_cat': record.get('value'),
                'species': json.dumps(record.get('species')),
                'meta': record.get('meta'),
                'refs': json.dumps(record.get('refs')),
                'substrate': substrate,
            }
            # Collapse every falsy value (empty string, None, 0, ...) to None.
            item = {key: (value or None) for key, value in item.items()}
            # Rows without an EC number are skipped and do not consume an id.
            if item['ec_number']:
                data_kcat.append(item)
                i += 1

100%|██████████| 7832/7832 [19:40<00:00,  6.64it/s]  


In [7]:
# with mysql running
import pymysql
from tqdm import tqdm
# NOTE(review): credentials are hard-coded for a local development server;
# do not reuse this configuration against a shared or production instance.
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'charset':'utf8mb4'
}
# Create the `KCat` database and its `kcat` table, insert every row collected
# in `data_kcat`, then read back EC 1.1.1.1 into `rows` as a sanity check.
try:
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()
except pymysql.Error as e:
    print("Database connection error:", e)
else:
    # Only reached when the connection succeeded, so `cursor` is always
    # defined here (a failed connect no longer ends in a NameError).
    try:
        cursor.execute("CREATE DATABASE IF NOT EXISTS KCat")
        cursor.execute("USE KCat")
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS kcat (
                id INT PRIMARY KEY,
                ec_number VARCHAR(20), 
                k_cat FLOAT,
                species JSON,
                meta TEXT,
                refs JSON,
                substrate TEXT
            ) CHARACTER SET utf8mb4;""")
        for item in tqdm(data_kcat):
            # Best effort per row: a failing insert is reported and skipped
            # so that the remaining rows are still written.
            try:
                insert_query = f"INSERT INTO kcat ({', '.join(item.keys())}) VALUES ({', '.join(['%s'] * len(item))})"
                cursor.execute(insert_query, tuple(item.values()))
            except pymysql.Error as e:
                print(e)
        connection.commit()
        query = "SELECT * FROM kcat WHERE ec_number = %s"
        # Query parameters are passed as a real 1-tuple (note the trailing comma).
        cursor.execute(query, ('1.1.1.1',))
        rows = cursor.fetchall()
    except pymysql.Error as e:
        print(e)
    finally:
        # Release the cursor and the connection on the error path as well.
        cursor.close()
        connection.close()

  0%|          | 0/76811 [00:00<?, ?it/s]

100%|██████████| 76811/76811 [00:25<00:00, 2977.58it/s]


In [8]:
rows

((1,
  '1.1.1.1',
  0.833,
  '["Rattus norvegicus"]',
  'isoenzyme ADH-3, pH 10.0',
  '[" Julia, P.; Farres, J.; Pares, X.: Characterization of threeisoenzymes of rat alcohol dehydrogenase. Tissue distribution andphysical and enzymatic properties. Eur. J. Biochem. (1987) 162,179-189. {Pubmed:3816781} (c)"]',
  '1-butanol'),
 (2,
  '1.1.1.1',
  48.8,
  '["Rattus norvegicus"]',
  'isoenzyme ADH-1, pH 10.0',
  '[" Julia, P.; Farres, J.; Pares, X.: Characterization of threeisoenzymes of rat alcohol dehydrogenase. Tissue distribution andphysical and enzymatic properties. Eur. J. Biochem. (1987) 162,179-189. {Pubmed:3816781} (c)"]',
  '1-butanol'),
 (3,
  '1.1.1.1',
  3.1,
  '["Saccharolobus solfataricus"]',
  'wild type enzyme, in 0.1 M glycine-NaOHbuffer (pH 10.5), at 65°C',
  '[" Pennacchio, A.; Esposito, L.; Zagari, A.; Rossi, M.; Raia, C.A.:Role of Tryptophan 95 in substrate specificity and structural stabilityof Sulfolobus solfataricus alcohol dehydrogenase. Extremophiles (2009)13, 751

# After running mysql

In [3]:
# with mysql running
import pymysql
# NOTE(review): credentials are hard-coded for a local development server;
# do not reuse this configuration against a shared or production instance.
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'charset':'utf8mb4'
}
# Read the kcat rows for EC 1.1.1.1 from the `pRAPer` database into `rows`.
try:
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()
except pymysql.Error as e:
    print("Database connection error:", e)
else:
    # Only reached when the connection succeeded, so `cursor` is always
    # defined here (a failed connect no longer ends in a NameError).
    try:
        cursor.execute("USE pRAPer")
        query = "SELECT * FROM kcat WHERE ec_number = %s"
        # Query parameters are passed as a real 1-tuple (note the trailing comma).
        cursor.execute(query, ('1.1.1.1',))
        rows = cursor.fetchall()
    except pymysql.Error as e:
        print(e)
    finally:
        # Release the cursor and the connection on the error path as well.
        cursor.close()
        connection.close()

In [4]:
rows

((1,
  '1.1.1.1',
  0.833,
  '["Rattus norvegicus"]',
  'isoenzyme ADH-3, pH 10.0',
  '[" Julia, P.; Farres, J.; Pares, X.: Characterization of threeisoenzymes of rat alcohol dehydrogenase. Tissue distribution andphysical and enzymatic properties. Eur. J. Biochem. (1987) 162,179-189. {Pubmed:3816781} (c)"]',
  None),
 (2,
  '1.1.1.1',
  48.8,
  '["Rattus norvegicus"]',
  'isoenzyme ADH-1, pH 10.0',
  '[" Julia, P.; Farres, J.; Pares, X.: Characterization of threeisoenzymes of rat alcohol dehydrogenase. Tissue distribution andphysical and enzymatic properties. Eur. J. Biochem. (1987) 162,179-189. {Pubmed:3816781} (c)"]',
  None),
 (3,
  '1.1.1.1',
  3.1,
  '["Saccharolobus solfataricus"]',
  'wild type enzyme, in 0.1 M glycine-NaOHbuffer (pH 10.5), at 65°C',
  '[" Pennacchio, A.; Esposito, L.; Zagari, A.; Rossi, M.; Raia, C.A.:Role of Tryptophan 95 in substrate specificity and structural stabilityof Sulfolobus solfataricus alcohol dehydrogenase. Extremophiles (2009)13, 751-761. {Pubmed: