In [1]:
import re
import numpy as np
workDir = 'C:/Users/tinta/OneDrive/Documents/Projects/BRENDA'
dataFile = workDir + '/brenda_download.txt'

In [2]:
from parseBRENDA import BRENDA
brenda = BRENDA(dataFile)

In [5]:
r = brenda.getReactions('2.2.1.1')[0]

In [4]:
r.reaction_type

'keto group transfer'

In [5]:
temp = r.getTemperatureData()

In [7]:
[l['value'] for l in temp['optimum']]

[40.0, 62.0, 37.0, 65.0, 65.0, 30.0, 45.0, 25.0, 20.0]

In [8]:
[l['value'] for l in temp['stability']]

[71.0, 67.5, 65.0, 60.0, 58.3, -999.0, 55.0, 40.0, 20.0, 50.0]

In [9]:
[l['value'] for l in temp['range']]

[[40.0, 70.0]]

In [8]:
r.getMetals().keys()

dict_keys(['Mg2+', 'Mn2+', 'Cu2+', 'Co2+', 'Ca2+'])

In [4]:
r.getSubstratesAndProducts()    

[{'substrates': ['D-ribose5-phosphate', 'D-xylulose 5-phosphate'],
  'products': ['D-glyceraldehyde 3-phosphate', 'sedoheptulose 7-phosphate']},
 {'substrates': ['D-fructose 6-phosphate', 'D-glyceraldehyde 3-phosphate'],
  'products': ['D-xylulose 5-phosphate', 'erythrose 4-phosphate']},
 {'substrates': ['D-glyceraldehyde 3-phosphate', 'glyceraldehyde 3-phosphate'],
  'products': ['D-xylulose 5-phosphate', 'D-xylulose 5-phosphate']},
 {'substrates': ['D-ribose 5-phosphate', 'D-xylulose 5-phosphate'],
  'products': ['D-glyceraldehyde 3-phosphate', 'sedoheptulose 7-phosphate']},
 {'substrates': ['D-glyceraldehyde 3-phosphate', 'sedoheptulose 7-phosphate'],
  'products': ['D-ribose 5-phosphate', 'D-xylulose 5-phosphate']}]

In [10]:
lines = r._getDataLines('NSP')
# line = lines[1]
# print(len(lines))
[r._extractDataLineInfo(line)['value'] for line in lines]

['D-xylulose 5-phosphate + D-ribose5-phosphate = sedoheptulose 7-phosphate + D-glyceraldehyde 3-phosphate',
 'D-xylulose 5-phosphate + D-ribose5-phosphate = sedoheptulose 7-phosphate + D-glyceraldehyde 3-phosphate {}',
 'D-xylulose 5-phosphate + D-ribose5-phosphate = sedoheptulose 7-phosphate + D-glyceraldehyde 3-phosphate',
 'D-fructose 6-phosphate + D-glyceraldehyde 3-phosphate =erythrose 4-phosphate + D-xylulose 5-phosphate',
 'D-fructose 6-phosphate + D-glyceraldehyde 3-phosphate =D-erythrose 4-phosphate + D-xylulose 5-phosphate',
 'D-fructose 6-phosphate + D-glyceraldehyde 3-phosphate =D-erythrose 4-phosphate + D-xylulose 5-phosphate',
 'more = ?  {}',
 'more = ?  {}',
 'D-xylulose 5-phosphate + D-ribose 5-phosphate = ?',
 'glyceraldehyde 3-phosphate + D-glyceraldehyde 3-phosphate =D-xylulose 5-phosphate + D-xylulose 5-phosphate',
 'D-xylulose-5-phosphate + D-ribose-5-phosphate = ?',
 'D-ribose 5-phosphate + D-xylulose 5-phosphate =sedoheptulose 7-phosphate + D-glyceraldehyde 3-ph

In [28]:
a = ['hola', 'a', 'tal']
a.sort()
a

['a', 'hola', 'tal']

In [26]:
def getKMvalues(r, substrate: str=None) -> dict:
    """
    Returns a dictionary with all KM values of the enzyme. 
    If a substrate is given, then results are restricted to that substrate.
    """
    res = {substrate: []}
    lines = r._getDataLines('KM')  
    for line in lines:
        data = r._extractDataLineInfo(line)
        if data['specific_info'] == substrate:
            del data['specific_info']
            data['value'] = float(data['value'])
            res[substrate].append(data)
    return res


In [34]:
res = getKMvalues(r)
res

{}

In [32]:
substrate = 'D-xylulose 5-phosphate'
res = getKMvalues(r, substrate)
[KM['value'] for KM in res[substrate]] 

[0.038,
 0.015,
 0.02,
 0.065,
 0.033,
 0.034,
 0.0056,
 0.067,
 0.23,
 0.255,
 0.303,
 0.12,
 0.16,
 0.1,
 0.18,
 0.41,
 0.04,
 0.49,
 0.011,
 0.2,
 0.15,
 0.44,
 0.0399,
 0.4579,
 0.025,
 0.21,
 0.5,
 0.021,
 0.07,
 0.023,
 0.075,
 0.073,
 1.01,
 0.115,
 0.163,
 1.24,
 0.773,
 3.35,
 0.595,
 4.08,
 0.318,
 0.829,
 0.4]

In [11]:
r.getKMvalues()

{'D-ribose 5-phosphate': [0.356,
  0.739,
  2.316,
  0.436,
  0.949,
  0.837,
  0.882,
  0.8,
  0.12,
  0.61,
  0.48,
  0.33,
  1.4,
  0.063,
  0.3,
  0.63,
  0.53,
  0.72,
  0.0362,
  0.879,
  0.06,
  0.6,
  7.0,
  0.09,
  0.014,
  0.29,
  0.15,
  0.007,
  0.08,
  0.146,
  0.093,
  0.193,
  1.75,
  0.159,
  5.97,
  0.698,
  5.65,
  0.4,
  0.25],
 'D-xylulose 5-phosphate': [0.015,
  0.02,
  0.065,
  0.033,
  0.034,
  0.0056,
  0.067,
  0.23,
  0.255,
  0.303,
  0.12,
  0.16,
  0.1,
  0.18,
  0.41,
  0.04,
  0.49,
  0.011,
  0.2,
  0.15,
  0.44,
  0.0399,
  0.4579,
  0.025,
  0.21,
  0.5,
  0.021,
  0.07,
  0.023,
  0.075,
  0.073,
  1.01,
  0.115,
  0.163,
  1.24,
  0.773,
  3.35,
  0.595,
  4.08,
  0.318,
  0.829,
  0.4],
 'fructose 6-phosphate': [3.2, 0.61, 1.1, 7.0, 1.8],
 'D-xylose': [],
 'L-arabinose': [],
 'L-Lyxose': [],
 'D-ribose': [],
 'fructose-6-phosphate': [],
 'Hydroxypyruvate': [5.3, 18.0, 6.6, 33.0],
 'D-fructose 6-phosphate': [0.72, 0.6, 0.34],
 'D-glyceraldehyde 3-pho

In [None]:
# line = '#1# Meleagris gallopavo  <2>'
# line = '#33# Trypanosoma cruzi Q4D946 UniProt <60>'
line = '#5# Escherichia coli   (#5# large subunit <21>) <21,34,57>'
# re.search('(?<=#)(.*)(?=#)', line).group(1)
#re.search('(?<=#)(.*)(?=<)', line).group(1).strip()
#re.search('(?<=<)(.*)(?=>)', line).group(1).split(',')
meta_search = re.search('(?<=\()(.*)(?=\))', line)
line = line.replace(f'({meta_search.group(1)})', '')
line

In [None]:
# r[0]._reaction_data

In [None]:
r[0].ec_number

In [None]:
reactions = brenda.getReactions()

In [None]:
len(reactions)

In [None]:
reactions[100].name

In [None]:
#brenda.getKMvalues("2.7.1.35")
# '\nKM\t#15# 0.022 {pyridoxal}  (#15# pH 7, 37Â°C <24>) <24>\nKM\t'
# def getKMlines(data, search_indices):
#     return [p.group(1) for p in re.finditer("KM\t(.+?)\nKM",
#                                             data[search_indices[0]:search_indices[1]])]

# #brenda.data[97288258:97360800]
# getKMlines(brenda.data, (97288258, 97360800))

In [None]:
# import importlib
# importlib.reload(module)

%load_ext autoreload
%autoreload 2
from parseBRENDA import BRENDA

In [None]:
brenda = BRENDA(DataFile)

In [None]:
brenda.getKMvalues("2.7.1.35", "ATP")

In [None]:
"""
REFERENCE
RF	<1> Green, P.N.; Gibson, D.M.: Carbohydrate metabolism in some
	methylotrophic bacteria. FEMS Microbiol. Lett. (1984) 23, 31-34.
	{Pubmed:} (c)
RF	<2> Cline, A.L.; Hu, A.S.L.: The isolation of three sugar
	dehydrogenases from a pseudomonad. J. Biol. Chem. (1965) 240,
	4488-4492. {Pubmed:5845847} (c)
"""

"""
PROTEIN
PR	#1# Sus scrofa   (#1# protein comprises a small N-terminal LPMO10
	module named LPMO10A followed by a family 5/12 CBM and a C-terminal
	GH18 module <9>) <9>
PR	#2# Oryctolagus cuniculus   (#2# fragment of dihydropteroate synthase
	<8>) <8>
PR	#3# Pseudomonas sp.   <2,10>
PR	#4# Acinetobacter sp.   <13>
PR	#5# Agrobacterium tumefaciens   <1,3>
PR	#6# Aureobasidium pullulans   <4>
PR	#7# Ovis aries aries   (#7# adh, fragment <6,7>) <6,7>
PR	#8# Trinickia caryophylli   <5>
PR	#9# Saccharolobus solfataricus Q97YM2 SwissProt <11,12>

RECOMMENDED_NAME
"""