# This notebook shows how to interact with a BrendaParser and BrendaProtein object

https://github.com/matthiaskoenig/brendapy

In [1]:
import pandas as pd
import logging
from collections import OrderedDict, defaultdict

from brendapy import BrendaParser, BrendaProtein
from brendapy.taxonomy import Taxonomy

BRENDA_PARSER = BrendaParser()  # reuse parser


def parse_proteins_for_ec(ec="1.1.1.1"):
    """Parse the protein entries for a given EC number in BRENDA.
    """
    proteins = BRENDA_PARSER.get_proteins(ec)
    return proteins


ec_numbers = ['1.14.99.39', '2.7.10.1', '2.7.10.2', '2.7.11.1', '2.7.13.3' ]


ec_dict = parse_proteins_for_ec(ec_numbers[3])

ModuleNotFoundError: No module named 'brendapy'

### We can also check other details

In [None]:
# Check if it has a UniProt ID
for prot_id in range(1, len(ec_dict) + 1):
    if ec_dict[prot_id].uniprot:

        print (f">{ec_dict[prot_id].uniprot}")

In [2]:
ec_dict[3].references
test_dict = defaultdict(list)
test_dict[ec_dict[3].uniprot] = ec_dict[3].references
print (test_dict)

NameError: name 'ec_dict' is not defined

### The ASR curation workflow pulls the values for a given BRENDA column (currently just TN and KM) and puts them in a column

In [12]:

brenda_dict = defaultdict(lambda: defaultdict(list))

for prot_id, protein in sorted(ec_dict.items()):
    if protein.uniprot:
            print (protein.uniprot)
            for bc in brenda_cols:
                attribs = getattr(protein, bc)
                if attribs:
                    for attrib in attribs:
                        print (attrib)
                        
#                         if 'mutant' not in attrib['comment']:
#                             brenda_dict[protein.uniprot][f"{str(bc)}_{str(attrib['substrate'])}"].append(attrib['value'])
#                             brenda_dict[protein.uniprot][f"{str(bc)}"].append(f"{attrib['value']};{attrib['substrate']}")

for k, v in brenda_dict.items():
    print (f"{k}\n{v}\n")
    print (f"Available keys are {', '.join([x for x in v.keys()])}\n")
                      

Q8NQZ9
Q97UB2
{'data': '0.54 {2,3-dihydroxyisovalerate}', 'refs': [28], 'units': 'mM', 'value': 0.54, 'substrate': '2,3-dihydroxyisovalerate', 'chebi': 'CHEBI_11424'}
{'data': '2.1 {2,3-Dihydroxy-3-methylbutanoate}', 'refs': [39], 'comment': '#22# pH 7, 50°C <39>', 'units': 'mM', 'value': 2.1, 'substrate': '2,3-Dihydroxy-3-methylbutanoate'}
{'data': '2.42 {D-gluconate}', 'refs': [28], 'units': 'mM', 'value': 2.42, 'substrate': 'D-gluconate', 'chebi': 'CHEBI_18391'}
{'data': '7.8 {D-gluconate}', 'refs': [39], 'comment': '#22# pH 7, 50°C <39>', 'units': 'mM', 'value': 7.8, 'substrate': 'D-gluconate', 'chebi': 'CHEBI_18391'}
Q0K7F8
{'data': '2.7 {2,3-dihydroxyisovalerate}', 'refs': [40], 'comment': '#23# pH 7.0, 30°C <40>', 'units': 'mM', 'value': 2.7, 'substrate': '2,3-dihydroxyisovalerate', 'chebi': 'CHEBI_11424'}
Q9LIR4


In [39]:
brenda_cols = ['TN', 'TO']

def add_val(brenda_dict, protein, attrib, attrib_count):
    
    print (attrib)
    

    if 'comment' not in attrib or 'mutant' not in attrib['comment']:

        if 'substrate' in attrib:
            
            terms = ['units', 'refs', 'comment']
            
            brenda_dict[protein.uniprot][f"BRENDA_{str(bc)}_{str(attrib['substrate'])}_DATA"].append(f"{attrib['value']}_count={attrib_count}")
            brenda_dict[protein.uniprot][f"BRENDA_{str(bc)}"].append(f"{attrib['value']};{attrib['substrate']}_count={attrib_count}")
                                                                     
            for term in terms:
                if term in attrib:
                    brenda_dict[protein.uniprot][f"BRENDA_{str(bc)}_{str(attrib['substrate'])}_{term.upper()}"].append(f"{attrib[term]}_count={attrib_count}")


        else:
            terms = ['data', 'units', 'refs', 'comment']
                                                    
            for term in terms:
                if term in attrib:
                    brenda_dict[protein.uniprot][f"BRENDA_{str(bc)}_{term.upper()}"].append(f"{attrib[term]}_count={attrib_count}")


# Create a BRENDA dictionary that maps all of the available Uniprot IDs from BRENDA to their annotations
brenda_dict = defaultdict(lambda: defaultdict(list))

# Not getting SN (synonyms) or RN (accepted name (IUPAC)) or IC50
for prot_id, protein in sorted(ec_dict.items()):
    if protein.uniprot:
        # ADD REFERENCES HERE
        for bc in brenda_cols:
            attrib_count = 0
            print (f'bc is {bc}')
            attribs = getattr(protein, bc)
            if attribs:
                for attrib in attribs:
                    attrib_count +=1
                    if bc in [ 'AP', 'AC', 'CF', 'CL', 'CR', 'EXP', 'IC50', 'LO', 'NSP', 'PHO', 'PU', 'PM', 'SP', 'EN',
                              'IN', 'ME', 'MW', 'SA', 'ST', 'SU', 'SY', 'TO', 'TR', 'TS', 'KKM', 'KM', 'TN', 'KI']:
                        add_val(brenda_dict, protein, attrib, attrib_count)
                    if bc == 'GI':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'GS':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'OS':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'OSS':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'PHR':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'PHS':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'PI':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'REN':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)
                    if bc == 'SS':
                        print(f"WARNING {bc} is not implemented")
                        print(attrib)


# Add the annotations from BRENDA dictionary to the annotation file
for entry_id, bd in brenda_dict.items():
    print ('Getting BRENDA DF')
    print (entry_id)
    for k, v in bd.items():
        print (k, v)
        print()

bc is TN
bc is TO
bc is TN
{'data': '0.011 {D-glycerate}', 'refs': [39], 'comment': '#22# pH 7, 50°C, non-activated enzyme <39>', 'units': '1/s', 'value': 0.011, 'substrate': 'D-glycerate', 'chebi': 'CHEBI_16659'}
{'data': '0.03 {D-glycerate}', 'refs': [39], 'comment': '#22# pH 7, 50°C, enzyme activated by 2-mercaptoethanol <39>', 'units': '1/s', 'value': 0.03, 'substrate': 'D-glycerate', 'chebi': 'CHEBI_16659'}
{'data': '0.31 {2,3-Dihydroxy-3-methylbutanoate}', 'refs': [39], 'comment': '#22# pH 7, 50°C, enzyme activated by 2-mercaptoethanol <39>; #22# pH 7, 50°C, non-activated enzyme <39>', 'units': '1/s', 'value': 0.31, 'substrate': '2,3-Dihydroxy-3-methylbutanoate'}
{'data': '0.4 {D-gluconate}', 'refs': [39], 'comment': '#22# pH 7, 50°C, non-activated enzyme <39>', 'units': '1/s', 'value': 0.4, 'substrate': 'D-gluconate', 'chebi': 'CHEBI_18391'}
{'data': '1.19 {D-gluconate}', 'refs': [39], 'comment': '#22# pH 7, 50°C, enzyme activated by 2-mercaptoethanol <39>', 'units': '1/s', 'val