# Scrape BRENDA

In [None]:
# IMPORTS
from os import path, mkdir, remove
import re as re
import mmap as mmap
import pandas as pd
import statistics
import numpy as np

In [None]:
# FILES NAMES AND CONSTANTS
# Raw BRENDA text download that is parsed by this notebook.
BRENDA_INPUT_FILE = './brenda_download.txt'
# Directory that receives one CSV file per extracted parameter.
BRENDA_UNFILTERED_DATA_DIR = './BRENDA_data/unfiltered_dataframes'
KCAT_FILE = BRENDA_UNFILTERED_DATA_DIR + '/k_cat.csv'
SA_FILE = BRENDA_UNFILTERED_DATA_DIR + '/specific_activity.csv'
MW_FILE = BRENDA_UNFILTERED_DATA_DIR + '/molecular_weight.csv'
ORG_NAMES_FILE = BRENDA_UNFILTERED_DATA_DIR + '/org_names.csv'
OPT_PH_FILE = BRENDA_UNFILTERED_DATA_DIR + '/optimum_ph.csv'
PH_RANGE_FILE = BRENDA_UNFILTERED_DATA_DIR + '/ph_range.csv'
OPT_TEMP_FILE = BRENDA_UNFILTERED_DATA_DIR + '/optimum_temp.csv'
TEMP_RANGE_FILE = BRENDA_UNFILTERED_DATA_DIR + '/temp_range.csv'
# Column separator used when writing the output files (tab).
SEPARATOR = '\t'

## Regex

Use regular expressions to scrape every parameter from the proteins documented in BRENDA, which are classified by EC number and protein number (a number related to the organism the protein comes from).

Parameters currently extracted:
- turnover number (kcat)
- specific activity (sa)
- molecular weight (mw)
- organism's names (org_names)
- optimum pH (opt_ph)
- pH range (ph_range)
- optimum temperature (opt_temp)
- temperature range (temp_range)

In [None]:
def split_pr_numbers(pr_numbers):
    r'''
    Split a comma-separated string of protein (PR) numbers.

    Inputs:
    -------
    pr_numbers: string such as '1,2,3'
    Output:
    -------
    list with one string per PR number, e.g. ['1', '2', '3']
    '''
    return pr_numbers.split(',')

def write_info_on_csv(brenda_info, csv_file):
    r'''
    Write the rows extracted from the BRENDA download to an open csv file.

    Every entry is [ec_number, pr_numbers, other fields...]; pr_numbers may
    hold several comma-separated protein numbers, in which case one line is
    written per protein number, each carrying the same remaining fields.

    Inputs:
    -------
    brenda_info: list of rows (lists of strings) to be written
    csv_file: writable file object receiving the SEPARATOR-joined lines
    '''
    for ec_number, pr_numbers, *other_fields in brenda_info:
        for single_pr in split_pr_numbers(pr_numbers):
            row = [ec_number, single_pr, *other_fields]
            csv_file.write(SEPARATOR.join(row) + '\n')
    
def is_mutant(line):
    r'''
    Tell whether a BRENDA line mentions a mutant.

    Inputs:
    -------
    line: text of one BRENDA entry
    Output:
    -------
    True when the substring 'mutant' occurs in line, False otherwise
    '''
    # The membership test has to be returned; as a bare expression it was
    # evaluated and discarded, so the function always produced None.
    return 'mutant' in line

### turnover number (kcat) ###

def regex_for_kcat(ec_file, ec_number):
    r''' 
    Obtain the turnover number (kcat) information using regular expressions.
    Every row holds the EC number, PR number(s), kcat value and substrate,
    followed by the pH and temperature found on the line; when one of them is
    not present it is reported as 'not specified'. Lines for which
    is_mutant() is true are skipped.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    kcat_values: list of rows
    [ec_number, pr_numbers, kcat_value, substrate, ph_value, temp_value]
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Pattern looking for: TN  #pr_num#  Kcat_value  {substrate} pH T°
    # (the bracketed pH/°C tails are optional character classes, so they do
    # not restrict which lines are selected).
    line_regex = re.compile(
        r'^TN.*\#\d*[,\d+]*\# \d+\.\d* \{.*\}.*[\(?pH \d+\.\d*\)?]?.*[\d+°C]?.*',
        flags)
    # extract #num# Kcat_value and {substrate}
    fields_regex = re.compile(r'^TN.*\#(\d*[,\d*]*)\# (.*) \{(.*)\}.*', flags)
    # pH value, with or without decimals ("pH 7" as well as "pH 7.5")
    ph_regex = re.compile(r'pH (\d+(?:\.\d+)?)', flags)
    # Whole temperature number in front of °C. The previous '.*(\d\d)°C'
    # kept only the two digits next to the unit ("100°C" -> "00") and missed
    # one-digit or decimal temperatures.
    temp_regex = re.compile(r'(\d+(?:\.\d+)?)°C', flags)

    kcat_values = []
    for kcat_line in line_regex.findall(ec_file):
        if is_mutant(kcat_line):
            continue
        fields = fields_regex.match(kcat_line)
        if fields is None:
            continue
        # pr_numbers could be a list of numbers: #1,2,3,4,5#
        pr_numbers, kcat_value, substrate = fields.groups()
        # As before, the last pH / temperature mentioned on the line is used.
        ph_hits = ph_regex.findall(kcat_line)
        ph_value = ph_hits[-1] if ph_hits else 'not specified'
        temp_hits = temp_regex.findall(kcat_line)
        temp_value = temp_hits[-1] if temp_hits else 'not specified'
        kcat_values.append([ec_number, pr_numbers, kcat_value, substrate,
                            ph_value, temp_value])
    return kcat_values
        
def get_kcat(ec_file, ec_number):
    r'''
    Append the kcat rows found in ec_file to KCAT_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(KCAT_FILE, 'a+') as kcat_out:
        write_info_on_csv(regex_for_kcat(ec_file, ec_number), kcat_out)

### specific activity (sa) ###

def regex_for_sa(ec_file, ec_number): 
    r''' 
    Obtain the specific activity (sa) information using regular expressions.
    Every row holds the EC number, PR number(s) and sa value, followed by the
    pH and temperature found on the line; when one of them is not present it
    is reported as 'not specified'. Lines for which is_mutant() is true are
    skipped.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    sa_values: list of rows
    [ec_number, pr_numbers, sa_value, ph_value, temp_value]
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Pattern looking for: SA  #pr_num#  sa_value pH T°
    # (the bracketed pH/°C tails are optional character classes, so they do
    # not restrict which lines are selected).
    line_regex = re.compile(
        r'^SA.*\#\d*[,\d+]*\# [0-9]+\.[0-9]*.*[pH \d+\.\d*]?.*[\d+°C]?.*',
        flags)
    # extract #num# sa_value
    fields_regex = re.compile(r'^SA.*\#(\d*[,\d+]*)\# ([0-9]+\.[0-9]*).*', flags)
    # pH value, with or without decimals ("pH 7" as well as "pH 7.5")
    ph_regex = re.compile(r'pH (\d+(?:\.\d+)?)', flags)
    # Whole temperature number in front of °C. The previous '.*(\d\d)°C'
    # kept only the two digits next to the unit ("100°C" -> "00") and missed
    # one-digit or decimal temperatures.
    temp_regex = re.compile(r'(\d+(?:\.\d+)?)°C', flags)

    sa_values = []
    for sa_line in line_regex.findall(ec_file):
        if is_mutant(sa_line):
            continue
        fields = fields_regex.match(sa_line)
        if fields is None:
            continue
        # pr_numbers could be a list of numbers: #1,2,3,4,5#
        pr_numbers, sa_value = fields.groups()
        # As before, the last pH / temperature mentioned on the line is used.
        ph_hits = ph_regex.findall(sa_line)
        ph_value = ph_hits[-1] if ph_hits else 'not specified'
        temp_hits = temp_regex.findall(sa_line)
        temp_value = temp_hits[-1] if temp_hits else 'not specified'
        sa_values.append([ec_number, pr_numbers, sa_value, ph_value,
                          temp_value])
    return sa_values
        
def get_sa(ec_file, ec_number):
    r'''
    Append the specific activity rows found in ec_file to SA_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(SA_FILE, 'a+') as sa_out:
        write_info_on_csv(regex_for_sa(ec_file, ec_number), sa_out)

### molecular weight (mw) ###

def regex_for_mw(ec_file, ec_number): 
    r''' 
    Collect the molecular weight (mw) entries of one EC number with regular
    expressions.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    mw_values: list of rows [ec_number, pr_numbers, mw_value]; pr_numbers
    may hold several comma-separated protein numbers (e.g. '1,2,3')
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Selects whole lines shaped like: MW  #pr_num#  MW_value ...
    line_regex = re.compile(r'^MW\s\#\d+[,\d*]*\# \d\d\d*.*', flags)
    # Captures the #num# list and the mw value of a selected line
    fields_regex = re.compile(r'^MW\s\#(\d*[,\d*]*)\# (\d\d\d*).*', flags)
    return [[ec_number, *fields_regex.match(mw_line).groups()]
            for mw_line in line_regex.findall(ec_file)]
        
def get_mw(ec_file, ec_number):
    r'''
    Append the molecular weight rows found in ec_file to MW_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(MW_FILE, 'a+') as mw_out:
        write_info_on_csv(regex_for_mw(ec_file, ec_number), mw_out)
        
### organisms names (org_names) ###

def regex_for_org_names(ec_file, ec_number): 
    r''' 
    Collect the organism names (org_names) of one EC number with regular
    expressions.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    org_names_list: list of rows [ec_number, pr_numbers, org_name], where
    org_name is the first two words following the protein number
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Selects whole lines shaped like: PR  #pr_num#  org_name ...
    line_regex = re.compile(r'^PR\t\#[0-9]+,*[0-9]*\# \w+ \w+.*', flags)
    # Captures the #num# part and the two-word organism name
    fields_regex = re.compile(r'^PR\t\#([0-9]+,*[0-9]*)\# (\w+ \w+).*', flags)
    return [[ec_number, *fields_regex.match(pr_line).groups()]
            for pr_line in line_regex.findall(ec_file)]
        
def get_org_names(ec_file, ec_number):
    r'''
    Append the organism name rows found in ec_file to ORG_NAMES_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(ORG_NAMES_FILE, 'a+') as org_names_out:
        write_info_on_csv(regex_for_org_names(ec_file, ec_number),
                          org_names_out)

### optimum pH (opt_ph) ###

def regex_for_opt_ph(ec_file, ec_number): 
    r''' 
    Collect the optimum pH (opt_ph) entries of one EC number with regular
    expressions.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    opt_phs: list of rows [ec_number, pr_numbers, opt_ph_value]; pr_numbers
    may hold several comma-separated protein numbers (e.g. '1,2,3')
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Selects whole lines shaped like: PHO  #pr_num#  opt_ph_value ...
    line_regex = re.compile(
        r'^PHO\s\#\d*[,\d*]*\# [0-9]+\.*[0-9]* [\(]?.*', flags)
    # Captures the #num# list and the optimum pH value
    fields_regex = re.compile(
        r'^PHO\s\#(\d*[,\d*]*)\# ([0-9]+\.*[0-9]*) [\(]?.*', flags)
    return [[ec_number, *fields_regex.match(pho_line).groups()]
            for pho_line in line_regex.findall(ec_file)]
        
def get_opt_ph(ec_file,ec_number):
    r'''
    Append the optimum pH rows found in ec_file to OPT_PH_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(OPT_PH_FILE, 'a+') as opt_ph_out:
        write_info_on_csv(regex_for_opt_ph(ec_file, ec_number), opt_ph_out)

### pH range (ph_range) ###

def regex_for_ph_range(ec_file, ec_number): 
    r''' 
    Obtain the pH ranges (ph_range) information using regular expressions,
    it contains the EC number, PR number, the lower and the upper bound of pH.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    ph_ranges: list of rows
    [ec_number, pr_numbers, ph_range_lower_bound, ph_range_upper_bound];
    pr_numbers may hold several comma-separated protein numbers
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Pattern looking for: PHR  #pr_num#  lower-upper
    # The upper bound previously used ',*' where a decimal point was meant,
    # so "6.5-8.5" was reported with an upper bound of "8". It now accepts an
    # optional decimal part.
    line_regex = re.compile(
        r'^PHR\t\#\d*[,\d*]*\# [0-9]+\.*[0-9]*-[0-9]+(?:\.[0-9]+)?.*', flags)
    # extract #num# and both ph_range values
    fields_regex = re.compile(
        r'^PHR\t\#(\d*[,\d*]*)\# ([0-9]+\.*[0-9]*)-([0-9]+(?:\.[0-9]+)?).*',
        flags)
    return [[ec_number, *fields_regex.match(phr_line).groups()]
            for phr_line in line_regex.findall(ec_file)]
        
def get_ph_range(ec_file, ec_number):
    r'''
    Append the pH range rows found in ec_file to PH_RANGE_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(PH_RANGE_FILE, 'a+') as ph_range_out:
        write_info_on_csv(regex_for_ph_range(ec_file, ec_number),
                          ph_range_out)

### optimum temperature (opt_temp) ###

def regex_for_opt_temp(ec_file, ec_number): 
    r''' 
    Collect the optimum temperature (opt_temp) entries of one EC number with
    regular expressions. Only two-digit values followed by a space are
    picked up by the patterns.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    opt_temps: list of rows [ec_number, pr_numbers, opt_temp_value];
    pr_numbers may hold several comma-separated protein numbers
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Selects whole lines shaped like: TO  #pr_num#  opt_temp_value ...
    line_regex = re.compile(r'^TO.*\#\d*[,\d*]*\# [0-9][0-9] .*', flags)
    # Captures the #num# list and the optimum temperature value
    fields_regex = re.compile(
        r'^TO.*\#(\d*[,\d*]*)\# ([0-9][0-9]) .*', flags)
    return [[ec_number, *fields_regex.match(to_line).groups()]
            for to_line in line_regex.findall(ec_file)]
        
def get_opt_temp(ec_file, ec_number):
    r'''
    Append the optimum temperature rows found in ec_file to OPT_TEMP_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(OPT_TEMP_FILE, 'a+') as opt_temp_out:
        write_info_on_csv(regex_for_opt_temp(ec_file, ec_number),
                          opt_temp_out)

### temperature range (temp_range) ###

def regex_for_temp_range(ec_file, ec_number):
    r''' 
    Collect the temperature range (temp_range) entries of one EC number with
    regular expressions.
    
    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    Output:
    -------
    temp_ranges: list of rows
    [ec_number, pr_numbers, temp_range_lower_bound, temp_range_upper_bound];
    pr_numbers may hold several comma-separated protein numbers
    '''
    flags = re.IGNORECASE | re.MULTILINE
    # Selects whole lines shaped like: TR  #pr_num#  lower-upper ...
    line_regex = re.compile(r'^TR\t\#\d*[,\d*]*\# [0-9]+-[0-9]+ .*', flags)
    # Captures the #num# list and both temperature bounds
    fields_regex = re.compile(
        r'^TR\t\#(\d*[,\d*]*)\# ([0-9]+)-([0-9]+) .*', flags)
    return [[ec_number, *fields_regex.match(tr_line).groups()]
            for tr_line in line_regex.findall(ec_file)]

        
def get_temp_range(ec_file,ec_number):
    r'''
    Append the temperature range rows found in ec_file to TEMP_RANGE_FILE.

    Inputs:
    -------
    ec_file: BRENDA sub file corresponding only to one EC number information
    ec_number: respective EC number to ec_file
    '''
    with open(TEMP_RANGE_FILE,'a+') as temp_range_out:
        write_info_on_csv(regex_for_temp_range(ec_file, ec_number),
                          temp_range_out)

## Files Handling

Creates the .csv files that hold the information for every scraped parameter listed above:

**kcat**:
- EC number
- Protein number 
- kcat value 
- substrate 
- pH
- temperature

**sa**:
- EC number
- Protein number 
- specific activity value
- pH
- temperature

**mw**:
- EC number
- Protein number 
- molecular weight

**org_names**:
- EC number
- Protein number 
- organism's name

**opt_ph**:
- EC number
- Protein number 
- optimum pH value

**ph_range**:
- EC number
- Protein number 
- lower bound pH value
- upper bound pH value

**opt_temp**:
- EC number
- Protein number 
- optimum temperature value

**temp_range**:
- EC number
- Protein number 
- lower bound temperature value
- upper bound temperature value

In [None]:
def cleanup_data():
    r'''
    Delete every parameter csv file that already exists and recreate all of
    them through create_parameters_files().
    '''
    for stale_file in (KCAT_FILE, SA_FILE, MW_FILE, ORG_NAMES_FILE,
                       OPT_PH_FILE, PH_RANGE_FILE, OPT_TEMP_FILE,
                       TEMP_RANGE_FILE):
        if path.exists(stale_file):
            remove(stale_file)
    create_parameters_files()

def create_parameters_files():
    r'''
    Create (or truncate) one csv file per extracted parameter and write its
    header line, with the column labels joined by SEPARATOR.
    '''
    # (output file, column labels), in the order the files are created
    headers = (
        (KCAT_FILE, ['ECnumber', 'Protein_number', 'Kcat', 'Substrate',
                     'pH', 'Temperature']),
        (SA_FILE, ['ECnumber', 'Protein_number', 'Specific_activity', 'pH',
                   'Temperature']),
        (MW_FILE, ['ECnumber', 'Protein_number', 'Molecular_weight']),
        (ORG_NAMES_FILE, ['ECnumber', 'Protein_number', 'Organism']),
        (OPT_PH_FILE, ['ECnumber', 'Protein_number', 'Optimum_pH']),
        (PH_RANGE_FILE, ['ECnumber', 'Protein_number', 'pH_lower_bound',
                         'pH_upper_bound']),
        (OPT_TEMP_FILE, ['ECnumber', 'Protein_number',
                         'Optimum_temperature']),
        (TEMP_RANGE_FILE, ['ECnumber', 'Protein_number', 'Temp_lower_bound',
                           'Temp_upper_bound']),
    )
    for file_name, col_labels in headers:
        # 'with' closes the file even if the write fails
        with open(file_name, 'w') as header_out:
            header_out.write(SEPARATOR.join(col_labels) + '\n')

    
    
def start_ec_info(line):
    r'''
    Tell whether a line opens the information of an EC number, i.e. whether
    it contains 'ID<tab>x.x.x.x'.

    Inputs:
    -------
    line: one line of the BRENDA file
    Output:
    -------
    True when the EC identifier pattern is found, False otherwise
    '''
    EC_PATTERN = r'ID\t\d+\.\d+\.\d+\.\d+'
    # Always return a real boolean (the no-match case used to fall through
    # and return None), matching end_ec_info.
    return re.search(EC_PATTERN, line) is not None
    
def end_ec_info(line):
    r'''
    Tell whether a line contains the '///' terminator that closes the
    information of an EC number.

    Inputs:
    -------
    line: one line of the BRENDA file
    Output:
    -------
    True when '///' is found, False otherwise
    '''
    TERMINATOR_PATTERN = r'\/\/\/'
    return re.search(TERMINATOR_PATTERN, line) is not None

def get_ec_number(line):
    r'''
    Extract the EC number from an 'ID<tab>x.x.x.x' line.

    Inputs:
    -------
    line: line of the BRENDA file holding the EC identifier
    Output:
    -------
    the first EC number found, as a string (IndexError when there is none)
    '''
    EC_PATTERN = r'ID\t(\d+\.\d+\.\d+\.\d+)'
    found_numbers = re.findall(EC_PATTERN, line)
    return found_numbers[0]

def get_ec_info_per_subfile(file):    
    r'''
    Read BRENDA's .txt file, build one subfile per EC number and hand it to
    get_ec_info(info, ec_number).
    Every EC number's information in the BRENDA file starts with a line
    holding the EC number (ID   x.x.x.x) and ends with a '///' line; these
    delimiters are recognized with the auxiliary functions
    start_ec_info(line) and end_ec_info(line).
    If the file ends before the closing '///', the lines read so far are
    still processed as the last subfile.
    
    Input:
    ------
    file: BRENDA's .txt file, opened for reading
    '''
    while True:
        line = file.readline()
        if line == '': 
            break
        if not start_ec_info(line):
            continue
        ec_number = get_ec_number(line)
        # Collect the lines and join once instead of growing a string.
        record_lines = [line]
        while not end_ec_info(line):
            line = file.readline()
            if line == '':
                # End of file without terminator: readline() would return ''
                # forever, so stop here instead of looping endlessly.
                break
            record_lines.append(line)
        get_ec_info(''.join(record_lines), ec_number)
    
def get_ec_info(ec_file, ec_number):
    r'''
    Run every parameter extractor on one BRENDA subfile; each extractor
    appends what it finds to its own csv file.
    
    Inputs:
    -------
    ec_file: a subfile from BRENDA which corresponds to a unique 
    EC number information
    ec_number: respective EC number to ec_file
    '''
    extractors = (get_kcat, get_sa, get_opt_ph, get_opt_temp, get_ph_range,
                  get_temp_range, get_mw, get_org_names)
    for extract in extractors:
        extract(ec_file, ec_number)

def get_info_from_BRENDA():
    r'''
    Obtain all the relevant parameters information from BRENDA using
    the auxiliary functions previously described. The output directory
    BRENDA_UNFILTERED_DATA_DIR is created when missing (existing parameter
    files are reset otherwise), and every parameter is saved there in csv
    format.
    '''
    # Local import: only path, mkdir and remove are imported at file level.
    from os import makedirs

    if path.isdir(BRENDA_UNFILTERED_DATA_DIR): 
        cleanup_data()
    else:
        # The output directory is nested ('./BRENDA_data/...'); mkdir fails
        # when the parent directory does not exist, makedirs creates both.
        makedirs(BRENDA_UNFILTERED_DATA_DIR)
        create_parameters_files()
    with open(BRENDA_INPUT_FILE, 'r') as brenda_file:
        get_ec_info_per_subfile(brenda_file)

In [None]:
# Run the whole extraction: prepares the output csv files and parses
# BRENDA_INPUT_FILE one EC number at a time.
get_info_from_BRENDA()