### retrieveTurnoverNumber

This script is designed to fetch turnover numbers and molecular weights for specified enzymes 
from the BRENDA database using their EC (Enzyme Commission) numbers. It utilizes the BRENDA 
SOAP web service to retrieve this biochemical data. The script requires the following:
- A client configured for accessing the BRENDA SOAP API.
- An input Excel file named "EC_Number_to_Reactions.xlsx" that contains the EC numbers.

#### Key Features:
- Fetch kcat (turnover number) and MW (molecular weight) for each enzyme listed in the Excel file.
- Outputs the results to a new sheet within the same Excel file, facilitating easy comparison and analysis.

#### Requirements:
- pandas library for DataFrame operations.
- zeep library for SOAP web service interaction.
- An account with BRENDA for necessary credentials.

#### Usage:
1. Ensure that the "EC_Number_to_Reactions.xlsx" file is present in the specified directory and contains a column 'EC Number'. (please check the code below this box)
2. Configure the client with appropriate credentials (parameters).
3. Run the script to append the fetched data into the Excel file under a new sheet named 'BRENDA Results+name_of_organism '.

Note: This script assumes that the user has already set up and authenticated a client for the BRENDA SOAP API.
"""

In [None]:
import pandas as pd

# Read the excel file (model)
file_path = r".../Chinese Hamster Network Reconstruction.xlsx"
sheet_name = "Attributes"

df = pd.read_excel(file_path, sheet_name=sheet_name)

ec_to_reaction = {}

# List up the EC numbers and corresponding reactions in the model
for index, row in df.iterrows():
    ec_numbers = str(row['EC Number']).split(", ")
    for ec_number in ec_numbers:
        if ec_number not in ec_to_reaction:
            ec_to_reaction[ec_number] = [row['Reaction']]
        else:
            if row['Reaction'] not in ec_to_reaction[ec_number]:
                ec_to_reaction[ec_number].append(row['Reaction'])

# Convert into dataframe
ec_list = []
reactions_list = []
for ec, reactions in ec_to_reaction.items():
    ec_list.append(ec)
    reactions_list.append(", ".join(reactions))

result_df = pd.DataFrame({
    'EC Number': ec_list,
    'Reactions': reactions_list
})

# Save the list of EC numbers in model as new excel sheet
output_file_path = r"C:\Users\user\Documents\DC\Manual curation_iCHO\EC_Number_to_Reactions.xlsx"
result_df.to_excel(output_file_path, index=False)

print("Finish! output path is:", output_file_path)


In [1]:
#!/usr/bin/python
from zeep import Client
import hashlib
import pandas as pd
from openpyxl import load_workbook



###for parameters
email = 'BRENDA ID'
password = "your password"
organism_list = ['Homo sapiens', 'Cricetulus griseus', 'Mus musculus']
# (example) Choose human information for fetching the kcat and MW
organism = organism_list[0]
# Initialize the global dataframe 'results_df'
results_df = pd.DataFrame({"EC Number":[], f"kcat Values_{organism}":[], "MW Values":[]})
                                            
wsdl = "https://www.brenda-enzymes.org/soap/brenda_zeep.wsdl"
password = hashlib.sha256(f"{password}".encode("utf-8")).hexdigest()
client = Client(wsdl)

In [2]:
def get_max_turnover(kcat_results):
    data_list = []
    for item in kcat_results:  
        data_dict = {
            'substrate': item.substrate,
            'organism': item.organism,
            'turnoverNumber': item.turnoverNumber,
            'ecNumber': item.ecNumber,
        }
        data_list.append(data_dict)
    
    # Create DataFrame from the list of dictionaries
    df_tmp = pd.DataFrame(data_list)
    
    # Convert 'turnoverNumber' to numeric, coercing errors to NaN
    df_tmp['turnoverNumber'] = pd.to_numeric(df_tmp['turnoverNumber'], errors='coerce')
    
    # Filter to include only positive 'turnoverNumber' values
    positive_df = df_tmp[df_tmp['turnoverNumber'] > 0]
    
    # Find the maximum value among the positive 'turnoverNumber' values
    max_value = positive_df['turnoverNumber'].max()
    
    return max_value


In [3]:
def get_MW(mw_results):
    data_list_mw = []
    for item in mw_results:  
        data_dict_mw = {
            'molecularWeight': item.molecularWeight,
            'organism': item.organism,
            'ecNumber': item.ecNumber,
        }
        data_list_mw.append(data_dict_mw)
    
    df_tmp = pd.DataFrame(data_list_mw)
    

    df_tmp['molecularWeight'] = pd.to_numeric(df_tmp['molecularWeight'], errors='coerce')
        
    # Filter to include only positive 'turnoverNumber' values
    positive_df = df_tmp[df_tmp['molecularWeight'] > 0]
    MW_value = positive_df['molecularWeight'].min()
    
    return MW_value


In [9]:

def get_kcat_and_mw(client, ec_number, organism):
    global results_df  
    # Parameters for API calls and data processing
    parameters = (f"{email}",password,f"ecNumber*{ec_number}","turnoverNumber*", "turnoverNumberMaximum*", "substrate*", "commentary*",f"organism*{organism}", "ligandStructureId*", "literature*")
    parameters_mw = (f"{email}",password,f"ecNumber*{ec_number}", "molecularWeight*", "molecularWeightMaximum*", "commentary*", f"organism*{organism}", "literature*")
    
    try:
        kcat_results = client.service.getTurnoverNumber(*parameters)
        mw_results = client.service.getMolecularWeight(*parameters_mw)
        max_value = get_max_turnover(kcat_results)
        MW_value = get_MW(mw_results)
        
        print(f"EC Number: {ec_number}")
        print(f"kcat Values_max: {max_value}")
        print(f"MW Values: {MW_value}")
        print("------")
        
        # Add new row to the DataFrame
        new_row = pd.DataFrame({"EC Number": [ec_number], f"kcat Values_{organism}": [max_value], "MW Values": [MW_value]})
        results_df = pd.concat([results_df, new_row], ignore_index=True)

    except Exception as e:
        print(f"Error retrieving data for EC Number {ec_number}: {e}")

# Read Excel file and save results
input_file_path = r"C:\Users\user\Documents\DC\Manual curation_iCHO\EC_Number_to_Reactions.xlsx"
df = pd.read_excel(input_file_path)

# Run the function for each EC Number
for ec_number in df['EC Number']:
    get_kcat_and_mw(client, ec_number, organism)  

# Save the results to a new sheet in the Excel file
with pd.ExcelWriter(input_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    results_df.to_excel(writer, sheet_name=f"BRENDA Results_{organism}", index=False)

print("The BRENDA search results have been saved to a new sheet in the Excel file.")

EC Number: 2.6.1.44
kcat Values_max: 45.0
MW Values: 42700
------
EC Number: 4.3.2.1
kcat Values_max: 34.5
MW Values: 49000
------
Error retrieving data for EC Number 6.3.4.5: 'turnoverNumber'
EC Number: 3.5.1.1
kcat Values_max: 3.19
MW Values: 12000
------
Error retrieving data for EC Number 3.5.1.38: 'turnoverNumber'
Error retrieving data for EC Number 3.5.5.4: 'turnoverNumber'
EC Number: 6.3.5.4
kcat Values_max: 1.8
MW Values: 64000
------
EC Number: 2.3.1.17
kcat Values_max: 0.035
MW Values: 33900
------
Error retrieving data for EC Number 1.4.1.21: 'turnoverNumber'
Error retrieving data for EC Number 1.4.3.16: 'turnoverNumber'
Error retrieving data for EC Number 1.4.3.2: 'turnoverNumber'
Error retrieving data for EC Number 2.6.1.1: 'turnoverNumber'
EC Number: 1.4.3.1
kcat Values_max: 81.3
MW Values: 37000
------
Error retrieving data for EC Number 1.4.3.15: 'turnoverNumber'
Error retrieving data for EC Number nan: 'turnoverNumber'
Error retrieving data for EC Number 3.5.1.14: 'mol

In [None]:
for ec_number in df['EC Number']:
    get_kcat_and_mw(client, ec_number, organism_list[1])  # 
    
# Save the results to a new sheet in the Excel file
with pd.ExcelWriter(input_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    results_df.to_excel(writer, sheet_name="BRENDA Results_CHO, index=False)

In [None]:
for ec_number in df['EC Number']:
    get_kcat_and_mw(client, ec_number, organism_list[2])  # 
    
# Save the results to a new sheet in the Excel file
with pd.ExcelWriter(input_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    results_df.to_excel(writer, sheet_name="BRENDA Results_mouse", index=False)

Error retrieving data for EC Number 2.6.1.44: 'turnoverNumber'
Error retrieving data for EC Number 4.3.2.1: 'turnoverNumber'
Error retrieving data for EC Number 6.3.4.5: 'turnoverNumber'
Error retrieving data for EC Number 3.5.1.1: 'turnoverNumber'
Error retrieving data for EC Number 3.5.1.38: 'turnoverNumber'
Error retrieving data for EC Number 3.5.5.4: 'turnoverNumber'
Error retrieving data for EC Number 6.3.5.4: 'turnoverNumber'
Error retrieving data for EC Number 2.3.1.17: 'turnoverNumber'
Error retrieving data for EC Number 1.4.1.21: 'turnoverNumber'
Error retrieving data for EC Number 1.4.3.16: 'turnoverNumber'
Error retrieving data for EC Number 1.4.3.2: 'turnoverNumber'
Error retrieving data for EC Number 2.6.1.1: 'turnoverNumber'
EC Number: 1.4.3.1
kcat Values_max: 112.1
MW Values: 37620
------
Error retrieving data for EC Number 1.4.3.15: 'turnoverNumber'
Error retrieving data for EC Number nan: 'turnoverNumber'
EC Number: 3.5.1.14
kcat Values_max: 0.6
MW Values: 140000
-----

In [7]:
 organism_list[1]

'Cricetulus griseus'