## Versions

Python: 3.7.2

pandas: 1.3.5

Pip: 23.0.1

Biopython: 1.81

keggtools: 1.0.1

# Retrieve EC codes from Mgx data

Install the pandas package with pip:

> python -m pip install pandas

In [2]:
import pandas as pd

# Load the Mgx table (read_table expects tab-separated columns by default).
Mgx = pd.read_table('../analysis/data/mgxData')

# Split every "Gene.Family" entry on ":" and keep the first piece of each
# row; that leading piece is the EC code stored in Mgx_EC.
Mgx_EC = (
    Mgx["Gene.Family"]
    .str.split(":", expand=True)
    .iloc[:, 0]
)

# KEGG pathways from EC codes

Install the Biopython package with pip:

> python -m pip install Biopython

In [None]:
from Bio.KEGG import REST
import json

# Maps each EC number to the list of KEGG pathway IDs linked to it.
pathways_dict = {}

# Query every distinct EC code only once (in first-seen order) so that
# repeated codes neither trigger repeated requests nor duplicate entries.
for ec_number in dict.fromkeys(Mgx_EC):
    # Skip missing values (non-strings) and preliminary EC codes such as
    # "1.3.1.n3" (-> Mgx_EC[430]), which cause a Bad request error.
    if not isinstance(ec_number, str) or ec_number.islower():
        continue

    link_lines = REST.kegg_link("pathway", ec_number).read().strip().split("\n")
    if link_lines == [""]:  # KEGG returned nothing for this EC code
        continue

    for link_line in link_lines:
        # Each line is expected to read "<db>:<EC number>\tpath:<pathway ID>".
        ec_entry, pathway_id = link_line.split("\tpath:")
        ec_code = ec_entry.split(":")[1]
        pathways_dict.setdefault(ec_code, []).append(pathway_id)

with open("data/pathways_dict.json", "w") as file:
    json.dump(pathways_dict, file)  # encode dict into JSON

## Loading the dictionary of pathways

In [3]:
import json

# Read the saved JSON text and decode it back into the pathways dictionary.
with open("data/pathways_dict.json", "r") as json_file:
    pathways_dict = json.loads(json_file.read())

# Enrichment and Testing



Download the list of all EC pathways from KEGG

In [12]:
import requests

# Define the URL for the KEGG REST API
url = 'http://rest.kegg.jp/list/pathway/ec'

# Send a GET request to the URL to retrieve the pathway IDs.
# The timeout (in seconds) keeps the cell from hanging if KEGG never answers.
response = requests.get(url, timeout=30)

# Stop here on an HTTP error instead of parsing an error body as pathway IDs.
response.raise_for_status()

# Parse the response to get the pathway IDs: the ID is the first
# tab-separated field of each line. Blank lines are skipped so an empty
# response gives an empty list rather than [''].
all_pathway_ids = [
    line.split('\t')[0]
    for line in response.text.strip().split('\n')
    if line
]

Retrieve the enzymes associated with each pathway

In [None]:
import re
import json

# Captures the text between the "ENZYME" and "COMPOUND" field names of a
# KEGG entry. Compiled once, since it is identical for every pathway.
# NOTE(review): an entry with no "COMPOUND" text after "ENZYME" never
# matches and is therefore left out of ec_pathways - confirm this is intended.
enzyme_section = re.compile(r"ENZYME\s+(.*?)\s+COMPOUND", re.DOTALL)

# Maps each pathway ID to the list of enzyme entries found in its record.
ec_pathways = {}

for pathway_id in all_pathway_ids:
    pathway_url = f'http://rest.kegg.jp/get/{pathway_id}'
    # The timeout (in seconds) prevents one stalled request from blocking
    # the whole loop.
    pathway_response = requests.get(pathway_url, timeout=30)
    if not pathway_response.ok:
        # Report the failure explicitly instead of silently running the
        # regex over an error body; the remaining pathways are still fetched.
        print(f"Skipping {pathway_id}: HTTP {pathway_response.status_code}")
        continue
    pathway_text = pathway_response.text.strip()

    # find the lines containing enzyme information
    enzyme_lines = enzyme_section.search(pathway_text)
    if enzyme_lines:
        # One enzyme per line; drop the leading indentation of each line.
        enzymes = [s.lstrip() for s in enzyme_lines.group(1).strip().split('\n')]
        ec_pathways[pathway_id] = enzymes

with open("data/all_pathways_enzymes_dict.json", "w") as file:
    json.dump(ec_pathways, file)  # encode dict into JSON

In [42]:
import json

# Read the saved JSON text and decode it back into the pathway -> enzymes
# dictionary.
with open("data/all_pathways_enzymes_dict.json", "r") as json_file:
    all_pathways_enzymes_dict = json.loads(json_file.read())