In [1]:
# Show plots as part of the notebook
%matplotlib inline

# Show images inline
from IPython.display import Image

# Standard library packages
import io
import os

# Import Biopython modules to interact with KEGG
from Bio import SeqIO
from Bio.KEGG import REST
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas

# Import Pandas, so we can use dataframes
import pandas as pd

In [2]:
import requests
import re

def retrieve_genes_for_pathway(pathway: str) -> None:
    '''use the link tool to get all genes of a certain pathway'''
    resp = requests.get("https://rest.kegg.jp/link/hsa/" + pathway)
    split_text = re.split(r'[\t\n]', resp.text) #why is my list of odd length?
    genes = []
    for idx in range(1, len(split_text), 2):
        genes.append(split_text[idx])
    return genes
    



In [3]:
gene_ids = retrieve_genes_for_pathway("hsa00010")

In [5]:
def to_df(result):
    return pd.read_table(io.StringIO(result), header=None)

gene_names = []
for id in gene_ids:
    result = REST.kegg_list(id).read()
    gene_names.extend(result.split(';')[0].split(','))

gene_names

['hsa:10327\tAKR1A1',
 ' ALDR1',
 ' ALR',
 ' ARM',
 ' DD3',
 ' HEL-S-6',
 'hsa:124\tADH1A',
 ' ADH1',
 'hsa:125\tADH1B',
 ' ADH2',
 ' HEL-S-117',
 'hsa:126\tADH1C',
 ' ADH3',
 'hsa:127\tADH4',
 ' ADH-2',
 ' HEL-S-4',
 'hsa:128\tADH5',
 ' ADH-3',
 ' ADHX',
 ' AMEDS',
 ' BMFS7',
 ' FALDH',
 ' FDH',
 ' GSH-FDH',
 ' GSNOR',
 ' HEL-S-60p',
 'hsa:130\tADH6',
 ' ADH-5',
 'hsa:130589\tGALM',
 ' BLOCK25',
 ' GALAC4',
 ' GLAT',
 ' HEL-S-63p',
 ' IBD1',
 'hsa:131\tADH7',
 ' ADH4',
 'hsa:160287\tLDHAL6A',
 ' LDH6A',
 'hsa:1737\tDLAT',
 ' DLTA',
 ' E2',
 ' PBC',
 ' PDC-E2',
 ' PDCE2',
 'hsa:1738\tDLD',
 ' DLDD',
 ' DLDH',
 ' E3',
 ' GCSL',
 ' LAD',
 ' OGDC-E3',
 ' PHE3',
 'hsa:2023\tENO1',
 ' ENO1-IT1',
 ' ENO1L1',
 ' HEL-S-17',
 ' MPB1',
 ' NNE',
 ' PPH',
 'hsa:2026\tENO2',
 ' HEL-S-279',
 ' NSE',
 'hsa:2027\tENO3',
 ' GSD13',
 ' MSE',
 'hsa:217\tALDH2',
 ' ALDH-E2',
 ' ALDHI',
 ' ALDM',
 'hsa:218\tALDH3A1',
 ' ALDH3',
 ' ALDHIII',
 'hsa:219\tALDH1B1',
 ' ALDH5',
 ' ALDHX',
 'hsa:2203\tFBP1',
 ' F

In [37]:

result = REST.kegg_get("hsa:10327").read()
print(result)


ENTRY       10327             CDS       T01001
SYMBOL      AKR1A1, ALDR1, ALR, ARM, DD3, HEL-S-6
NAME        (RefSeq) aldo-keto reductase family 1 member A1
ORTHOLOGY   K00002  alcohol dehydrogenase (NADP+) [EC:1.1.1.2]
ORGANISM    hsa  Homo sapiens (human)
PATHWAY     hsa00010  Glycolysis / Gluconeogenesis
            hsa00040  Pentose and glucuronate interconversions
            hsa00053  Ascorbate and aldarate metabolism
            hsa00561  Glycerolipid metabolism
            hsa00620  Pyruvate metabolism
            hsa01100  Metabolic pathways
            hsa01240  Biosynthesis of cofactors
            hsa05208  Chemical carcinogenesis - reactive oxygen species
MODULE      hsa_M00014  Glucuronate pathway (uronate pathway)
NETWORK     nt06226  KEAP1-NRF2 signaling (cancer)
            nt06251  CYP-mediated ROS formation (cancer)
  ELEMENT   N01401  Benzo[a]pyrenre to CYP-mediated metabolism
            N01413  Metals to KEAP1-NRF2 signalig pathway
BRITE       KEGG Orthology (KO) 