In [5]:
# Show plots as part of the notebook
%matplotlib inline

# Show images inline
from IPython.display import Image

# Standard library packages
import io
import os

# Import Biopython modules to interact with KEGG
from Bio import SeqIO
from Bio.KEGG import REST
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas

# Import Pandas, so we can use dataframes
import pandas as pd

In [2]:
!pip install reportlab


Collecting reportlab
  Obtaining dependency information for reportlab from https://files.pythonhosted.org/packages/73/12/6444906db1bc65d3a8118afb089d53c7eeca0726164f51eb3599de1d0665/reportlab-4.2.5-py3-none-any.whl.metadata
  Downloading reportlab-4.2.5-py3-none-any.whl.metadata (1.5 kB)
Collecting chardet (from reportlab)
  Obtaining dependency information for chardet from https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl.metadata
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Downloading reportlab-4.2.5-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading chardet-5.2.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.4/199.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling c

In [6]:
# Ask the KEGG REST service for its database summary and read the whole reply
kegg_info_handle = REST.kegg_info("kegg")
result = kegg_info_handle.read()

# Show the raw text returned by KEGG
print(result)

# NOTE: the kegg_info() reply is free-form text rather than tabular data, so it
# is not converted to a dataframe here (i.e. to_df(result) is deliberately skipped).

kegg             Kyoto Encyclopedia of Genes and Genomes
kegg             Release 112.0+/11-12, Nov 24
                 Kanehisa Laboratories
                 pathway   1,266,523 entries
                 brite       405,791 entries
                 module          571 entries
                 orthology    27,241 entries
                 genome       25,135 entries
                 genes     56,809,837 entries
                 compound     19,409 entries
                 glycan       11,232 entries
                 reaction     12,157 entries
                 rclass        3,202 entries
                 enzyme        8,201 entries
                 network       1,580 entries
                 variant       1,521 entries
                 disease       2,826 entries
                 drug         12,531 entries
                 dgroup        2,487 entries


In [7]:
import requests
import re

def retrieve_genes_for_pathway(pathway: str, timeout: float = 30.0) -> list[str]:
    """Return the gene identifiers that KEGG links to ``pathway``.

    Sends a GET request to ``https://rest.kegg.jp/link/genes/<pathway>`` and
    parses the reply, which is expected to be tab-separated text with one
    ``<queried id><TAB><gene id>`` pair per line.

    Args:
        pathway: KEGG entry identifier appended verbatim to the link URL
            (for example ``"K00500"``).
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The second column of every well-formed line, in response order.
        An empty list when the reply contains no such lines.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    resp = requests.get("https://rest.kegg.jp/link/genes/" + pathway, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page as data.
    resp.raise_for_status()

    genes = []
    # Parse line by line: the reply ends with a newline, so a flat split on
    # tabs/newlines yields a trailing empty field (hence an odd-length list)
    # and turns a blank reply into [''] instead of [].
    for line in resp.text.splitlines():
        fields = line.split("\t")
        if len(fields) >= 2 and fields[1]:
            genes.append(fields[1])
    return genes
    



In [8]:
# Fetch every gene KEGG links to the entry "K00500"; the returned list is the cell output.
# NOTE(review): "K00500" looks like a KEGG Orthology (KO) identifier rather than a
# pathway id — confirm this is the intended argument for a function named "...for_pathway".
retrieve_genes_for_pathway("K00500")

['hsa:5053',
 'ptr:741097',
 'pps:100982303',
 'ggo:101125405',
 'pon:100171844',
 'ppyg:129010939',
 'nle:100596095',
 'hmh:116478033',
 'ssyn:129460063',
 'mcc:698696',
 'mcf:102130988',
 'mthb:126931769',
 'mni:105493809',
 'csab:103238962',
 'caty:105573301',
 'panu:101011469',
 'tge:112635151',
 'mleu:105530904',
 'rro:104657462',
 'rbb:108543127',
 'tfn:117089527',
 'pteh:111524073',
 'cang:105506785',
 'cjc:100395351',
 'sbq:101037263',
 'cimi:108312344',
 'cimi:108294863',
 'anan:105728704',
 'csyr:103264538',
 'mmur:105875570',
 'lcat:123639372',
 'pcoq:105815128',
 'oga:100950776',
 'mmu:18478',
 'mcal:110303220',
 'mpah:110326639',
 'rno:24616',
 'mcoc:116076267',
 'anu:117696539',
 'mun:110566216',
 'cge:100774376',
 'maua:101822550',
 'prob:127237829',
 'pleu:114700618',
 'morg:121460508',
 'mfot:126503746',
 'aamp:119803727',
 'ngi:103733720',
 'hgl:101705870',
 'cpoc:100719535',
 'ccan:109697465',
 'dord:105989305',
 'dsp:122119124',
 'plop:125359362',
 'ncar:124982228',