### KEGG

In [1]:
import requests
import pandas as pd

# KEGG REST endpoint listing compound -> pathway links, one tab-separated pair per line.
url = "https://rest.kegg.jp/link/pathway/cpd"
# Without a timeout requests waits indefinitely on a stalled server and the
# cell never finishes; 60 seconds is generous for this text download.
response = requests.get(url, timeout=60)

if response.status_code == 200:
    # splitlines() copes with both "\n" and "\r\n"; blank lines are skipped so
    # an empty body does not produce a one-field row that breaks the
    # two-column DataFrame construction below.
    data = [line for line in response.text.splitlines() if line.strip()]
    rows = [line.split('\t') for line in data]
    df = pd.DataFrame(rows, columns=["compound", "pathway"])

    # Persist the raw link table so later cells can reload it from disk.
    df.to_csv("compound_pathway_links.csv", index=False)
    print("Saved 'compound_pathway_links.csv'")
    print(df.head())
else:
    print(f"Failed: {response.status_code}")

Saved 'compound_pathway_links.csv'
     compound        pathway
0  cpd:C00022  path:map00010
1  cpd:C00024  path:map00010
2  cpd:C00031  path:map00010
3  cpd:C00033  path:map00010
4  cpd:C00036  path:map00010


In [1]:
# Download Link: https://rest.kegg.jp/link/pathway/cpd (KEGG REST API)
# Download Date: 2025-03-21
# Download Version: 2025-03-21
import pandas as pd

# Reload the compound -> pathway link table from the CSV in the working directory.
comp_pathway = pd.read_csv('compound_pathway_links.csv')

# Derive prefix-free keys: the compound accession ('C' followed by five digits)
# and the first five-digit run found in the pathway identifier.
comp_pathway = comp_pathway.assign(
    clean_compound=comp_pathway['compound'].str.extract(r'(C\d{5})', expand=False),
    clean_pathway=comp_pathway['pathway'].str.extract(r'(\d{5})', expand=False),
)
comp_pathway

Unnamed: 0,compound,pathway,clean_compound,clean_pathway
0,cpd:C00022,path:map00010,C00022,00010
1,cpd:C00024,path:map00010,C00024,00010
2,cpd:C00031,path:map00010,C00031,00010
3,cpd:C00033,path:map00010,C00033,00010
4,cpd:C00036,path:map00010,C00036,00010
...,...,...,...,...
19143,cpd:C13776,path:map07232,C13776,07232
19144,cpd:C13777,path:map07232,C13777,07232
19145,cpd:C13820,path:map07232,C13820,07232
19146,cpd:C07575,path:map07235,C07575,07235


### BioMedGraphica ID

In [2]:
import pandas as pd
import os
from pathlib import Path

# The entity tables live under the directory three levels above the working directory.
current_working_dir = Path.cwd().resolve()
grandparent_dir = current_working_dir.parent.parent.parent

entity_dir = grandparent_dir / 'BioMedGraphica' / 'Entity'
target_dir_metabolite = entity_dir / 'Metabolite' / 'BioMedGraphica_Metabolite.csv'
target_dir_pathway = entity_dir / 'Pathway' / 'BioMedGraphica_Pathway.csv'

# Read every column as text so identifiers are not coerced to numbers.
biomedgraphica_metabolite, biomedgraphica_pathway = (
    pd.read_csv(csv_path, dtype=str)
    for csv_path in (target_dir_metabolite, target_dir_pathway)
)

### KEGG Mapping

In [3]:
# Reduce each BioMedGraphica pathway's KEGG_ID to its first five-digit run so it
# can be joined against the link table's clean_pathway key.
biomedgraphica_pathway['clean_pathway'] = biomedgraphica_pathway['KEGG_ID'].str.extract(r'(\d{5})', expand=False)

# Left join: every KEGG link is kept, and links with no matching BioMedGraphica
# pathway end up with a missing To_ID.
pathway = (
    comp_pathway
    .merge(biomedgraphica_pathway, on='clean_pathway', how='left')
    .loc[:, ['clean_compound', 'clean_pathway', 'BioMedGraphica_ID']]
    .rename(columns={'BioMedGraphica_ID': 'To_ID'})
)
pathway

Unnamed: 0,clean_compound,clean_pathway,To_ID
0,C00022,00010,BMG_PW0025
1,C00022,00010,BMG_PW0637
2,C00022,00010,BMG_PW0638
3,C00024,00010,BMG_PW0025
4,C00024,00010,BMG_PW0637
...,...,...,...
19617,C13776,07232,
19618,C13777,07232,
19619,C13820,07232,
19620,C07575,07235,


In [4]:
# Build a KEGG compound ID -> BioMedGraphica metabolite ID lookup.
# Every step returns a new frame: the previous in-place dropna() on a column
# selection raised pandas' SettingWithCopyWarning.
kegg_biomedgraphica = (
    biomedgraphica_metabolite[['BioMedGraphica_ID', 'KEGG_ID']]
    .dropna(subset=['KEGG_ID'])
    # A metabolite may list several KEGG IDs separated by ';' -> one row per ID.
    .assign(KEGG_ID=lambda frame: frame['KEGG_ID'].astype(str).str.split(';'))
    .explode('KEGG_ID')
    # Keep only the text before the first '.'
    # NOTE(review): presumably this drops a decimal/version suffix -- confirm against the data.
    .assign(KEGG_ID=lambda frame: frame['KEGG_ID'].astype(str).str.split('.').str[0])
)

# One KEGG ID can belong to several metabolites; join their unique IDs with ';'.
kegg_to_individualID = (
    kegg_biomedgraphica
    .groupby('KEGG_ID')['BioMedGraphica_ID']
    .apply(lambda ids: ';'.join(ids.dropna().unique()))
    .to_dict()
)

# Compounds missing from the lookup get a missing From_ID.
pathway['From_ID'] = pathway['clean_compound'].map(kegg_to_individualID)
pathway

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kegg_biomedgraphica.dropna(subset=['KEGG_ID'], inplace=True)


Unnamed: 0,clean_compound,clean_pathway,To_ID,From_ID
0,C00022,00010,BMG_PW0025,BMG_MT000163
1,C00022,00010,BMG_PW0637,BMG_MT000163
2,C00022,00010,BMG_PW0638,BMG_MT000163
3,C00024,00010,BMG_PW0025,BMG_MT000929
4,C00024,00010,BMG_PW0637,BMG_MT000929
...,...,...,...,...
19617,C13776,07232,,BMG_MT166118
19618,C13777,07232,,
19619,C13820,07232,,
19620,C07575,07235,,BMG_MT170137


In [5]:
# Keep only the links whose metabolite and pathway both resolved to an ID.
metabolite_pathway = pathway.loc[:, ['From_ID', 'To_ID']].dropna(subset=['From_ID', 'To_ID'])
metabolite_pathway

Unnamed: 0,From_ID,To_ID
0,BMG_MT000163,BMG_PW0025
1,BMG_MT000163,BMG_PW0637
2,BMG_MT000163,BMG_PW0638
3,BMG_MT000929,BMG_PW0025
4,BMG_MT000929,BMG_PW0637
...,...,...
19487,BMG_MT001847,BMG_PW6793
19488,BMG_MT000448,BMG_PW6793
19489,BMG_MT001442,BMG_PW6793
19490,BMG_MT000539,BMG_PW6793


In [6]:
# Expand ';'-joined ID lists into one row per (From_ID, To_ID) pair, trim
# surrounding whitespace from each ID, then drop repeated pairs.
metabolite_pathway = (
    metabolite_pathway
    .assign(
        From_ID=metabolite_pathway['From_ID'].str.split(';'),
        To_ID=metabolite_pathway['To_ID'].str.split(';'),
    )
    .explode('From_ID')
    .explode('To_ID')
    .assign(
        To_ID=lambda frame: frame['To_ID'].str.strip(),
        From_ID=lambda frame: frame['From_ID'].str.strip(),
    )
    .drop_duplicates()
)
metabolite_pathway

Unnamed: 0,From_ID,To_ID
0,BMG_MT000163,BMG_PW0025
1,BMG_MT000163,BMG_PW0637
2,BMG_MT000163,BMG_PW0638
3,BMG_MT000929,BMG_PW0025
4,BMG_MT000929,BMG_PW0637
...,...,...
19487,BMG_MT001847,BMG_PW6793
19488,BMG_MT000448,BMG_PW6793
19489,BMG_MT001442,BMG_PW6793
19490,BMG_MT000539,BMG_PW6793


### Metabolite-Pathway Relation

In [7]:
# Tag every edge with its data source and relation type.
metabolite_pathway = metabolite_pathway.assign(Source='KEGG', Type='Metabolite-Pathway')

# Number the edges from 1, zero-padded to the digit count of the row total so
# all IDs have the same length.
max_length = len(str(len(metabolite_pathway)))
edge_numbers = range(1, len(metabolite_pathway) + 1)
metabolite_pathway = metabolite_pathway.assign(
    BioMedGraphica_ID=['BMG_ED_MTPW' + str(number).zfill(max_length) for number in edge_numbers]
)

# Move the edge ID to the front, keeping the other columns in their current order.
columns = ['BioMedGraphica_ID', *(name for name in metabolite_pathway.columns if name != 'BioMedGraphica_ID')]
metabolite_pathway = metabolite_pathway.loc[:, columns]
metabolite_pathway

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Source,Type
0,BMG_ED_MTPW00001,BMG_MT000163,BMG_PW0025,KEGG,Metabolite-Pathway
1,BMG_ED_MTPW00002,BMG_MT000163,BMG_PW0637,KEGG,Metabolite-Pathway
2,BMG_ED_MTPW00003,BMG_MT000163,BMG_PW0638,KEGG,Metabolite-Pathway
3,BMG_ED_MTPW00004,BMG_MT000929,BMG_PW0025,KEGG,Metabolite-Pathway
4,BMG_ED_MTPW00005,BMG_MT000929,BMG_PW0637,KEGG,Metabolite-Pathway
...,...,...,...,...,...
19487,BMG_ED_MTPW12194,BMG_MT001847,BMG_PW6793,KEGG,Metabolite-Pathway
19488,BMG_ED_MTPW12195,BMG_MT000448,BMG_PW6793,KEGG,Metabolite-Pathway
19489,BMG_ED_MTPW12196,BMG_MT001442,BMG_PW6793,KEGG,Metabolite-Pathway
19490,BMG_ED_MTPW12197,BMG_MT000539,BMG_PW6793,KEGG,Metabolite-Pathway


In [8]:
import os
from pathlib import Path

# Resolve the working directory to an absolute path.
current_working_dir = Path.cwd().resolve()

# The output tree sits under the directory three levels above the working directory.
grandparent_dir = current_working_dir.parent.parent.parent
target_folder = grandparent_dir.joinpath('BioMedGraphica', 'Relation', 'Metabolite-Pathway')

# Create the folder, including any missing parents, only when it is absent.
folder_missing = not target_folder.exists()
if folder_missing:
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

# Write the relation table without the DataFrame index.
output_file_path = target_folder.joinpath('BioMedGraphica_Metabolite_Pathway.csv')
metabolite_pathway.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Metabolite-Pathway\BioMedGraphica_Metabolite_Pathway.csv
