### MetaNetX

In [2]:
# Download Link: https://www.metanetx.org/ftp/latest/chem_isom.tsv
# Download Date: 2025-03-21
# Download Version: 2022-03-18

import pandas as pd
import numpy as np

metanetx = pd.read_csv('chem_isom.tsv', sep='\t', comment='#', names=['parent','child','description'])
metanetx

Unnamed: 0,parent,child,description
0,MNXM100051,MNXM100344,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9S,10R-..."
1,MNXM100051,MNXM99630,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9R,10S-..."
2,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...
3,MNXM10011,MNXM31736,3-hydroxy-3-phenylpropionic acid -> (S)-3-hydr...
4,MNXM10026,MNXM1104681,3-hydroxydecanoyl-CoA -> (3S)-hydroxydecanoyl-CoA
...,...,...,...
11718,MNXM99362,MNXM99741,1-methyl-2-ethylcyclohexane -> trans-1-ethyl-2...
11719,MNXM9987,MNXM162802,3-aminoisobutyric acid -> (R)-3-amino-2-methyl...
11720,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...
11721,MNXM99960,MNXM101734,"2-Ethyl-1,6-dioxaspiro[4.4]nonane -> 2S,5R-cha..."


In [3]:
# Download Link: https://www.metanetx.org/ftp/latest/chem_xref.tsv
# Download Date: 2025-03-21
# Download Version: 2022-03-18

xref = pd.read_csv('chem_xref.tsv', sep='\t', comment='#', names=['source','ID','description'])
xref[['db', 'db_id']] = xref['source'].str.split(':', n=1, expand=True)
xref

Unnamed: 0,source,ID,description,db,db_id
0,BIOMASS,BIOMASS,BIOMASS,BIOMASS,
1,mnx:BIOMASS,BIOMASS,BIOMASS,mnx,BIOMASS
2,seed.compound:cpd11416,BIOMASS,Biomass,seed.compound,cpd11416
3,seedM:M_cpd11416,BIOMASS,secondary/obsolete/fantasy identifier,seedM,M_cpd11416
4,seedM:cpd11416,BIOMASS,Biomass,seedM,cpd11416
...,...,...,...,...,...
2996505,sabiork.compound:40,WATER,H2O||Water,sabiork.compound,40
2996506,sabiorkM:40,WATER,H2O||Water,sabiorkM,40
2996507,seed.compound:cpd00001,WATER,H2O||H20||H3O+||HO-||Hydroxide ion||OH||OH-||W...,seed.compound,cpd00001
2996508,seedM:M_cpd00001,WATER,secondary/obsolete/fantasy identifier,seedM,M_cpd00001


Filter HMDB

In [4]:
filtered_xref = xref[xref['db'].isin(['hmdb', 'chebi', 'CHEBI'])]
hmdb = filtered_xref[filtered_xref['db'] == 'hmdb']
hmdb = hmdb[['ID', 'db_id']].drop_duplicates()
hmdb = hmdb[hmdb['db_id'].str.match(r'^HMDB\d{7}$', na=False)]
hmdb_unique = hmdb.groupby('ID').agg({
    'db_id': lambda x: ';'.join(x.unique())
}).reset_index()

metanetx = metanetx.merge(hmdb_unique, left_on='parent', right_on='ID', how='left')
metanetx.drop(columns=['ID'], inplace=True)
metanetx.rename(columns={'db_id': 'parent_hmdb'}, inplace=True)

metanetx = metanetx.merge(hmdb_unique, left_on='child', right_on='ID', how='left')
metanetx.drop(columns=['ID'], inplace=True)
metanetx.rename(columns={'db_id': 'child_hmdb'}, inplace=True)

metanetx

Unnamed: 0,parent,child,description,parent_hmdb,child_hmdb
0,MNXM100051,MNXM100344,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9S,10R-...",,
1,MNXM100051,MNXM99630,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9R,10S-...",,
2,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...,HMDB0124925,
3,MNXM10011,MNXM31736,3-hydroxy-3-phenylpropionic acid -> (S)-3-hydr...,HMDB0124925,
4,MNXM10026,MNXM1104681,3-hydroxydecanoyl-CoA -> (3S)-hydroxydecanoyl-CoA,,
...,...,...,...,...,...
11718,MNXM99362,MNXM99741,1-methyl-2-ethylcyclohexane -> trans-1-ethyl-2...,,
11719,MNXM9987,MNXM162802,3-aminoisobutyric acid -> (R)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002299
11720,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166
11721,MNXM99960,MNXM101734,"2-Ethyl-1,6-dioxaspiro[4.4]nonane -> 2S,5R-cha...",,


Filter ChEBI

In [5]:
chebi = filtered_xref[filtered_xref['db'].isin(['chebi', 'CHEBI'])]
chebi = chebi[['ID', 'db_id']].drop_duplicates()

metanetx = metanetx.merge(chebi, left_on='parent', right_on='ID', how='left')
metanetx.drop(columns=['ID'], inplace=True)
metanetx.rename(columns={'db_id': 'parent_chebi'}, inplace=True)

metanetx = metanetx.merge(chebi, left_on='child', right_on='ID', how='left')
metanetx.drop(columns=['ID'], inplace=True)
metanetx.rename(columns={'db_id': 'child_chebi'}, inplace=True)

metanetx

Unnamed: 0,parent,child,description,parent_hmdb,child_hmdb,parent_chebi,child_chebi
0,MNXM100051,MNXM100344,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9S,10R-...",,,,
1,MNXM100051,MNXM99630,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9R,10S-...",,,,
2,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...,HMDB0124925,,19929,51059
3,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...,HMDB0124925,,63469,51059
4,MNXM10011,MNXM31736,3-hydroxy-3-phenylpropionic acid -> (S)-3-hydr...,HMDB0124925,,19929,51058
...,...,...,...,...,...,...,...
23706,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,390
23707,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,41058
23708,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,6167
23709,MNXM99960,MNXM101734,"2-Ethyl-1,6-dioxaspiro[4.4]nonane -> 2S,5R-cha...",,,,


### BioMedgraphica ID

In [6]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_metabolite = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Metabolite' / 'BioMedGraphica_Metabolite.csv'
biomedgraphica_metabolite = pd.read_csv(target_dir_metabolite, dtype=str)

### MetaNetX Mapping

HMDB ID

In [7]:
hmdb_biomedgraphica = biomedgraphica_metabolite[['BioMedGraphica_ID', 'HMDB_ID']]
hmdb_biomedgraphica.dropna(subset=['HMDB_ID'], inplace=True)
hmdb_biomedgraphica = hmdb_biomedgraphica.assign(HMDB_ID=hmdb_biomedgraphica['HMDB_ID'].str.split(';')).explode('HMDB_ID')

hmdb_to_individualID = hmdb_biomedgraphica.groupby('HMDB_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

metanetx['From_HMDB'] = metanetx['parent_hmdb'].map(hmdb_to_individualID)
metanetx['To_HMDB'] = metanetx['child_hmdb'].map(hmdb_to_individualID)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hmdb_biomedgraphica.dropna(subset=['HMDB_ID'], inplace=True)


ChEBI ID

In [8]:
chebi_biomedgraphica = biomedgraphica_metabolite[['BioMedGraphica_ID', 'ChEBI_ID']]
chebi_biomedgraphica.dropna(subset=['ChEBI_ID'], inplace=True)
chebi_biomedgraphica = chebi_biomedgraphica.assign(ChEBI_ID=chebi_biomedgraphica['ChEBI_ID'].astype(str).str.split(';')).explode('ChEBI_ID')
chebi_biomedgraphica['ChEBI_ID'] = chebi_biomedgraphica['ChEBI_ID'].astype(str).str.split('.').str[0]

chebi_to_individualID = chebi_biomedgraphica.groupby('ChEBI_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

metanetx['From_CHEBI'] = metanetx['parent_chebi'].astype(str).map(chebi_to_individualID)
metanetx['To_CHEBI'] = metanetx['child_chebi'].astype(str).map(chebi_to_individualID)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  chebi_biomedgraphica.dropna(subset=['ChEBI_ID'], inplace=True)


In [9]:
def merge_string_columns(df, columns, merge_name, separator=';'):
    def merge_strings(row):
        combined = set()
        for column in columns:
            if pd.notnull(row[column]):
                combined.update(row[column].split(separator))
        return separator.join(combined)
    
    # Apply the function to each row and create a new column
    combined_column_name = merge_name
    df[combined_column_name] = df.apply(merge_strings, axis=1)
    df.drop(columns=columns, inplace=True)
    
    return df

metanetx = merge_string_columns(metanetx, ['From_HMDB', 'From_CHEBI'], 'From_ID')
metanetx = merge_string_columns(metanetx, ['To_HMDB', 'To_CHEBI'], 'To_ID')
metanetx.replace('', pd.NA, inplace=True)
metanetx

Unnamed: 0,parent,child,description,parent_hmdb,child_hmdb,parent_chebi,child_chebi,From_ID,To_ID
0,MNXM100051,MNXM100344,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9S,10R-...",,,,,,
1,MNXM100051,MNXM99630,"cis-9,19-epoxy-3Z,6Z-heptadecadiene -> 9R,10S-...",,,,,,
2,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...,HMDB0124925,,19929,51059,BMG_MT104009,BMG_MT104009
3,MNXM10011,MNXM31563,3-hydroxy-3-phenylpropionic acid -> (R)-3-hydr...,HMDB0124925,,63469,51059,BMG_MT104009,BMG_MT104009
4,MNXM10011,MNXM31736,3-hydroxy-3-phenylpropionic acid -> (S)-3-hydr...,HMDB0124925,,19929,51058,BMG_MT104009,
...,...,...,...,...,...,...,...,...,...
23706,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,390,,BMG_MT001440
23707,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,41058,,BMG_MT001440
23708,MNXM9987,MNXM732376,3-aminoisobutyric acid -> (S)-3-amino-2-methyl...,HMDB0000485;HMDB0003911,HMDB0002166,359,6167,,BMG_MT001440
23709,MNXM99960,MNXM101734,"2-Ethyl-1,6-dioxaspiro[4.4]nonane -> 2S,5R-cha...",,,,,,


In [10]:
meta_meta = metanetx[['From_ID', 'To_ID']]
meta_meta = meta_meta.dropna(subset=['From_ID'])
meta_meta = meta_meta.dropna(subset=['To_ID'])
meta_meta.drop_duplicates(inplace=True)
meta_meta

Unnamed: 0,From_ID,To_ID
2,BMG_MT104009,BMG_MT104009
20,BMG_MT218069,BMG_MT218077
21,BMG_MT218069,BMG_MT218078
214,BMG_MT217653;BMG_MT041423,BMG_MT041423
215,BMG_MT041423,BMG_MT041423
...,...,...
23492,BMG_MT165996,BMG_MT001199
23545,BMG_MT007770,BMG_MT039789
23611,BMG_MT001450,BMG_MT014623
23631,BMG_MT001970,BMG_MT001536


In [11]:
meta_meta['From_ID'] = meta_meta['From_ID'].str.split(';')
meta_meta['To_ID'] = meta_meta['To_ID'].str.split(';')
meta_meta = meta_meta.explode('From_ID')
meta_meta = meta_meta.explode('To_ID')

meta_meta['To_ID'] = meta_meta['To_ID'].str.strip()
meta_meta['From_ID'] = meta_meta['From_ID'].str.strip()
meta_meta.drop_duplicates(inplace=True)
meta_meta

Unnamed: 0,From_ID,To_ID
2,BMG_MT104009,BMG_MT104009
20,BMG_MT218069,BMG_MT218077
21,BMG_MT218069,BMG_MT218078
214,BMG_MT217653,BMG_MT041423
214,BMG_MT041423,BMG_MT041423
...,...,...
23492,BMG_MT165996,BMG_MT001199
23545,BMG_MT007770,BMG_MT039789
23611,BMG_MT001450,BMG_MT014623
23631,BMG_MT001970,BMG_MT001536


### Metabolite-Metabolite Relation

In [12]:
meta_meta['Source'] = 'MetaNetX'
meta_meta['Type'] = 'Metabolite-Metabolite'

max_length = len(str(len(meta_meta)))
meta_meta['BioMedGraphica_ID'] = ['BMG_ED_MTMT' + str(i).zfill(max_length) for i in range(1, len(meta_meta) + 1)]
columns = ['BioMedGraphica_ID'] + [col for col in meta_meta.columns if col != 'BioMedGraphica_ID']  # re-order columns
meta_meta = meta_meta[columns]
meta_meta

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Source,Type
2,BMG_ED_MTMT001,BMG_MT104009,BMG_MT104009,MetaNetX,Metabolite-Metabolite
20,BMG_ED_MTMT002,BMG_MT218069,BMG_MT218077,MetaNetX,Metabolite-Metabolite
21,BMG_ED_MTMT003,BMG_MT218069,BMG_MT218078,MetaNetX,Metabolite-Metabolite
214,BMG_ED_MTMT004,BMG_MT217653,BMG_MT041423,MetaNetX,Metabolite-Metabolite
214,BMG_ED_MTMT005,BMG_MT041423,BMG_MT041423,MetaNetX,Metabolite-Metabolite
...,...,...,...,...,...
23492,BMG_ED_MTMT927,BMG_MT165996,BMG_MT001199,MetaNetX,Metabolite-Metabolite
23545,BMG_ED_MTMT928,BMG_MT007770,BMG_MT039789,MetaNetX,Metabolite-Metabolite
23611,BMG_ED_MTMT929,BMG_MT001450,BMG_MT014623,MetaNetX,Metabolite-Metabolite
23631,BMG_ED_MTMT930,BMG_MT001970,BMG_MT001536,MetaNetX,Metabolite-Metabolite


In [13]:
import os
from pathlib import Path

# get the current working directory
current_working_dir = Path(os.getcwd()).resolve()

# get the output directory
grandparent_dir = current_working_dir.parent.parent.parent

target_folder = grandparent_dir / 'BioMedGraphica' / 'Relation' / 'Metabolite-Metabolite'
if not target_folder.exists():
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

output_file_path = target_folder / 'BioMedGraphica_Metabolite_Metabolite.csv'
meta_meta.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Metabolite-Metabolite\BioMedGraphica_Metabolite_Metabolite.csv
