### MDAD

In [19]:
# Download Link: https://github.com/Sun-Yazhou/MDAD/raw/refs/heads/master/MDAD.zip
# Download Date: 2025-03-21
# Download Version: 2018-10-29

import pandas as pd

mdad = pd.read_csv('MDAD\\MDAD\\download\\all data.txt', sep='\t', encoding='ISO-8859-1')
mdad.dropna(how='all', inplace=True)
mdad

Unnamed: 0,Name,Molecular form,Microbe,Strain,Target,PubMed ID
0,"(1-(4-chlorophenoxy)-3-[(4, 6-dimethyl-2-pyrim...",C16H19ClN2O2,Vibrio harveyi,MTCC 3438,,27141809
1,"(10R,11R)-Hydnocarpin",C25H20O9,Staphylococcus aureus,8325-4,,26273725
2,"(10R,11R)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725
3,"(10S,11S)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725
4,(1E)-1-{[(1E)-prop-1-ene-1-sulfinyl]sulfanyl}p...,C6H10OS2,Streptococcus mutans,UA159,,25081571
...,...,...,...,...,...,...
5050,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,M10,,26084709
5051,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,ID4365,,26084709
5052,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,IGB83,,26084709
5053,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,148,,26084709


### PharmacoMicrobiomics

In [29]:
# Download Link: http://pharmacomicrobiomics.com/view/relation/
# Download Date: 2024-11-15
# Download Version: 2021-03-21
import pandas as pd

pm = pd.read_csv('PharmacoMicrobiomics.csv')
pm

Unnamed: 0,Drug_CID,Microbe_Taxon
0,2724385,84112
1,5282192,816
2,10303,2
3,5280961,820
4,5280961,39490
...,...,...
64,46488338,1598
65,5333,1239
66,26879,816
67,4173,1578


### BioMedGraphica ID

In [12]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_microbiota = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Microbiota' / 'BioMedGraphica_Microbiota.csv'
target_dir_drug_gui = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Drug' / 'BioMedGraphica_Drug_GUI_Name.csv'
target_dir_drug = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Drug' / 'BioMedGraphica_Drug.csv'
biomedgraphica_microbiota = pd.read_csv(target_dir_microbiota, dtype=str)
biomedgraphica_drug_gui = pd.read_csv(target_dir_drug_gui, dtype=str)
biomedgraphica_drug = pd.read_csv(target_dir_drug, dtype=str)

### MDAD Mapping

In [24]:
import re

drug_name_biomedgraphica = biomedgraphica_drug_gui[['BioMedGraphica_ID', 'Drug_Name_List']]
drug_name_biomedgraphica.dropna(subset=['Drug_Name_List'], inplace=True)
drug_name_biomedgraphica = drug_name_biomedgraphica.assign(Drug_Name_List=drug_name_biomedgraphica['Drug_Name_List'].str.split(r" \| ")).explode('Drug_Name_List')

drug_name_to_individualID = drug_name_biomedgraphica.groupby('Drug_Name_List')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()
mdad['From_ID'] = mdad['Name'].map(drug_name_to_individualID)
mdad

Unnamed: 0,Name,Molecular form,Microbe,Strain,Target,PubMed ID,From_ID
0,"(1-(4-chlorophenoxy)-3-[(4, 6-dimethyl-2-pyrim...",C16H19ClN2O2,Vibrio harveyi,MTCC 3438,,27141809,
1,"(10R,11R)-Hydnocarpin",C25H20O9,Staphylococcus aureus,8325-4,,26273725,
2,"(10R,11R)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725,
3,"(10S,11S)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725,
4,(1E)-1-{[(1E)-prop-1-ene-1-sulfinyl]sulfanyl}p...,C6H10OS2,Streptococcus mutans,UA159,,25081571,
...,...,...,...,...,...,...,...
5050,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,M10,,26084709,
5051,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,ID4365,,26084709,
5052,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,IGB83,,26084709,
5053,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,148,,26084709,


In [25]:
microbiota_name_biomedgraphica = biomedgraphica_microbiota[['BioMedGraphica_ID', 'NCBI_Taxonomy_Name']]
microbiota_name_biomedgraphica.dropna(subset=['NCBI_Taxonomy_Name'], inplace=True)
microbiota_name_biomedgraphica = microbiota_name_biomedgraphica.assign(NCBI_Taxonomy_Name=microbiota_name_biomedgraphica['NCBI_Taxonomy_Name'].str.split(';')).explode('NCBI_Taxonomy_Name')
microbiota_name_biomedgraphica['NCBI_Taxonomy_Name'] = microbiota_name_biomedgraphica['NCBI_Taxonomy_Name'].str.lower()

microbiota_name_to_individualID = microbiota_name_biomedgraphica.groupby('NCBI_Taxonomy_Name')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()
mdad['To_ID'] = mdad['Microbe'].str.lower().map(microbiota_name_to_individualID)
mdad

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  microbiota_name_biomedgraphica.dropna(subset=['NCBI_Taxonomy_Name'], inplace=True)


Unnamed: 0,Name,Molecular form,Microbe,Strain,Target,PubMed ID,From_ID,To_ID
0,"(1-(4-chlorophenoxy)-3-[(4, 6-dimethyl-2-pyrim...",C16H19ClN2O2,Vibrio harveyi,MTCC 3438,,27141809,,BMG_MC559474
1,"(10R,11R)-Hydnocarpin",C25H20O9,Staphylococcus aureus,8325-4,,26273725,,BMG_MC088814
2,"(10R,11R)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725,,BMG_MC088814
3,"(10S,11S)-Hydnocarpin D",C25H20O9,Staphylococcus aureus,8325-4,,26273725,,BMG_MC088814
4,(1E)-1-{[(1E)-prop-1-ene-1-sulfinyl]sulfanyl}p...,C6H10OS2,Streptococcus mutans,UA159,,25081571,,BMG_MC100324
...,...,...,...,...,...,...,...,...
5050,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,M10,,26084709,,BMG_MC392238
5051,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,ID4365,,26084709,,BMG_MC392238
5052,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,IGB83,,26084709,,BMG_MC392238
5053,ZnO nanoparticles,ZnO,Pseudomonas aeruginosa,148,,26084709,,BMG_MC392238


In [26]:
mdad = mdad[['From_ID', 'To_ID']]
mdad.dropna(subset=['From_ID'], inplace=True)
mdad.dropna(subset=['To_ID'], inplace=True)
mdad

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdad.dropna(subset=['From_ID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdad.dropna(subset=['To_ID'], inplace=True)


Unnamed: 0,From_ID,To_ID
493,BMG_DG112273,BMG_MC088814
559,BMG_DG163217,BMG_MC577286
560,BMG_DG163217,BMG_MC577286
561,BMG_DG163217,BMG_MC577286
562,BMG_DG110067,BMG_MC532241
...,...,...
5034,BMG_DG123836,BMG_MC576284
5035,BMG_DG123836,BMG_MC392238
5036,BMG_DG123836,BMG_MC088814
5037,BMG_DG123836,BMG_MC089655


In [27]:
mdad['From_ID'] = mdad['From_ID'].str.split(';')
mdad['To_ID'] = mdad['To_ID'].str.split(';')

mdad = mdad.explode('From_ID')
mdad = mdad.explode('To_ID')

mdad.drop_duplicates(inplace=True)
mdad.reset_index(drop=True, inplace=True)
mdad

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdad['From_ID'] = mdad['From_ID'].str.split(';')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdad['To_ID'] = mdad['To_ID'].str.split(';')


Unnamed: 0,From_ID,To_ID
0,BMG_DG112273,BMG_MC088814
1,BMG_DG163217,BMG_MC577286
2,BMG_DG110067,BMG_MC532241
3,BMG_DG130758,BMG_MC088814
4,BMG_DG212377,BMG_MC088814
...,...,...
800,BMG_DG123836,BMG_MC576284
801,BMG_DG123836,BMG_MC392238
802,BMG_DG123836,BMG_MC088814
803,BMG_DG123836,BMG_MC089655


### PharmacoMicrobiomics Mapping

In [30]:
cid_biomedgraphica = biomedgraphica_drug[['BioMedGraphica_ID', 'PubChem_CID']]
cid_biomedgraphica.dropna(subset=['PubChem_CID'], inplace=True)
cid_biomedgraphica['PubChem_CID'] = cid_biomedgraphica['PubChem_CID'].astype(str)
cid_biomedgraphica = cid_biomedgraphica.assign(PubChem_CID=cid_biomedgraphica['PubChem_CID'].str.split(';')).explode('PubChem_CID')

cid_to_individualID = cid_biomedgraphica.groupby('PubChem_CID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()
pm['From_ID'] = pm['Drug_CID'].astype(str).map(cid_to_individualID)
pm

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cid_biomedgraphica.dropna(subset=['PubChem_CID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cid_biomedgraphica['PubChem_CID'] = cid_biomedgraphica['PubChem_CID'].astype(str)


Unnamed: 0,Drug_CID,Microbe_Taxon,From_ID
0,2724385,84112,BMG_DG129486
1,5282192,816,BMG_DG153999
2,10303,2,BMG_DG004259
3,5280961,820,BMG_DG153534
4,5280961,39490,BMG_DG153534
...,...,...,...
64,46488338,1598,
65,5333,1239,BMG_DG158549
66,26879,816,BMG_DG129116
67,4173,1578,BMG_DG141410


In [31]:
taxon_biomedgraphica = biomedgraphica_microbiota[['BioMedGraphica_ID', 'NCBI_Taxonomy_ID']]
taxon_biomedgraphica.dropna(subset=['NCBI_Taxonomy_ID'], inplace=True)
taxon_biomedgraphica['NCBI_Taxonomy_ID'] = taxon_biomedgraphica['NCBI_Taxonomy_ID'].astype(str)
taxon_biomedgraphica = taxon_biomedgraphica.assign(NCBI_Taxonomy_ID=taxon_biomedgraphica['NCBI_Taxonomy_ID'].str.split(';')).explode('NCBI_Taxonomy_ID')

taxon_to_individualID = taxon_biomedgraphica.groupby('NCBI_Taxonomy_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()
pm['To_ID'] = pm['Microbe_Taxon'].astype(str).map(taxon_to_individualID)
pm

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  taxon_biomedgraphica.dropna(subset=['NCBI_Taxonomy_ID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  taxon_biomedgraphica['NCBI_Taxonomy_ID'] = taxon_biomedgraphica['NCBI_Taxonomy_ID'].astype(str)


Unnamed: 0,Drug_CID,Microbe_Taxon,From_ID,To_ID
0,2724385,84112,BMG_DG129486,BMG_MC588851
1,5282192,816,BMG_DG153999,BMG_MC588160
2,10303,2,BMG_DG004259,BMG_MC314078
3,5280961,820,BMG_DG153534,BMG_MC588233
4,5280961,39490,BMG_DG153534,BMG_MC474956
...,...,...,...,...
64,46488338,1598,,BMG_MC208472
65,5333,1239,BMG_DG158549,BMG_MC073270
66,26879,816,BMG_DG129116,BMG_MC588160
67,4173,1578,BMG_DG141410,BMG_MC201072


In [32]:
pm = pm[['From_ID', 'To_ID']]
pm.dropna(subset=['From_ID'], inplace=True)
pm.dropna(subset=['To_ID'], inplace=True)
pm

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pm.dropna(subset=['From_ID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pm.dropna(subset=['To_ID'], inplace=True)


Unnamed: 0,From_ID,To_ID
0,BMG_DG129486,BMG_MC588851
1,BMG_DG153999,BMG_MC588160
2,BMG_DG004259,BMG_MC314078
3,BMG_DG153534,BMG_MC588233
4,BMG_DG153534,BMG_MC474956
...,...,...
63,BMG_DG082189,BMG_MC231424
65,BMG_DG158549,BMG_MC073270
66,BMG_DG129116,BMG_MC588160
67,BMG_DG141410,BMG_MC201072


In [33]:
pm['From_ID'] = pm['From_ID'].str.split(';')
pm['To_ID'] = pm['To_ID'].str.split(';')

pm = pm.explode('From_ID')
pm = pm.explode('To_ID')

pm.drop_duplicates(inplace=True)
pm.reset_index(drop=True, inplace=True)
pm

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pm['From_ID'] = pm['From_ID'].str.split(';')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pm['To_ID'] = pm['To_ID'].str.split(';')


Unnamed: 0,From_ID,To_ID
0,BMG_DG129486,BMG_MC588851
1,BMG_DG153999,BMG_MC588160
2,BMG_DG004259,BMG_MC314078
3,BMG_DG153534,BMG_MC588233
4,BMG_DG153534,BMG_MC474956
...,...,...
62,BMG_DG082189,BMG_MC231424
63,BMG_DG158549,BMG_MC073270
64,BMG_DG129116,BMG_MC588160
65,BMG_DG141410,BMG_MC201072


### Drug-Microbiota Relation

In [34]:
mdad['Type'] = 'Drug-Microbiota'
pm['Type'] = 'Drug-Microbiota'

mdad['source1'] = 'MDAD'
pm['source2'] = 'PharmacoMicrobiomics'

def merge_string_columns(df, columns, merge_name, separator=';'):
    def merge_strings(row):
        combined = set()
        for column in columns:
            if pd.notnull(row[column]):
                combined.update(row[column].split(separator))
        return separator.join(combined)
    
    # Apply the function to each row and create a new column
    combined_column_name = merge_name
    df[combined_column_name] = df.apply(merge_strings, axis=1)
    df.drop(columns=columns, inplace=True)
    
    return df

drug_microbiota = pd.merge(mdad, pm, how='outer')
drug_microbiota = merge_string_columns(drug_microbiota, ['source1', 'source2'], 'Source')
drug_microbiota

Unnamed: 0,From_ID,To_ID,Type,Source
0,BMG_DG000254,BMG_MC392238,Drug-Microbiota,MDAD
1,BMG_DG001877,BMG_MC073270,Drug-Microbiota,PharmacoMicrobiomics
2,BMG_DG002315,BMG_MC100324,Drug-Microbiota,MDAD
3,BMG_DG002315,BMG_MC392238,Drug-Microbiota,MDAD
4,BMG_DG002315,BMG_MC477936,Drug-Microbiota,MDAD
...,...,...,...,...
861,BMG_DG266720,BMG_MC088814,Drug-Microbiota,MDAD
862,BMG_DG266720,BMG_MC089655,Drug-Microbiota,MDAD
863,BMG_DG267430,BMG_MC392238,Drug-Microbiota,MDAD
864,BMG_DG268007,BMG_MC392238,Drug-Microbiota,MDAD


In [35]:
max_length = len(str(len(drug_microbiota)))
drug_microbiota['BioMedGraphica_ID'] = ['BMG_ED_DGMC' + str(i).zfill(max_length) for i in range(1, len(drug_microbiota) + 1)]
columns = ['BioMedGraphica_ID'] + [col for col in drug_microbiota.columns if col != 'BioMedGraphica_ID']  # re-order columns
drug_microbiota = drug_microbiota[columns]
drug_microbiota

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Type,Source
0,BMG_ED_DGMC001,BMG_DG000254,BMG_MC392238,Drug-Microbiota,MDAD
1,BMG_ED_DGMC002,BMG_DG001877,BMG_MC073270,Drug-Microbiota,PharmacoMicrobiomics
2,BMG_ED_DGMC003,BMG_DG002315,BMG_MC100324,Drug-Microbiota,MDAD
3,BMG_ED_DGMC004,BMG_DG002315,BMG_MC392238,Drug-Microbiota,MDAD
4,BMG_ED_DGMC005,BMG_DG002315,BMG_MC477936,Drug-Microbiota,MDAD
...,...,...,...,...,...
861,BMG_ED_DGMC862,BMG_DG266720,BMG_MC088814,Drug-Microbiota,MDAD
862,BMG_ED_DGMC863,BMG_DG266720,BMG_MC089655,Drug-Microbiota,MDAD
863,BMG_ED_DGMC864,BMG_DG267430,BMG_MC392238,Drug-Microbiota,MDAD
864,BMG_ED_DGMC865,BMG_DG268007,BMG_MC392238,Drug-Microbiota,MDAD


In [36]:
import os
from pathlib import Path

# get the current working directory
current_working_dir = Path(os.getcwd()).resolve()

# get the output directory
grandparent_dir = current_working_dir.parent.parent.parent

target_folder = grandparent_dir / 'BioMedGraphica' / 'Relation' / 'Drug-Microbiota'
if not target_folder.exists():
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

output_file_path = target_folder / 'BioMedGraphica_Drug_Microbiota.csv'
drug_microbiota.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Drug-Microbiota\BioMedGraphica_Drug_Microbiota.csv
