### DrugBank

In [None]:
# Download Link: https://go.drugbank.com/releases/5-1-13/downloads/all-full-database
# Download Date: 2025-03-21
# Download Version: 2025-03-02

# cost time: 193min
import os
import pandas as pd
from bs4 import BeautifulSoup

# https://github.com/mims-harvard/PrimeKG/blob/main/datasets/processing_scripts/drugbank_drug_drug.py 

xml_filename = 'full database.xml'

with open(xml_filename, 'r', encoding='utf-8') as file:
    xml_data = file.read()

soup = BeautifulSoup(xml_data, "xml")

interactions_data = []

for drug in soup.find_all("drug"):
    drug_name = drug.find("drugbank-id").text
    interactions = drug.find_all("drug-interaction")
    if not interactions:
        continue
    for interaction in interactions:
        interaction_id = interaction.find("drugbank-id").text
        interactions_data.append([drug_name, interaction_id])

interactions_df = pd.DataFrame(interactions_data, columns=['drug1', 'drug2'])

interactions_df.to_csv('drug_drug.csv', index=False)
print("Drug-drug interactions have been saved to 'drug_drug.csv'")

Drug-drug interactions have been saved to 'drug_drug.csv'


In [1]:
import pandas as pd

df_d_d = pd.read_csv('drug_drug.csv')
df_d_d

Unnamed: 0,drug1,drug2
0,DB00001,DB06605
1,DB00001,DB06695
2,DB00001,DB01254
3,DB00001,DB01609
4,DB00001,DB01586
...,...,...
2855843,DB19413,DB13509
2855844,DB19413,DB13538
2855845,DB19413,DB13612
2855846,DB19413,DB14006


### BioMedGraphica ID

In [2]:
import pandas as pd
import os
from pathlib import Path

current_working_dir = Path(os.getcwd()).resolve()
grandparent_dir = current_working_dir.parent.parent.parent
target_dir_drug = grandparent_dir / 'BioMedGraphica' / 'Entity' / 'Drug' / 'BioMedGraphica_Drug.csv'
biomedgraphica_drug = pd.read_csv(target_dir_drug, dtype=str)

### DrugBank Mapping

In [3]:
drugbank_drug_id = biomedgraphica_drug[['DrugBank_ID', 'BioMedGraphica_ID']]
drugbank_drug_id.dropna(subset = ['DrugBank_ID'], inplace=True)
drugbank_drug_id = drugbank_drug_id.assign(drugbank_id=drugbank_drug_id['DrugBank_ID'].str.split(';')).explode('DrugBank_ID')

drugbank_drug_to_individualid = biomedgraphica_drug.groupby('DrugBank_ID')['BioMedGraphica_ID'].apply(lambda x: ';'.join(x.dropna().unique())).to_dict()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drugbank_drug_id.dropna(subset = ['DrugBank_ID'], inplace=True)


In [4]:
df_d_d['From_ID'] = df_d_d['drug1'].map(drugbank_drug_to_individualid)
df_d_d['To_ID'] = df_d_d['drug2'].map(drugbank_drug_to_individualid)
df_d_d

Unnamed: 0,drug1,drug2,From_ID,To_ID
0,DB00001,DB06605,BMG_DG224733,BMG_DG002602
1,DB00001,DB06695,BMG_DG224733,BMG_DG083812;BMG_DG176217
2,DB00001,DB01254,BMG_DG224733,BMG_DG134958
3,DB00001,DB01609,BMG_DG224733,BMG_DG116501;BMG_DG166533
4,DB00001,DB01586,BMG_DG224733,BMG_DG136255
...,...,...,...,...
2855843,DB19413,DB13509,BMG_DG269087,BMG_DG188118;BMG_DG263353
2855844,DB19413,DB13538,BMG_DG269087,BMG_DG181832;BMG_DG263379
2855845,DB19413,DB13612,BMG_DG269087,BMG_DG117810;BMG_DG263448
2855846,DB19413,DB14006,BMG_DG269087,BMG_DG165432;BMG_DG263774


In [5]:
df_drug_drug = df_d_d[['From_ID', 'To_ID']]

df_drug_drug.dropna(subset=['From_ID'], inplace=True)
df_drug_drug.dropna(subset=['To_ID'], inplace=True)
df_drug_drug

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_drug_drug.dropna(subset=['From_ID'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_drug_drug.dropna(subset=['To_ID'], inplace=True)


Unnamed: 0,From_ID,To_ID
0,BMG_DG224733,BMG_DG002602
1,BMG_DG224733,BMG_DG083812;BMG_DG176217
2,BMG_DG224733,BMG_DG134958
3,BMG_DG224733,BMG_DG116501;BMG_DG166533
4,BMG_DG224733,BMG_DG136255
...,...,...
2855843,BMG_DG269087,BMG_DG188118;BMG_DG263353
2855844,BMG_DG269087,BMG_DG181832;BMG_DG263379
2855845,BMG_DG269087,BMG_DG117810;BMG_DG263448
2855846,BMG_DG269087,BMG_DG165432;BMG_DG263774


In [6]:
df_drug_drug['From_ID'] = df_drug_drug['From_ID'].str.split(';')
df_drug_drug['To_ID'] = df_drug_drug['To_ID'].str.split(';')

df_drug_drug = df_drug_drug.explode('From_ID')
df_drug_drug = df_drug_drug.explode('To_ID')

df_drug_drug.drop_duplicates(inplace=True)
df_drug_drug.reset_index(drop=True, inplace=True)
df_drug_drug

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_drug_drug['From_ID'] = df_drug_drug['From_ID'].str.split(';')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_drug_drug['To_ID'] = df_drug_drug['To_ID'].str.split(';')


Unnamed: 0,From_ID,To_ID
0,BMG_DG224733,BMG_DG002602
1,BMG_DG224733,BMG_DG083812
2,BMG_DG224733,BMG_DG176217
3,BMG_DG224733,BMG_DG134958
4,BMG_DG224733,BMG_DG116501
...,...,...
3882577,BMG_DG269087,BMG_DG117810
3882578,BMG_DG269087,BMG_DG263448
3882579,BMG_DG269087,BMG_DG165432
3882580,BMG_DG269087,BMG_DG263774


### Drug-Drug Relation

In [7]:
df_drug_drug['Source'] = 'DrugBank'
df_drug_drug['Type'] = 'Drug-Drug'

max_length = len(str(len(df_drug_drug)))
df_drug_drug['BioMedGraphica_ID'] = ['BMG_ED_DGDG' + str(i).zfill(max_length) for i in range(1, len(df_drug_drug) + 1)]
columns = ['BioMedGraphica_ID'] + [col for col in df_drug_drug.columns if col != 'BioMedGraphica_ID']  # re-order columns
df_drug_drug = df_drug_drug[columns]
df_drug_drug

Unnamed: 0,BioMedGraphica_ID,From_ID,To_ID,Source,Type
0,BMG_ED_DGDG0000001,BMG_DG224733,BMG_DG002602,DrugBank,Drug-Drug
1,BMG_ED_DGDG0000002,BMG_DG224733,BMG_DG083812,DrugBank,Drug-Drug
2,BMG_ED_DGDG0000003,BMG_DG224733,BMG_DG176217,DrugBank,Drug-Drug
3,BMG_ED_DGDG0000004,BMG_DG224733,BMG_DG134958,DrugBank,Drug-Drug
4,BMG_ED_DGDG0000005,BMG_DG224733,BMG_DG116501,DrugBank,Drug-Drug
...,...,...,...,...,...
3882577,BMG_ED_DGDG3882578,BMG_DG269087,BMG_DG117810,DrugBank,Drug-Drug
3882578,BMG_ED_DGDG3882579,BMG_DG269087,BMG_DG263448,DrugBank,Drug-Drug
3882579,BMG_ED_DGDG3882580,BMG_DG269087,BMG_DG165432,DrugBank,Drug-Drug
3882580,BMG_ED_DGDG3882581,BMG_DG269087,BMG_DG263774,DrugBank,Drug-Drug


In [8]:
import os
from pathlib import Path

# get the current working directory
current_working_dir = Path(os.getcwd()).resolve()

# get the output directory
grandparent_dir = current_working_dir.parent.parent.parent

target_folder = grandparent_dir / 'BioMedGraphica' / 'Relation' / 'Drug-Drug'
if not target_folder.exists():
    target_folder.mkdir(parents=True)
    print(f"Folder {target_folder} has been created.")

output_file_path = target_folder / 'BioMedGraphica_Drug_Drug.csv'
df_drug_drug.to_csv(output_file_path, index=False)
print(f"Data has been saved to {output_file_path}")

Data has been saved to D:\RA\BMG\BioMedGraphica\Relation\Drug-Drug\BioMedGraphica_Drug_Drug.csv
