In [21]:
# import dependecies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
from scipy import stats
import kagglehub
import json
import os

In [22]:
path = kagglehub.dataset_download("shayanhusain/drug-food-interactions-dataset")

print("Path to dataset files:", path)

Path to dataset files: /Users/mikayla/.cache/kagglehub/datasets/shayanhusain/drug-food-interactions-dataset/versions/1


In [23]:
json_file = os.path.join(path, 'Drug to Food Interactions Dataset.json')

In [24]:
print("Looking for file:", json_file)
print("File exists?", os.path.exists(json_file))

Looking for file: /Users/mikayla/.cache/kagglehub/datasets/shayanhusain/drug-food-interactions-dataset/versions/1/Drug to Food Interactions Dataset.json
File exists? True


In [25]:
if os.path.exists(json_file):
    with open(json_file, 'r') as f:
        data = json.load(f)
    print("Successfully loaded JSON file!")
    print(type(data))

Successfully loaded JSON file!
<class 'list'>


In [26]:
data = pd.DataFrame(data)

In [27]:
data.head()

Unnamed: 0,name,reference,food_interactions
0,Lepirudin,"Knox C, Wilson M, Klinger CM, et al. DrugBank ...",[Avoid herbs and supplements with anticoagulan...
1,Bivalirudin,"Knox C, Wilson M, Klinger CM, et al. DrugBank ...","[Avoid echinacea., Avoid herbs and supplements..."
2,Peginterferon alfa-2a,"Knox C, Wilson M, Klinger CM, et al. DrugBank ...",[Drink plenty of fluids.]
3,Alteplase,"Knox C, Wilson M, Klinger CM, et al. DrugBank ...",[Avoid herbs and supplements with anticoagulan...
4,Interferon alfa-n1,"Knox C, Wilson M, Klinger CM, et al. DrugBank ...",[Avoid alcohol.]


In [28]:
data = data.drop('reference', axis=1)

In [29]:
data['drug_index'] = data.index

In [30]:
data.head()

Unnamed: 0,name,food_interactions,drug_index
0,Lepirudin,[Avoid herbs and supplements with anticoagulan...,0
1,Bivalirudin,"[Avoid echinacea., Avoid herbs and supplements...",1
2,Peginterferon alfa-2a,[Drink plenty of fluids.],2
3,Alteplase,[Avoid herbs and supplements with anticoagulan...,3
4,Interferon alfa-n1,[Avoid alcohol.],4


In [31]:
data['food_interactions'] = data['food_interactions'].astype(str)

In [32]:
def cleanInteractions(text):
    if not isinstance(text, str):
        return ""
    text = text.replace("St. John's Wort", "St John's Wort")
    text = text.replace(']', '').replace('[', '')
    text = text.strip()
    return text

In [33]:
def extractInteractions(text):
    if isinstance(text, str):
        sentences = text.split('.')
        return [s.strip() for s in sentences
            if s.strip() and not s.strip().lower().startswith('examples include')]
        
    return []            

In [34]:
data['cleaned_text'] = data['food_interactions'].apply(cleanInteractions)
data['interaction'] = data['cleaned_text'].apply(extractInteractions)

In [35]:
data = data.explode('interaction').reset_index(drop=True)
data = data[data['interaction'].str.len() > 2]

In [36]:
data.head()

Unnamed: 0,name,food_interactions,drug_index,cleaned_text,interaction
0,Lepirudin,['Avoid herbs and supplements with anticoagula...,0,'Avoid herbs and supplements with anticoagulan...,'Avoid herbs and supplements with anticoagulan...
2,Bivalirudin,"['Avoid echinacea.', 'Avoid herbs and suppleme...",1,"'Avoid echinacea.', 'Avoid herbs and supplemen...",'Avoid echinacea
3,Bivalirudin,"['Avoid echinacea.', 'Avoid herbs and suppleme...",1,"'Avoid echinacea.', 'Avoid herbs and supplemen...","', 'Avoid herbs and supplements with anticoagu..."
5,Peginterferon alfa-2a,['Drink plenty of fluids.'],2,'Drink plenty of fluids.','Drink plenty of fluids
7,Alteplase,['Avoid herbs and supplements with anticoagula...,3,'Avoid herbs and supplements with anticoagulan...,'Avoid herbs and supplements with anticoagulan...


In [37]:
data['interaction_count'] = data['interaction'].apply(lambda x: len(x) if isinstance(x, list) else 0)

In [38]:
data.head()

Unnamed: 0,name,food_interactions,drug_index,cleaned_text,interaction,interaction_count
0,Lepirudin,['Avoid herbs and supplements with anticoagula...,0,'Avoid herbs and supplements with anticoagulan...,'Avoid herbs and supplements with anticoagulan...,0
2,Bivalirudin,"['Avoid echinacea.', 'Avoid herbs and suppleme...",1,"'Avoid echinacea.', 'Avoid herbs and supplemen...",'Avoid echinacea,0
3,Bivalirudin,"['Avoid echinacea.', 'Avoid herbs and suppleme...",1,"'Avoid echinacea.', 'Avoid herbs and supplemen...","', 'Avoid herbs and supplements with anticoagu...",0
5,Peginterferon alfa-2a,['Drink plenty of fluids.'],2,'Drink plenty of fluids.','Drink plenty of fluids,0
7,Alteplase,['Avoid herbs and supplements with anticoagula...,3,'Avoid herbs and supplements with anticoagulan...,'Avoid herbs and supplements with anticoagulan...,0


In [19]:
print("Total interactions (should match post-explode row count):", data['interaction_count'].sum())

Total interactions (should match post-explode row count): 0


In [20]:
data.to_csv('drug_to_food_interactions.csv')