In [1]:
import requests
import pandas as pd

# API endpoint
url = "https://api.fda.gov/drug/label.json?limit=100"  # limit = number of records per request

response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    results = data['results']

    # Extract some useful fields for your portfolio
    records = []
    for item in results:
        record = {
            "brand_name": item.get("openfda", {}).get("brand_name", [""])[0],
            "generic_name": item.get("openfda", {}).get("generic_name", [""])[0],
            "manufacturer_name": item.get("openfda", {}).get("manufacturer_name", [""])[0],
            "adverse_reactions": item.get("adverse_reactions", [""])[0] if "adverse_reactions" in item else "",
            "indications_and_usage": item.get("indications_and_usage", [""])[0] if "indications_and_usage" in item else ""
        }
        records.append(record)

    # Convert to DataFrame
    df = pd.DataFrame(records)

    # Save to CSV
    df.to_csv("drug_data.csv", index=False)
    print("Data saved to drug_data.csv")

else:
    print("Failed to retrieve data:", response.status_code)


Data saved to drug_data.csv


In [1]:
import pandas as pd

In [3]:
data=pd.read_csv("drug_data.csv")

In [5]:
data.head()

Unnamed: 0,brand_name,generic_name,manufacturer_name,adverse_reactions,indications_and_usage
0,SILICEA,SILICEA,"Rxhomeo Private Limited d.b.a. Rxhomeo, Inc",,INDICATIONS Condition listed above or as direc...
1,,,,,Uses Multi-purpose mineral powder provides bro...
2,Betadine,POVIDONE-IODINE,"Atlantis Consumer Healthcare, Inc.",,Uses First aid to help prevent infection in mi...
3,,,,,INDICATIONS Indications: MEZEREUM Itching
4,,,,ADVERSE REACTIONS Ophthalmic use: The most fre...,INDICATIONS AND USAGE Ofloxacin ophthalmic sol...


In [7]:
data.dropna()

Unnamed: 0,brand_name,generic_name,manufacturer_name,adverse_reactions,indications_and_usage
5,Naproxen,NAPROXEN,A-S Medication Solutions,6 ADVERSE REACTIONS The following adverse reac...,1 INDICATIONS AND USAGE Naproxen tablets and n...
10,Quick Action,SALICYLIC ACID,Walmart Inc.,ADVERSE REACTION DISTRIBUTED BY: Wal-Mart Stor...,Uses for the treatment of acne
12,Mekinist,TRAMETINIB,Novartis Pharmaceuticals Corporation,6 ADVERSE REACTIONS The following clinically s...,1 INDICATIONS AND USAGE MEKINIST is a kinase i...
14,Glimepiride,GLIMEPIRIDE,American Health Packaging,6 ADVERSE REACTIONS The following serious adve...,1 INDICATIONS AND USAGE Glimepiride tablets ar...
16,Methocarbamol,METHOCARBAMOL,Proficient Rx LP,ADVERSE REACTIONS Adverse reactions reported c...,"INDICATIONS AND USAGE Methocarbamol tablets, U..."
18,NP Thyroid 120,"LEVOTHYROXINE, LIOTHYRONINE","Acella Pharmaceuticals, LLC",ADVERSE REACTIONS Adverse reactions other than...,INDICATIONS AND USAGE NP Thyroid ® tablets (th...
19,Varenicline,VARENICLINE,Aurobindo Pharma Limited,6 ADVERSE REACTIONS The following serious adve...,1 INDICATIONS AND USAGE Varenicline tablets ar...
41,Betamethasone Dipropionate,BETAMETHASONE DIPROPIONATE,"Sun Pharmaceutical Industries, Inc.",6 ADVERSE REACTIONS The most common adverse re...,1 INDICATIONS AND USAGE Betamethasone dipropio...
45,Calcium Acetate,CALCIUM ACETATE,"Cipla USA Inc.,",6 ADVERSE REACTIONS Hypercalcemia is discussed...,1 INDICATIONS AND USAGE Calcium acetate is a p...
49,ENTRESTO,SACUBITRIL AND VALSARTAN,Novartis Pharmaceuticals Corporation,6 ADVERSE REACTIONS Clinically significant adv...,1 INDICATIONS AND USAGE ENTRESTO is a combinat...


In [9]:
new_1 = data.drop(["brand_name"],axis=1)

In [11]:
new_1.dropna()

Unnamed: 0,generic_name,manufacturer_name,adverse_reactions,indications_and_usage
5,NAPROXEN,A-S Medication Solutions,6 ADVERSE REACTIONS The following adverse reac...,1 INDICATIONS AND USAGE Naproxen tablets and n...
10,SALICYLIC ACID,Walmart Inc.,ADVERSE REACTION DISTRIBUTED BY: Wal-Mart Stor...,Uses for the treatment of acne
12,TRAMETINIB,Novartis Pharmaceuticals Corporation,6 ADVERSE REACTIONS The following clinically s...,1 INDICATIONS AND USAGE MEKINIST is a kinase i...
14,GLIMEPIRIDE,American Health Packaging,6 ADVERSE REACTIONS The following serious adve...,1 INDICATIONS AND USAGE Glimepiride tablets ar...
16,METHOCARBAMOL,Proficient Rx LP,ADVERSE REACTIONS Adverse reactions reported c...,"INDICATIONS AND USAGE Methocarbamol tablets, U..."
18,"LEVOTHYROXINE, LIOTHYRONINE","Acella Pharmaceuticals, LLC",ADVERSE REACTIONS Adverse reactions other than...,INDICATIONS AND USAGE NP Thyroid ® tablets (th...
19,VARENICLINE,Aurobindo Pharma Limited,6 ADVERSE REACTIONS The following serious adve...,1 INDICATIONS AND USAGE Varenicline tablets ar...
41,BETAMETHASONE DIPROPIONATE,"Sun Pharmaceutical Industries, Inc.",6 ADVERSE REACTIONS The most common adverse re...,1 INDICATIONS AND USAGE Betamethasone dipropio...
45,CALCIUM ACETATE,"Cipla USA Inc.,",6 ADVERSE REACTIONS Hypercalcemia is discussed...,1 INDICATIONS AND USAGE Calcium acetate is a p...
49,SACUBITRIL AND VALSARTAN,Novartis Pharmaceuticals Corporation,6 ADVERSE REACTIONS Clinically significant adv...,1 INDICATIONS AND USAGE ENTRESTO is a combinat...


In [13]:
import nltk

In [15]:
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to C:\Users\Hi-
[nltk_data]     TecH\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [17]:
all_side_effects = ' '.join(new_1['adverse_reactions'].dropna().tolist())


In [19]:
words = all_side_effects.lower().replace(',',' ').split()

In [21]:
from collections import Counter

In [23]:
side_effect = Counter(words)

In [25]:
side_effect.most_common(5)

[('and', 965), ('in', 923), ('of', 904), ('the', 746), ('adverse', 479)]

In [27]:
from nltk.corpus import stopwords

In [29]:
stop_words = set(stopwords.words("english"))
filtered = [w for w in words if w.lower() not in stop_words]
counts = Counter(filtered)

In [31]:
print(counts)



In [33]:
counts.most_common(20)

[('adverse', 479),
 ('patients', 458),
 ('reactions', 355),
 ('events', 205),
 ('clinical', 186),
 ('reported', 182),
 ('1', 166),
 ('trials', 162),
 ('3', 145),
 ('pain', 141),
 ('0', 134),
 ('increased', 126),
 ('system', 124),
 ('–', 122),
 ('mekinist', 116),
 ('skin', 113),
 ('2', 108),
 ('rash', 104),
 ('6', 102)]

In [35]:
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to C:\Users\Hi-
[nltk_data]     TecH\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\Hi-
[nltk_data]     TecH\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [37]:
from nltk.corpus import wordnet as wn

In [39]:
def is_medical(word):
    synsets = wn.synsets(word)
    if not synsets:
        return False
    for w in synsets:
        if w.lexname() in ["noun.body","noun.state","noun.substance"]:
            return True
        else:
            return False

In [41]:
medical_terms = [w for w in counts if is_medical(w)]
print(medical_terms)


['stinging', 'redness', 'chemical', 'edema', 'body', 'photophobia', 'dryness', 'eye', 'dizziness', 'nausea', 'bleeding', 'ulceration', 'hypertension', 'hyperkalemia', 'skin', 'dyspepsia', 'abdominal', 'pain', 'rash', 'ecchymosis', 'conditions', 'arthritis', 'osteoarthritis', 'complaints', 'diarrhea', 'stomatitis', 'lightheadedness', 'vertigo', 'purpura', 'palpitations', 'thirst', 'flatulence', 'ulcers', 'anemia', 'liver', 'enzymes', 'rashes', 'pancreatitis', 'jaundice', 'melena', 'thrombocytopenia', 'agranulocytosis', 'concentrate', 'disorders', 'pyrexia', 'vasculitis', 'inflammation', 'esophagitis', 'colitis', 'bowel', 'disease', 'hepatitis', 'eosinophilia', 'leucopenia', 'granulocytopenia', 'hyperglycemia', 'hypoglycemia', 'depression', 'abnormalities', 'insomnia', 'malaise', 'myalgia', 'muscle', 'weakness', 'meningitis', 'dysfunction', 'convulsions', 'pneumonitis', 'asthma', 'alopecia', 'urticaria', 'erythema', 'lichen', 'lupus', 'dermatitis', 'porphyria', 'symptoms', 'optic', 'neur

In [43]:
def clean_text_2(text):
    if not isinstance(text, str):   # skip NaN or non-strings
        return ""
    words = text.split()
    medical_words = [w for w in words if is_medical(w)]
    return " ".join(medical_words)

# Apply safely
new_1["adverse_reactions"] = new_1["adverse_reactions"].apply(clean_text_2)


In [45]:
new_1.head().dropna()

Unnamed: 0,generic_name,manufacturer_name,adverse_reactions,indications_and_usage
0,SILICEA,"Rxhomeo Private Limited d.b.a. Rxhomeo, Inc",,INDICATIONS Condition listed above or as direc...
2,POVIDONE-IODINE,"Atlantis Consumer Healthcare, Inc.",,Uses First aid to help prevent infection in mi...


In [47]:
new_1 = new_1.rename(columns={"indications_and_usage":"drug_use"})

In [49]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Use TF-IDF to represent each drug's side effects

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(new_1["adverse_reactions"])


In [51]:
from sklearn.metrics.pairwise import cosine_similarity

# Compute similarity matrix between all drugs
similarity_matrix = cosine_similarity(X, X)


In [53]:
def recommend_drugs_2(drug_name, top_n=5):
    drug_name = drug_name.strip().lower()  # normalize input
    matches = new_1[new_1["generic_name"].str.lower().str.strip() == drug_name]
    
    if matches.empty:
        return f"Drug '{drug_name}' not found in dataset."
    
    idx = matches.index[0]
    
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = [i for i in sim_scores if i[0] != idx]
    top_indices = [i[0] for i in sim_scores[:top_n]]
    
    return new_1.iloc[top_indices][["generic_name", "adverse_reactions", "drug_use"]]


In [55]:
new_1.head(50)

Unnamed: 0,generic_name,manufacturer_name,adverse_reactions,drug_use
0,SILICEA,"Rxhomeo Private Limited d.b.a. Rxhomeo, Inc",,INDICATIONS Condition listed above or as direc...
1,,,,Uses Multi-purpose mineral powder provides bro...
2,POVIDONE-IODINE,"Atlantis Consumer Healthcare, Inc.",,Uses First aid to help prevent infection in mi...
3,,,,INDICATIONS Indications: MEZEREUM Itching
4,,,chemical body eye dizziness nausea,INDICATIONS AND USAGE Ofloxacin ophthalmic sol...
5,NAPROXEN,A-S Medication Solutions,Hypertension Edema Hyperkalemia Skin abdominal...,1 INDICATIONS AND USAGE Naproxen tablets and n...
6,,,,Use surgical hand scrub: significantly reduces...
7,BENZALKONIUM CHLORIDE,Walmart Inc.,,Use For handwashing to decrease bacteria on th...
8,,,,Directions: Acne Clearing Cleanser Acne Cleari...
9,,,,For the temporary relief of nasal decongestiom...


In [59]:
recommend_drugs_2("NAPROXEN", top_n=5)


Unnamed: 0,generic_name,adverse_reactions,drug_use
60,ETODOLAC,abdominal ulcers liver bleeding complaints bod...,INDICATIONS AND USAGE Carefully consider the p...
43,,abdominal ulcers liver bleeding complaints bod...,INDICATIONS AND USAGE Carefully consider the p...
26,,Bleeding purpura at at conditions be safety Bl...,1 INDICATIONS AND USAGE Plavix is a P2Y 12 pla...
36,,colitis colitis abdominal nausea colitis sympt...,"INDICATIONS AND USAGE Clindamycin Injection, U..."
72,,be abnormalities angina angina as as edema nau...,INDICATIONS AND USAGE Diltiazem hydrochloride ...


In [61]:
new_1["generic_name"].fillna("unknown",inplace=True)

In [63]:
recommend_drugs_2("NAPROXEN", top_n=5)

Unnamed: 0,generic_name,adverse_reactions,drug_use
60,ETODOLAC,abdominal ulcers liver bleeding complaints bod...,INDICATIONS AND USAGE Carefully consider the p...
43,unknown,abdominal ulcers liver bleeding complaints bod...,INDICATIONS AND USAGE Carefully consider the p...
26,unknown,Bleeding purpura at at conditions be safety Bl...,1 INDICATIONS AND USAGE Plavix is a P2Y 12 pla...
36,unknown,colitis colitis abdominal nausea colitis sympt...,"INDICATIONS AND USAGE Clindamycin Injection, U..."
72,unknown,be abnormalities angina angina as as edema nau...,INDICATIONS AND USAGE Diltiazem hydrochloride ...
