In [6]:
# Step 2: Load the reviews CSV and keep only complete rows
import pandas as pd

# Rows missing any of the four fields used downstream are discarded, and the
# index is rebuilt so labels run 0..n-1 again after the drop.
med = (
    pd.read_csv('/content/drive/MyDrive/drugs.csv')  # Adjust if needed
    .dropna(subset=['drugName', 'condition', 'review', 'rating'])
    .reset_index(drop=True)
    # 'text' is the drug name and the condition joined by a single space.
    .assign(
        text=lambda frame: frame['drugName'].astype(str)
        + ' '
        + frame['condition'].astype(str)
    )
)


In [10]:
# Step 3: Feature Engineering (Fixed version)
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Vectorize the combined drug/condition text, dropping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(med['text'])

# Rescale the rating column with a default-configured MinMaxScaler.
scaler = MinMaxScaler()
rating_vector = scaler.fit_transform(med[['rating']])

# Append the scaled rating as one extra column next to the TF-IDF columns.
# The stacked result is wrapped in csr_matrix because later cells select
# single rows from it by position.
combined_features = csr_matrix(hstack([tfidf_matrix, rating_vector]))


In [11]:
# Step 4: Recommendation Function
import numpy as np

def recommend(drug_name, top_n=5):
    """Return the rows of ``med`` most similar to the first row for ``drug_name``.

    The first row whose ``drugName`` equals ``drug_name`` (case-insensitive)
    is used as the query. Its feature row in ``combined_features`` is compared
    against every row by cosine similarity, and the ``top_n`` highest-scoring
    rows other than the query row itself are returned. Other rows for the same
    drug are not filtered out, so they can appear in the results.

    Assumes row ``i`` of ``combined_features`` corresponds to ``med.iloc[i]``
    (TODO confirm this still holds if ``med`` is re-filtered after the
    features are built).

    Args:
        drug_name: Drug name to look up; matched case-insensitively.
        top_n: Maximum number of rows to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of dicts (medicine, condition, rating, review excerpt and
        similarity score), ordered by decreasing similarity, or an error
        string when ``drug_name`` does not occur in ``med``.
    """
    # Work with positions rather than index labels: both combined_features[...]
    # and med.iloc[...] are positional, so this stays correct even if med's
    # index is not a fresh 0..n-1 range.
    is_match = (med['drugName'].str.lower() == drug_name.lower()).to_numpy()
    positions = np.flatnonzero(is_match)
    if positions.size == 0:
        return f"❌ '{drug_name}' not found in data."

    index = int(positions[0])
    sim_scores = cosine_similarity(combined_features[index], combined_features).flatten()

    # Rank by descending similarity with a stable sort, then drop the query
    # row explicitly. Slicing off "the first element" is not safe: rows with
    # identical features tie with the query at the top, and the query is not
    # guaranteed to be ranked ahead of them.
    ranked = np.argsort(-sim_scores, kind='stable')
    similar_indices = ranked[ranked != index][:max(top_n, 0)]

    results = []
    for i in similar_indices:
        row = med.iloc[i]
        review = row['review']
        # Long reviews are cut to 300 characters and marked with an ellipsis.
        excerpt = review[:300] + "..." if len(review) > 300 else review
        results.append({
            '🧪 Medicine': row['drugName'],
            '📋 Condition': row['condition'],
            '⭐ Rating': row['rating'],
            '🗣 Review': excerpt,
            '📊 Similarity Score': round(float(sim_scores[i]), 3)
        })
    return results


In [12]:
# Step 5: Try it!
drug_input = "Afatinib"
recommendations = recommend(drug_input)

# recommend() hands back a plain string when the drug is unknown; otherwise it
# returns a list of dicts, printed one field per line with a dashed rule after
# each entry.
if not isinstance(recommendations, str):
    for entry in recommendations:
        for label, value in entry.items():
            print(f"{label}: {value}")
        print(f"\n{'-' * 50}\n")
else:
    print(recommendations)


🧪 Medicine: Afatinib
📋 Condition: Non-Small Cell Lung Cance
⭐ Rating: 10
🗣 Review: "I started Gilotrif/Afatinib 7 days ago. I can feel tumor shrinking, I can move now -I use to be so stiffed. 
I was diagnosed NSCLC Lung Cancer stage 4. My diagnoses is EGFR exon 19 so this meds is perfect for my mutation.
After one day on Gilotrif, I had diaarhea. But the doctor gives me Lopera...
📊 Similarity Score: 0.933

--------------------------------------------------

🧪 Medicine: Tarceva
📋 Condition: Non-Small Cell Lung Cance
⭐ Rating: 7
🗣 Review: "Stage 4 lung cancer that had got into my bones from my shoulders to my hips and spine and liver was found 5/6/2015. After 6 rounds of radiation on my spine to defer some of the pain and 2 months in the hospital I started taking Tarceva as the only option to chemo or dying within 5-6 months. Here I ...
📊 Similarity Score: 0.774

--------------------------------------------------

🧪 Medicine: Nivolumab
📋 Condition: Non-Small Cell Lung Cance
⭐ Rating:

In [13]:
import pickle

# Persist the cleaned frame and the feature matrix, one pickle file each,
# written to the current working directory.
for filename, artifact in (
    ("med.pkl", med),
    ("combined_features.pkl", combined_features),
):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)
