<a href="https://colab.research.google.com/github/Ashwini028/Personal-Recommendation-system--Healthcare/blob/main/Healthcare_Recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libraries

In [None]:
# Import libraries
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from textblob import TextBlob

In [None]:
# Load data
# The CSV location defaults to the Colab path this notebook has always used.
# Set the HEALTHCARE_DATA_PATH environment variable to run the notebook in
# another environment without editing this cell.
DATA_PATH = os.environ.get('HEALTHCARE_DATA_PATH', '/content/healthcare_dataset.csv')
df = pd.read_csv(DATA_PATH)
print("Dataset Shape:", df.shape)

Dataset Shape: (55500, 15)
            Name  Age  Gender Blood Type Medical Condition Date of Admission  \
0  Bobby JacksOn   30    Male         B-            Cancer        2024-01-31   
1   LesLie TErRy   62    Male         A+           Obesity        2019-08-20   
2    DaNnY sMitH   76  Female         A-           Obesity        2022-09-22   
3   andrEw waTtS   28  Female         O+          Diabetes        2020-11-18   
4  adrIENNE bEll   43  Female        AB+            Cancer        2022-09-19   

             Doctor                    Hospital Insurance Provider  \
0     Matthew Smith             Sons and Miller         Blue Cross   
1   Samantha Davies                     Kim Inc           Medicare   
2  Tiffany Mitchell                    Cook PLC              Aetna   
3       Kevin Wells  Hernandez Rogers and Vang,           Medicare   
4    Kathleen Hanna                 White-White              Aetna   

   Billing Amount  Room Number Admission Type Discharge Date   Medicati

In [None]:
# Data preprocessing: discard every row that contains a missing value, then
# discard exact duplicate rows. Row labels of the surviving rows are kept.
df = df.dropna().drop_duplicates()


In [None]:
# Handle missing values: numeric columns are filled with their column mean,
# and whatever is still missing afterwards is filled with "Unknown".
# NOTE(review): the preprocessing cell above already calls df.dropna(), so on
# a top-to-bottom run there is nothing left for this cell to fill.
numeric_means = df.mean(numeric_only=True)
df = df.fillna(numeric_means).fillna("Unknown")

In [None]:
# Encode categorical variables: each object-dtype column is replaced by the
# integer codes of its own LabelEncoder. The fitted encoders are stored in
# label_encoders, keyed by column name.
categorical_columns = df.select_dtypes(include=['object']).columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])


In [None]:
# Feature scaling: standardise the columns listed in `features` and write the
# result back into df. The fitted scaler stays available as `scaler`.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split performed in the modelling cell further down.
features = ['Age']
scaler = StandardScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])

In [None]:
# 4. Disease prediction model
# Target: the encoded 'Medical Condition' column. Features: every other column.
y = df['Medical Condition']
X = df.drop(columns=['Medical Condition'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training can be chained.
model_rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and report overall accuracy plus per-class metrics.
y_pred = model_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("\nDisease Prediction Accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))


Disease Prediction Accuracy: 0.29252319446971076
              precision    recall  f1-score   support

           0       0.28      0.31      0.29      1867
           1       0.28      0.29      0.28      1823
           2       0.30      0.29      0.29      1830
           3       0.31      0.30      0.30      1886
           4       0.29      0.29      0.29      1757
           5       0.30      0.28      0.29      1831

    accuracy                           0.29     10994
   macro avg       0.29      0.29      0.29     10994
weighted avg       0.29      0.29      0.29     10994



In [None]:
# Save the model together with the preprocessing it depends on.
# model_rf was trained on label-encoded columns with `features` standardised
# by `scaler`, so the pickled model alone cannot score a raw patient record.
# The fitted preprocessing objects and the training column order are stored
# next to it in a separate file.
joblib.dump(
    {
        "scaler": scaler,
        "scaled_features": features,
        "label_encoders": label_encoders,
        "feature_columns": list(X_train.columns),
    },
    "disease_prediction_preprocessing.pkl",
)
# Kept as the last expression so the cell still displays the model file name.
joblib.dump(model_rf, "disease_prediction_model.pkl")


['disease_prediction_model.pkl']

In [None]:
# 5. Medicine recommendation (content-based)
# Small illustrative catalogue: one free-text description per medicine.
medicine_records = [
    (1, 'MedA', 'Used for high blood pressure and heart conditions'),
    (2, 'MedB', 'Treats diabetes and controls glucose levels'),
    (3, 'MedC', 'Relieves fever and reduces inflammation'),
    (4, 'MedD', 'Cures bacterial infections and boosts immunity'),
]
medicine_data = pd.DataFrame(
    medicine_records,
    columns=['medicine_id', 'medicine_name', 'description'],
)

# TF-IDF matrix of the descriptions: row i corresponds to row i of medicine_data.
vectorizer = TfidfVectorizer()
medicine_vectors = vectorizer.fit_transform(medicine_data['description'])

def recommend_medicines(med_name, top_n=3):
    """Return the medicines whose descriptions are most similar to ``med_name``'s.

    Similarity is the cosine similarity between the TF-IDF rows stored in
    ``medicine_vectors``. The queried medicine itself is never returned.

    Args:
        med_name: Value to look up in ``medicine_data['medicine_name']``.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``top_n`` medicine names, most similar first.

    Raises:
        IndexError: If ``med_name`` does not occur in ``medicine_data``.
    """
    # Use the row *position*, not the index label: medicine_vectors is addressed
    # positionally, so this stays correct if medicine_data is ever re-indexed.
    matches = np.flatnonzero((medicine_data['medicine_name'] == med_name).to_numpy())
    if matches.size == 0:
        raise IndexError(f"Unknown medicine name: {med_name!r}")
    query_pos = int(matches[0])

    scores = cosine_similarity(medicine_vectors[query_pos], medicine_vectors)[0]
    # Python's sort is stable, so equally similar medicines keep catalogue order.
    ranked = sorted(range(len(scores)), key=lambda pos: scores[pos], reverse=True)
    # Exclude the query by position rather than assuming it sorts first; with
    # tied scores (e.g. duplicate descriptions) it may not be at rank 0.
    top_positions = [pos for pos in ranked if pos != query_pos][:max(top_n, 0)]
    return medicine_data.iloc[top_positions]['medicine_name'].tolist()

print("\nMedicine Recommendations for MedA:", recommend_medicines("MedA"))


Medicine Recommendations for MedA: ['MedC', 'MedB', 'MedD']


In [None]:
# 6. Sentiment analysis on reviews
# Three hand-written sample reviews used to demonstrate polarity scoring.
reviews = [
    "This medicine worked great for me!",
    "I had side effects after taking it.",
    "It was okay, not very effective.",
]

# Score every review first, then print each review next to its polarity.
polarity_scores = [TextBlob(text).sentiment.polarity for text in reviews]
for review, sentiment in zip(reviews, polarity_scores):
    print(f"Review: {review} | Sentiment Score: {sentiment}")


Review: This medicine worked great for me! | Sentiment Score: 1.0
Review: I had side effects after taking it. | Sentiment Score: 0.0
Review: It was okay, not very effective. | Sentiment Score: 0.13461538461538464


In [None]:
# 7. Hybrid suggestion placeholder
# Example: use the disease prediction -> map it to suggested medicines
def hybrid_recommendation(patient_data):
    """Predict a patient's condition with ``model_rf`` and map it to example medicines.

    Args:
        patient_data: One patient's feature values, in the same order as
            ``X_train.columns``. Categorical values must already be label-encoded
            (the fitted encoders are in ``label_encoders``). The columns listed
            in ``features`` are passed unscaled and are standardised here.

    Returns:
        The list of medicine names mapped to the predicted class, or
        ``["No suggestion available"]`` when the class has no mapping.
    """
    # Single-row frame with the training column names and order.
    patient_df = pd.DataFrame([patient_data], columns=X_train.columns)

    # Reproduce exactly the preprocessing the model was trained with: only the
    # columns in `features` were standardised, and with `scaler`. Standardising
    # every column with a separately fitted scaler would hand the model inputs
    # on a scale it never saw during training.
    patient_df[features] = scaler.transform(patient_df[features])

    # Cast to a plain int so the dictionary lookup below uses a built-in key type.
    pred = int(model_rf.predict(patient_df)[0])

    # Example mapping only. Keys are the encoded 'Medical Condition' labels;
    # label_encoders['Medical Condition'].classes_ gives the matching names.
    disease_to_meds = {
        0: ["MedA", "MedC"],
        1: ["MedB"],
        2: ["MedD"]
    }
    return disease_to_meds.get(pred, ["No suggestion available"])


# Example patient data (dummy values for demonstration).
# One value per column of X_train; replace the zeros with real, label-encoded
# values when actual patient data is available.
example_patient_values = [0] * len(X_train.columns)
# Unscaled Age value; hybrid_recommendation standardises it with `scaler`.
example_patient_values[X_train.columns.get_loc('Age')] = 45

print("\nHybrid Recommendation:", hybrid_recommendation(example_patient_values))


Hybrid Recommendation: ['MedD']


