In [2]:
import pandas as pd
import ast
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import os

In [4]:
# Read the merged symptom/disease table from disk (adjust the path if needed).
dataset_path = "data.csv"
df = pd.read_csv(dataset_path)

# The CSV stores each symptom list as its string repr; parse every cell back
# into a real Python list.
df['Symptoms'] = df['Symptoms'].map(ast.literal_eval)

In [6]:
# Features: one multi-hot row per record, one column per symptom seen in the data.
symptom_lists = df['Symptoms']
mlb_symptoms = MultiLabelBinarizer()
X = mlb_symptoms.fit_transform(symptom_lists)

# Targets: each disease name mapped to an integer class id.
disease_names = df['Disease']
le_disease = LabelEncoder()
y = le_disease.fit_transform(disease_names)


In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
holdout = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = holdout

# Fit a 100-tree random forest on the training portion.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X_train, y_train)


In [10]:
# Predict on the held-out split and summarise per-disease precision/recall/F1.
y_pred = clf.predict(X_test)

# Pass the full set of encoded labels explicitly. The split above is not
# stratified, so a disease can be absent from the test fold; without `labels`
# the number of `target_names` would then disagree with the labels actually
# present in y_test/y_pred. The LabelEncoder assigns ids 0..n_classes-1 in the
# same order as `classes_`, so label i corresponds to target name i.
all_labels = list(range(len(le_disease.classes_)))
report = classification_report(
    y_test,
    y_pred,
    labels=all_labels,
    target_names=le_disease.classes_,
    zero_division=0,  # classes with no test samples are reported as 0 without a warning
)
print(report)


                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        18
                                   AIDS       1.00      1.00      1.00        30
                                   Acne       1.00      1.00      1.00        24
                    Alcoholic hepatitis       1.00      1.00      1.00        25
                                Allergy       1.00      1.00      1.00        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      1.00      1.00        23
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        15
                            Common Cold       1.00      1.00      1.00        23
                           

In [16]:
# Persist the classifier and both fitted encoders next to the notebook.
artifacts = {
    "disease_model.pkl": clf,
    "symptom_encoder.pkl": mlb_symptoms,
    "disease_encoder.pkl": le_disease,
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("✅ Model and encoders saved in current directory.")



✅ Model and encoders saved in current directory.


In [20]:
import pandas as pd
import joblib
import ast

# Restore the classifier and the two encoders from their pickle files.
# NOTE: joblib.load unpickles arbitrary objects; only load files you trust.
model, symptom_encoder, disease_encoder = (
    joblib.load(artifact_path)
    for artifact_path in (
        "disease_model.pkl",
        "symptom_encoder.pkl",
        "disease_encoder.pkl",
    )
)

# Master table used to look up descriptions and precautions (your merged file).
master_path = "data.csv"
df_master = pd.read_csv(master_path)


In [22]:
# Symptom lists are stored as their string repr in the CSV; parse each cell
# back into a Python list.
df_master['Symptoms'] = df_master['Symptoms'].map(ast.literal_eval)

# Safe parser for 'Precautions'
def safe_eval_precaution(val):
    """Parse one 'Precautions' cell into a list, falling back to an empty list.

    Parameters
    ----------
    val : object
        Raw cell value. A list is returned unchanged; a string whose first
        non-whitespace character is '[' is parsed as a Python literal; any
        other value (e.g. NaN, None, free text) yields an empty list.

    Returns
    -------
    list
        The parsed list, the original list, or [] when the value cannot be
        interpreted as a list literal.
    """
    if isinstance(val, list):
        return val

    if isinstance(val, str):
        # Parse the stripped text so surrounding whitespace cannot trigger a
        # spurious parse failure.
        text = val.strip()
        if text.startswith('['):
            try:
                return ast.literal_eval(text)
            # Only the errors literal_eval documents for malformed input are
            # treated as "unparseable"; a bare except would also swallow
            # KeyboardInterrupt and SystemExit.
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                return []

    return []

# Normalise every 'Precautions' cell to a list via the tolerant parser above.
df_master['Precautions'] = df_master['Precautions'].map(safe_eval_precaution)


In [24]:
# ✅ Input your symptoms here
input_symptoms = ['fatigue', 'headache', 'nausea']  # Change this list

# Keep only symptoms the encoder was fitted on. Membership is checked against
# a set so each lookup is a hash check rather than a scan of the classes_ array.
valid_symptoms = symptom_encoder.classes_
known_symptoms = set(valid_symptoms)
ignored_symptoms = [s for s in input_symptoms if s not in known_symptoms]
input_symptoms = [s for s in input_symptoms if s in known_symptoms]

if ignored_symptoms:
    # Tell the user which entries were dropped instead of discarding them silently.
    print(f"⚠️ Ignoring unrecognized symptoms: {ignored_symptoms}")

if not input_symptoms:
    # With nothing left, the encoder would emit an all-zero row and the model
    # would still return some disease; refuse rather than report that result.
    raise ValueError(
        "None of the provided symptoms are known to the model; "
        "cannot make a prediction."
    )

# Encode for prediction: one multi-hot row, then map the class id back to its name.
X_input = symptom_encoder.transform([input_symptoms])
y_pred = model.predict(X_input)
predicted_disease = disease_encoder.inverse_transform(y_pred)[0]


In [26]:
# Look up the predicted disease in the master table and print its details.
print(f"🩺 Predicted Disease: {predicted_disease}")

disease_rows = df_master[df_master['Disease'] == predicted_disease]
if disease_rows.empty:
    # Guard against a master file that lacks the predicted disease; indexing
    # row 0 of an empty frame would otherwise raise an opaque IndexError.
    print("\n⚠️ No description or precautions found for this disease.")
else:
    # The first matching row supplies the description and precautions.
    info = disease_rows.iloc[0]

    print("\n📖 Description:")
    print(info['Description'])

    # Drop blank / non-string entries BEFORE numbering so the printed list is
    # numbered 1..n without gaps.
    precautions = [
        p for p in info['Precautions']
        if p and isinstance(p, str) and p.strip()
    ]

    print("\n🛡️ Precautions:")
    for i, p in enumerate(precautions, 1):
        print(f"{i}. {p}")


🩺 Predicted Disease: (vertigo) Paroymsal  Positional Vertigo

📖 Description:
Benign paroxysmal positional vertigo (BPPV) is one of the most common causes of vertigo — the sudden sensation that you're spinning or that the inside of your head is spinning. Benign paroxysmal positional vertigo causes brief episodes of mild to intense dizziness.

🛡️ Precautions:
1. lie down
2. avoid sudden change in body
3. avoid abrupt head movment
4. relax
