In [1]:
# ============================================================
# PROJECT: Disease Prediction System using Naive Bayes Algorithm
# AUTHOR : T. Chanakyan (Reg No: 2117240070043)
# ============================================================
# Run this file directly in VS Code using Python extension
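# Make sure disease_dataset.csv is in the folder you run this script from
# (the path is relative, so it is resolved against the current working directory)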
# ============================================================

# Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import warnings
warnings.filterwarnings("ignore")

# ============================================================
# STEP 1: LOAD THE DATASET
# ============================================================

# Read the symptom table; the relative path is resolved against the current
# working directory, so the CSV must be there when the script starts.
data = pd.read_csv(filepath_or_buffer="disease_dataset.csv")

# Confirm the load, then show the table dimensions and a five-row preview.
print("✅ Dataset Loaded Successfully!")
print(
    "\nDataset Shape:",
    data.shape,
)
print(
    "\nFirst 5 Records:\n",
    data.head(5),
)

# ============================================================
# STEP 2: DATA PREPARATION
# ============================================================
# Target: the "Disease" label column. Features: every remaining column.
y = data["Disease"]
X = data.drop(columns=["Disease"])

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# ============================================================
# STEP 3: MODEL TRAINING
# ============================================================
# Build the Gaussian Naive Bayes classifier and fit it on the training split
# in one step (fit() returns the fitted estimator itself).
# NOTE(review): the prompts later in this script collect symptoms as 0/1 flags;
# a Bernoulli variant may suit such features better - confirm before changing.
model = GaussianNB().fit(X_train, y_train)
print("\n✅ Model Training Completed Successfully!")

# ============================================================
# STEP 4: EVALUATION
# ============================================================
# Predict labels for the held-out rows and compare them with the true labels.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\n📊 Model Evaluation Results:")
print("--------------------------------------------")
print("Accuracy :", round(accuracy * 100, 2), "%")
# The matrix only has rows/columns for labels that occur in y_test or y_pred,
# so on a tiny test split it can be smaller than the number of diseases.
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# zero_division=0 states explicitly that precision/recall for a class that is
# never predicted (or never present) is reported as 0, instead of relying on
# the global warnings filter to hide the undefined-metric warning.
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

# ============================================================
# STEP 5: SAVE THE MODEL
# ============================================================
# Persist the fitted classifier next to the script's working directory so it
# can be reloaded without retraining.
joblib.dump(
    value=model,
    filename="disease_prediction_model.pkl",
)
print("\n💾 Model saved as 'disease_prediction_model.pkl'")

# ============================================================
# STEP 6: PREDICT DISEASE FROM USER INPUT
# ============================================================
print("\n===============================================")
print("🩺 Disease Prediction System (User Mode)")
print("===============================================")

# The symptom names are the feature columns, in the order used for training.
symptoms = list(X.columns)

print("\nAvailable Symptoms:")
for i, s in enumerate(symptoms, 1):
    print(f"{i}. {s}")

print("\nEnter 1 if symptom is present, else 0:")


def _read_symptom_flag(symptom_name):
    """Prompt for one symptom until the user enters 0 or 1.

    Args:
        symptom_name: Name shown in the prompt.

    Returns:
        The validated answer as an int, either 0 or 1.
    """
    while True:
        # Keep the try body to the single conversion that can raise.
        try:
            answer = int(input(f"{symptom_name}: "))
        except ValueError:
            print("⚠ Invalid input, enter numeric value (0 or 1).")
            continue
        if answer in (0, 1):
            return answer
        print("⚠ Enter only 0 or 1!")


# One validated 0/1 answer per symptom, in training-column order.
user_input = [_read_symptom_flag(s) for s in symptoms]

# The model was fitted on a DataFrame, so predict on a one-row DataFrame with
# the same column names. A bare NumPy array carries no feature names, and the
# resulting mismatch was only hidden by the global warnings filter.
user_data = pd.DataFrame([user_input], columns=symptoms)
prediction = model.predict(user_data)
print("\n🎯 Predicted Disease:", prediction[0])
print("===============================================")

# ============================================================
# STEP 7: VERIFY LOADED MODEL
# ============================================================
# Reload the classifier that was just written to disk and score it on the same
# held-out rows, to check that the saved file round-trips.
# NOTE: joblib.load unpickles the file; only load model files you trust.
loaded_model = joblib.load(filename="disease_prediction_model.pkl")
test_pred = loaded_model.predict(X_test)
print(
    "\nReloaded Model Accuracy:",
    round(accuracy_score(y_true=y_test, y_pred=test_pred) * 100, 2),
    "%",
)

print("\n✅ Program Executed Successfully!")

✅ Dataset Loaded Successfully!

Dataset Shape: (10, 6)

First 5 Records:
    Fever  Cough  Headache  Fatigue  SoreThroat  Disease
0      1      1         0        1           1      Flu
1      1      0         1        1           0  Typhoid
2      0      1         0        0           1     Cold
3      1      1         1        1           0   Dengue
4      0      1         0        1           1     Cold

✅ Model Training Completed Successfully!

📊 Model Evaluation Results:
--------------------------------------------
Accuracy : 50.0 %
Confusion Matrix:
 [[1 0]
 [1 0]]
Classification Report:
               precision    recall  f1-score   support

      Dengue       0.50      1.00      0.67         1
     Typhoid       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2


💾 Model saved as 'disease_prediction_model.pkl'

🩺 Disease Prediction 