In [9]:
# 📌 Step 1: Import Libraries
import pandas as pd
import numpy as np
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 📌 Step 2: Read the preprocessed dataset and preview its first rows
df = pd.read_csv('../Data/preprocessed_data.csv')
print("✅ Preprocessed Data Loaded Successfully")
display(df.head())

# 📌 Step 3: Separate the label column from the predictor columns
y = df['Disease_Outbreak']
X = df.drop(columns=['Disease_Outbreak'])

# 📌 Step 4: Hold out 20% of the rows for testing; stratifying on y keeps the
# class proportions the same in both partitions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 📌 Step 5: Standardise features with statistics learned from the training
# split only (the scaled copies are meant for Logistic Regression & SVM)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📌 Step 6: Build the candidate classifiers, keyed by display name
# (every estimator is seeded with random_state=42 so reruns are repeatable)
models = {}
models['Logistic Regression'] = LogisticRegression(random_state=42, max_iter=1000)
models['SVM'] = SVC(random_state=42)
models['Random Forest'] = RandomForestClassifier(random_state=42)
models['AdaBoost'] = AdaBoostClassifier(random_state=42)
models['Gradient Boosting'] = GradientBoostingClassifier(random_state=42)

# 📌 Step 7: Fit every model and report its performance on the held-out split
accuracy_scores = {}

for name, model in models.items():
    print(f"\n🚀 Training {name}...")

    # Logistic Regression and SVM receive the standardised matrices;
    # every other model is trained on the unscaled features.
    if name == 'Logistic Regression' or name == 'SVM':
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        train_features, test_features = X_train, X_test

    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(train_features, y_train).predict(test_features)

    acc = accuracy_score(y_test, y_pred)
    accuracy_scores[name] = acc

    print(f"✅ {name} Accuracy: {acc:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# 📌 Step 8: Recap the test accuracy of each model in training order
print("\n📊 Model Accuracy Summary:")
for model_name in accuracy_scores:
    accuracy = accuracy_scores[model_name]
    print(f"{model_name}: {accuracy:.4f}")


✅ Preprocessed Data Loaded Successfully


Unnamed: 0,Week,State_Code,State_Name,Disease_Code,Incidence_per_Capita,Disease_Outbreak
0,8,8,4,1,0.36,0
1,38,3,6,1,2.5,1
2,16,4,2,0,0.31,0
3,17,3,3,2,4.46,1
4,31,3,9,0,0.52,0



🚀 Training Logistic Regression...
✅ Logistic Regression Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       200
           1       1.00      1.00      1.00       200

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
[[200   0]
 [  0 200]]

🚀 Training SVM...
✅ SVM Accuracy: 0.9975
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       200
           1       1.00      0.99      1.00       200

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
[[200   0]
 [  1 199]]

🚀 Training Random Forest...
✅ Random Forest Accuracy: 1.0000
Classification Report:
              pre