In [1]:
# step3_model_training.py

import pandas as pd
import numpy as np
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
import matplotlib.pyplot as plt

# Read the cleaned dataset from the current working directory.
df = pd.read_csv("cleaned_heart_disease_dataset.csv")

# Output locations: pickled objects go to `model_dir`, evaluation
# artefacts (text reports, plots) go to `report_dir`.
model_dir = "saved_models"
report_dir = "model_reports"
for out_dir in (model_dir, report_dir):
    # exist_ok=True makes re-running the script safe when the folders exist.
    os.makedirs(out_dir, exist_ok=True)

# Separate the label column from the predictor columns.
y = df["target"]
X = df.drop(columns=["target"])

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training partition
# only, so no statistics from the test partition leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Persist the fitted scaler alongside the models.
scaler_path = f"{model_dir}/scaler.pkl"
with open(scaler_path, "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Registry of candidate classifiers, keyed by a human-readable display name.
# Insertion order is the order in which the estimators are listed here.
# Estimators that accept a seed are given random_state=42 for repeatable runs.
models = {}
models["Logistic Regression"] = LogisticRegression(max_iter=1000)
models["Decision Tree"] = DecisionTreeClassifier(random_state=42)
# probability=True is needed for SVC to expose predict_proba.
models["SVM"] = SVC(probability=True, random_state=42)
models["Random Forest"] = RandomForestClassifier(n_estimators=100, random_state=42)

# Train, evaluate, and persist every candidate model.
#
# For each (name, estimator) pair in `models` this loop:
#   1. fits the estimator on the scaled training partition,
#   2. predicts labels and class probabilities for the scaled test partition,
#   3. pickles the fitted estimator into `model_dir`,
#   4. writes a text classification report plus ROC AUC into `report_dir`,
#   5. saves a confusion-matrix heatmap (PNG) into `report_dir`.
for name, model in models.items():
    # File-system-friendly form of the display name, computed once.
    # NOTE: the model pickle uses the lower-cased stem while the report and
    # plot keep the original casing; this is preserved so existing output
    # paths do not change.
    file_stem = name.replace(' ', '_')

    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Column 1 of predict_proba is used as the score for ROC AUC.
    # NOTE(review): this assumes a binary target -- confirm against the data.
    y_proba = model.predict_proba(X_test_scaled)[:, 1]

    # Save model
    with open(f"{model_dir}/{file_stem.lower()}_model.pkl", "wb") as f:
        pickle.dump(model, f)

    # Save report. The text report is computed exactly once (the previous
    # dict-form report was never used).
    report_text = classification_report(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)

    # Explicit encoding keeps the report independent of the platform default.
    with open(f"{report_dir}/{file_stem}_report.txt", "w", encoding="utf-8") as f:
        f.write(f"Classification Report for {name}:\n")
        f.write(report_text)
        f.write(f"\nROC AUC Score: {auc:.4f}\n")

    # Save confusion matrix plot. An explicit figure/axes pair is used and the
    # figure is closed in `finally`, so a failure while drawing or saving does
    # not leave an open figure behind.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        # Tick labels assume two classes ordered as (negative, positive).
        sns.heatmap(
            cm,
            annot=True,
            fmt='d',
            cmap='Blues',
            xticklabels=["No", "Yes"],
            yticklabels=["No", "Yes"],
            ax=ax,
        )
        ax.set_title(f'{name} - Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        fig.savefig(f"{report_dir}/{file_stem}_confusion_matrix.png")
    finally:
        plt.close(fig)

print("✅ Model training complete. Models and reports saved.")


✅ Model training complete. Models and reports saved.
