In [1]:
# --- Cell 1: Imports ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score, roc_curve
import matplotlib.pyplot as plt

# --- Cell 2: Load and Prepare Data ---
df = pd.read_csv('../data/heart_disease_cleaned.csv')
X = df.drop('target', axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Cell 3: Define and Train Models ---
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(probability=True, random_state=42)
}

results = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    y_proba = model.predict_proba(X_test_scaled)[:, 1]
    
    accuracy = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_proba)
    report = classification_report(y_test, y_pred, output_dict=True)
    
    results[name] = {
        'accuracy': accuracy,
        'auc': auc,
        'report': report
    }
    print(f"--- Results for {name} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"AUC: {auc:.4f}")
    print(classification_report(y_test, y_pred))

# --- Cell 4: Save Evaluation Metrics ---
with open("../results/evaluation_metrics.txt", "w") as f:
    for name, result in results.items():
        f.write(f"--- Baseline Model: {name} ---\n")
        f.write(f"Accuracy: {result['accuracy']:.4f}\n")
        f.write(f"AUC: {result['auc']:.4f}\n")
        f.write(f"Classification Report:\n{classification_report(y_test, models[name].predict(X_test_scaled))}\n\n")

print("\nBaseline model evaluation metrics saved to results/evaluation_metrics.txt")

--- Results for Logistic Regression ---
Accuracy: 0.8488
AUC: 0.9415
              precision    recall  f1-score   support

           0       0.90      0.78      0.83       100
           1       0.81      0.91      0.86       105

    accuracy                           0.85       205
   macro avg       0.86      0.85      0.85       205
weighted avg       0.85      0.85      0.85       205

--- Results for Decision Tree ---
Accuracy: 0.9854
AUC: 0.9857
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       100
           1       1.00      0.97      0.99       105

    accuracy                           0.99       205
   macro avg       0.99      0.99      0.99       205
weighted avg       0.99      0.99      0.99       205

--- Results for Random Forest ---
Accuracy: 1.0000
AUC: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       100
           1       1.00      1.00      1.0