In [9]:
import numpy as np # type: ignore
import pandas as pd # type: ignore
import matplotlib.pyplot as plt # type: ignore
import seaborn as sns # type: ignore
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, roc_curve
)

In [13]:
data = pd.read_csv("/home/stranger/Desktop/Heart_Disease_Project/results/cleaned_heart_disease.csv")

In [14]:
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,target,ca_encoded,thal_encoded
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0,0,1
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,2,3,0
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,1,2,2
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0,0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0,0,0


In [30]:
data["target"] = (data["target"] > 0).astype(int)
X = data.drop(columns=["target"], axis=1).astype(int)
y = data["target"]

In [31]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [32]:
models = {
    "Logistic Regression": Pipeline([
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression(max_iter=5000, solver="lbfgs", class_weight="balanced"))
    ]),
    "Decision Tree": DecisionTreeClassifier(random_state=42, class_weight="balanced"),
    "Random Forest": RandomForestClassifier(
        random_state=42, class_weight="balanced", n_estimators=300
    ),
    "SVM": Pipeline([
        ("scaler", StandardScaler()),
        ("clf", SVC(probability=True, random_state=42, class_weight="balanced"))
    ])
}

In [33]:
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    y_prob = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_prob)

    results[name] = [acc, prec, rec, f1, auc]

    print(f"\n{name} — Classification Report")
    print(classification_report(y_test, y_pred, digits=3))




Logistic Regression — Classification Report
              precision    recall  f1-score   support

           0      0.923     0.727     0.814        33
           1      0.743     0.929     0.825        28

    accuracy                          0.820        61
   macro avg      0.833     0.828     0.819        61
weighted avg      0.840     0.820     0.819        61


Decision Tree — Classification Report
              precision    recall  f1-score   support

           0      0.786     0.667     0.721        33
           1      0.667     0.786     0.721        28

    accuracy                          0.721        61
   macro avg      0.726     0.726     0.721        61
weighted avg      0.731     0.721     0.721        61


Random Forest — Classification Report
              precision    recall  f1-score   support

           0      0.966     0.848     0.903        33
           1      0.844     0.964     0.900        28

    accuracy                          0.902        61
   ma

In [34]:
results_df = pd.DataFrame(results, index=["Accuracy","Precision","Recall","F1-Score","AUC"]).T
print("\nModel Performance Comparison:\n")
print(results_df.sort_values("AUC", ascending=False))


Model Performance Comparison:

                     Accuracy  Precision    Recall  F1-Score       AUC
Random Forest        0.901639   0.843750  0.964286  0.900000  0.944264
SVM                  0.803279   0.722222  0.928571  0.812500  0.920996
Logistic Regression  0.819672   0.742857  0.928571  0.825397  0.892857
Decision Tree        0.721311   0.666667  0.785714  0.721311  0.726190
