In [60]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, matthews_corrcoef
)

import joblib
import os

In [61]:
df = pd.read_csv("heartdisease_dataset.csv")

df.replace("?", np.nan, inplace=True)

df = df.apply(pd.to_numeric, errors='coerce')

df.dropna(inplace=True)

X = df.drop("target", axis=1)
y = df["target"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

os.makedirs("model", exist_ok=True)
joblib.dump(scaler, "model/scaler.pkl")

['model/scaler.pkl']

In [62]:
os.makedirs("model", exist_ok=True)

In [63]:
models = {
"Logistic Regression": LogisticRegression(max_iter=2000),
"Decision Tree": DecisionTreeClassifier(),
"KNN": KNeighborsClassifier(),
"Naive Bayes": GaussianNB(),
"Random Forest": RandomForestClassifier(),
"XGBoost": XGBClassifier(eval_metric='logloss')
}

In [64]:
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    results.append([
        name,
        accuracy_score(y_test,y_pred),
        roc_auc_score(y_test,y_prob),
        precision_score(y_test,y_pred),
        recall_score(y_test,y_pred),
        f1_score(y_test,y_pred),
        matthews_corrcoef(y_test,y_pred)
    ])
    
    joblib.dump(model,f"model/{name}.pkl")


In [65]:
results = []

for name, model in models.items():
    print(f"\n Training {name}...")
    
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    acc = accuracy_score(y_test,y_pred)
    auc = roc_auc_score(y_test,y_prob)
    prec = precision_score(y_test,y_pred)
    rec = recall_score(y_test,y_pred)
    f1 = f1_score(y_test,y_pred)
    mcc = matthews_corrcoef(y_test,y_pred)
    
    print(f"Accuracy : {acc:.4f}")
    print(f"AUC      : {auc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"MCC      : {mcc:.4f}")
    
    results.append([name,acc,auc,prec,rec,f1,mcc])
    
    joblib.dump(model,f"model/{name}.pkl")



 Training Logistic Regression...
Accuracy : 0.7249
AUC      : 0.8016
Precision: 0.6903
Recall   : 0.8211
F1 Score : 0.7500
MCC      : 0.4576

 Training Decision Tree...
Accuracy : 0.9524
AUC      : 0.9524
Precision: 0.9624
Recall   : 0.9421
F1 Score : 0.9521
MCC      : 0.9050

 Training KNN...
Accuracy : 0.9021
AUC      : 0.9489
Precision: 0.8525
Recall   : 0.9737
F1 Score : 0.9091
MCC      : 0.8124

 Training Naive Bayes...
Accuracy : 0.6878
AUC      : 0.7761
Precision: 0.6607
Recall   : 0.7789
F1 Score : 0.7150
MCC      : 0.3813

 Training Random Forest...
Accuracy : 0.9603
AUC      : 0.9941
Precision: 0.9679
Recall   : 0.9526
F1 Score : 0.9602
MCC      : 0.9208

 Training XGBoost...
Accuracy : 0.9497
AUC      : 0.9865
Precision: 0.9430
Recall   : 0.9579
F1 Score : 0.9504
MCC      : 0.8996


In [66]:
results_df = pd.DataFrame(results, columns=[
"Model","Accuracy","AUC","Precision",
"Recall","F1","MCC"
])

print("\n Model Comparison Table")
results_df



 Model Comparison Table


Unnamed: 0,Model,Accuracy,AUC,Precision,Recall,F1,MCC
0,Logistic Regression,0.724868,0.801568,0.690265,0.821053,0.75,0.45756
1,Decision Tree,0.952381,0.952436,0.962366,0.942105,0.952128,0.904973
2,KNN,0.902116,0.948936,0.852535,0.973684,0.909091,0.812425
3,Naive Bayes,0.687831,0.776148,0.660714,0.778947,0.714976,0.381282
4,Random Forest,0.960317,0.994121,0.967914,0.952632,0.960212,0.920755
5,XGBoost,0.949735,0.986506,0.943005,0.957895,0.950392,0.899573
