In [23]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, matthews_corrcoef
)

import joblib
import os

In [24]:
df = pd.read_csv("heartdisease_dataset.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalachh,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [25]:
X = df.drop("target", axis=1)
y = df["target"]

In [26]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [27]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [28]:
os.makedirs("model", exist_ok=True)

In [29]:
models = {
"Logistic Regression": LogisticRegression(max_iter=2000),
"Decision Tree": DecisionTreeClassifier(),
"KNN": KNeighborsClassifier(),
"Naive Bayes": GaussianNB(),
"Random Forest": RandomForestClassifier(),
"XGBoost": XGBClassifier(eval_metric='logloss')
}

In [30]:
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    results.append([
        name,
        accuracy_score(y_test,y_pred),
        roc_auc_score(y_test,y_prob),
        precision_score(y_test,y_pred),
        recall_score(y_test,y_pred),
        f1_score(y_test,y_pred),
        matthews_corrcoef(y_test,y_pred)
    ])
    
    joblib.dump(model,f"model/{name}.pkl")


In [31]:
results = []

for name, model in models.items():
    print(f"\n Training {name}...")
    
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:,1]
    
    acc = accuracy_score(y_test,y_pred)
    auc = roc_auc_score(y_test,y_prob)
    prec = precision_score(y_test,y_pred)
    rec = recall_score(y_test,y_pred)
    f1 = f1_score(y_test,y_pred)
    mcc = matthews_corrcoef(y_test,y_pred)
    
    print(f"Accuracy : {acc:.4f}")
    print(f"AUC      : {auc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1 Score : {f1:.4f}")
    print(f"MCC      : {mcc:.4f}")
    
    results.append([name,acc,auc,prec,rec,f1,mcc])
    
    joblib.dump(model,f"model/{name}.pkl")



 Training Logistic Regression...
Accuracy : 0.7249
AUC      : 0.8016
Precision: 0.6903
Recall   : 0.8211
F1 Score : 0.7500
MCC      : 0.4576

 Training Decision Tree...
Accuracy : 0.9577
AUC      : 0.9577
Precision: 0.9628
Recall   : 0.9526
F1 Score : 0.9577
MCC      : 0.9154

 Training KNN...
Accuracy : 0.9021
AUC      : 0.9489
Precision: 0.8525
Recall   : 0.9737
F1 Score : 0.9091
MCC      : 0.8124

 Training Naive Bayes...
Accuracy : 0.6878
AUC      : 0.7761
Precision: 0.6607
Recall   : 0.7789
F1 Score : 0.7150
MCC      : 0.3813

 Training Random Forest...
Accuracy : 0.9656
AUC      : 0.9937
Precision: 0.9784
Recall   : 0.9526
F1 Score : 0.9653
MCC      : 0.9316

 Training XGBoost...
Accuracy : 0.9497
AUC      : 0.9865
Precision: 0.9430
Recall   : 0.9579
F1 Score : 0.9504
MCC      : 0.8996


In [32]:
results_df = pd.DataFrame(results, columns=[
"Model","Accuracy","AUC","Precision",
"Recall","F1","MCC"
])

print("\n Model Comparison Table")
results_df



 Model Comparison Table


Unnamed: 0,Model,Accuracy,AUC,Precision,Recall,F1,MCC
0,Logistic Regression,0.724868,0.801568,0.690265,0.821053,0.75,0.45756
1,Decision Tree,0.957672,0.957699,0.962766,0.952632,0.957672,0.915398
2,KNN,0.902116,0.948936,0.852535,0.973684,0.909091,0.812425
3,Naive Bayes,0.687831,0.776148,0.660714,0.778947,0.714976,0.381282
4,Random Forest,0.965608,0.993743,0.978378,0.952632,0.965333,0.931551
5,XGBoost,0.949735,0.986506,0.943005,0.957895,0.950392,0.899573
