# 4. Supervised Learning - Classification Models

In [1]:
#Import libraries
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,confusion_matrix, roc_curve, classification_report)
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the data set, then pull out the label column and the feature columns
df = pd.read_csv('../data/heart_disease.csv')
y = df['target']
X = df.iloc[:, :7]  # the first seven columns are used as model inputs

In [3]:
# Display the feature matrix to check which columns were selected
X

Unnamed: 0,thal,ca,cp,oldpeak,thalach,age,exang
0,6.0,0.0,1,0.370968,0.603053,0.708333,0
1,3.0,3.0,4,0.241935,0.282443,0.791667,1
2,7.0,2.0,4,0.419355,0.442748,0.791667,1
3,3.0,0.0,3,0.564516,0.885496,0.166667,0
4,3.0,0.0,2,0.225806,0.770992,0.250000,0
...,...,...,...,...,...,...,...
298,7.0,0.0,1,0.193548,0.465649,0.333333,0
299,7.0,2.0,4,0.548387,0.534351,0.812500,0
300,7.0,1.0,4,0.193548,0.335878,0.583333,1
301,3.0,1.0,2,0.000000,0.786260,0.583333,0


In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Maps a model name to its [accuracy, precision, recall, f1, roc_auc] list
models = dict()

In [6]:
# Logistic regression, fitted one-vs-rest.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class` argument;
# confirm against the installed version before upgrading.
lr = LogisticRegression(random_state=42, multi_class='ovr')
lr.fit(X_train, y_train)

# Hard labels feed the label-based metrics; class probabilities feed the ROC AUC
y_pred = lr.predict(X_test)
y_proba = lr.predict_proba(X_test)

# zero_division=0 yields the same score as the default for a class with no
# predicted (or no true) samples, but does not emit the undefined-metric warning
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

# Order matters: the list is later read back by position
models["Logistic Regression"] = [accuracy, precision, recall, f1, roc_auc]
print(f"\nAccuracy (Accuracy): {accuracy:.4f}")
print(f"Precision (Precision): {precision:.4f}")
print(f"Recall (Recall): {recall:.4f}")
print(f"F1: {f1:.4f}")
print(f"ROC: {roc_auc:.4f}")
print(models)




Accuracy (Accuracy): 0.5574
Precision (Precision): 0.5555
Recall (Recall): 0.5574
F1: 0.4692
ROC: 0.8383
{'Logistic Regression': [0.5573770491803278, 0.5554708349218452, 0.5573770491803278, 0.46921675774134797, np.float64(0.8382791744212538)]}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [7]:
# Decision tree: fit() returns the estimator itself, so construction and training chain
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Hard labels for the label-based metrics, class probabilities for the ROC AUC
y_pred, y_proba = dt.predict(X_test), dt.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
# The three weighted metrics share one call signature, so compute them together
precision, recall, f1 = [
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
]
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

# Register the scores in the fixed order accuracy, precision, recall, f1, roc_auc
models["Decision Tree"] = [accuracy, precision, recall, f1, roc_auc]
print(
    f"\nAccuracy (Accuracy): {accuracy:.4f}",
    f"Precision (Precision): {precision:.4f}",
    f"Recall (Recall): {recall:.4f}",
    f"F1: {f1:.4f}",
    f"ROC: {roc_auc:.4f}",
    sep="\n",
)
print(models)


Accuracy (Accuracy): 0.5246
Precision (Precision): 0.4729
Recall (Recall): 0.5246
F1: 0.4966
ROC: 0.5864
{'Logistic Regression': [0.5573770491803278, 0.5554708349218452, 0.5573770491803278, 0.46921675774134797, np.float64(0.8382791744212538)], 'Decision Tree': [0.5245901639344263, 0.47292598112270245, 0.5245901639344263, 0.4966208608391804, np.float64(0.5864064083332621)]}


In [8]:
# Random forest: fit() returns the estimator itself, so construction and training chain
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Hard labels for the label-based metrics, class probabilities for the ROC AUC
y_pred, y_proba = rf.predict(X_test), rf.predict_proba(X_test)

accuracy = accuracy_score(y_test, y_pred)
# The three weighted metrics share one call signature, so compute them together
precision, recall, f1 = [
    metric(y_test, y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
]
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

# Register the scores in the fixed order accuracy, precision, recall, f1, roc_auc
models["Random Forest"] = [accuracy, precision, recall, f1, roc_auc]
print(
    f"\nAccuracy (Accuracy): {accuracy:.4f}",
    f"Precision (Precision): {precision:.4f}",
    f"Recall (Recall): {recall:.4f}",
    f"F1: {f1:.4f}",
    f"ROC: {roc_auc:.4f}",
    sep="\n",
)
print(models)


Accuracy (Accuracy): 0.5410
Precision (Precision): 0.5259
Recall (Recall): 0.5410
F1: 0.5159
ROC: 0.7617
{'Logistic Regression': [0.5573770491803278, 0.5554708349218452, 0.5573770491803278, 0.46921675774134797, np.float64(0.8382791744212538)], 'Decision Tree': [0.5245901639344263, 0.47292598112270245, 0.5245901639344263, 0.4966208608391804, np.float64(0.5864064083332621)], 'Random Forest': [0.5409836065573771, 0.5259402121504339, 0.5409836065573771, 0.5158801126014241, np.float64(0.7616878300019343)]}


In [9]:
# Support vector classifier; probability=True enables predict_proba, which the
# ROC AUC computation below relies on
svm = SVC(probability=True, random_state=42)
svm.fit(X_train, y_train)

# Hard labels feed the label-based metrics; class probabilities feed the ROC AUC
y_pred = svm.predict(X_test)
y_proba = svm.predict_proba(X_test)

# zero_division=0 yields the same score as the default for a class with no
# predicted (or no true) samples, but does not emit the undefined-metric warning
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

# Order matters: the list is later read back by position
models["Support Vector Machine (SVM)"] = [accuracy, precision, recall, f1, roc_auc]
print(f"\nAccuracy (Accuracy): {accuracy:.4f}")
print(f"Precision (Precision): {precision:.4f}")
print(f"Recall (Recall): {recall:.4f}")
print(f"F1: {f1:.4f}")
print(f"ROC: {roc_auc:.4f}")
print(models)


Accuracy (Accuracy): 0.5410
Precision (Precision): 0.3891
Recall (Recall): 0.5410
F1: 0.4418
ROC: 0.7909
{'Logistic Regression': [0.5573770491803278, 0.5554708349218452, 0.5573770491803278, 0.46921675774134797, np.float64(0.8382791744212538)], 'Decision Tree': [0.5245901639344263, 0.47292598112270245, 0.5245901639344263, 0.4966208608391804, np.float64(0.5864064083332621)], 'Random Forest': [0.5409836065573771, 0.5259402121504339, 0.5409836065573771, 0.5158801126014241, np.float64(0.7616878300019343)], 'Support Vector Machine (SVM)': [0.5409836065573771, 0.38907103825136613, 0.5409836065573771, 0.44183066900276613, np.float64(0.7908919802506139)]}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
# Persist the collected metrics as a plain-text report, one section per model.
results_path = Path("../results/evaluation_metrics.txt")
# open(..., "w") does not create missing folders, so make sure the target
# directory exists before writing (e.g. on a fresh checkout)
results_path.parent.mkdir(parents=True, exist_ok=True)

with open(results_path, "w", encoding="utf-8") as f:
    for key, value in models.items():
        # value is the positional list [accuracy, precision, recall, f1, roc_auc]
        f.write("\n")
        f.write(f'{key}:')
        f.write(f"\nAccuracy: {value[0]}\nPrecision: {value[1]}\nRecall: {value[2]}\nF1: {value[3]}\nROC: {value[4]}\n")
        f.write("----------------------------")