# Supervised Learning
This notebook trains several supervised classification models (Logistic Regression, Decision Tree, Random Forest, and SVM) to predict heart disease, and compares them using accuracy, precision, recall, F1 score, and ROC/AUC.

## 1. Importing necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

## 2. Loading the feature-selected dataset

In [None]:
# Read the dataset from disk; the path is relative to the notebook's
# working directory.
df = pd.read_csv("../data/heart_disease.csv")

# The "target" column is the label to predict; every other column is
# used as a model input.
y = df["target"]
X = df.drop(columns="target")

## 3. Splitting the data into training and testing sets

In [None]:
# Hold out 20% of the rows for evaluation. stratify=y requests a stratified
# split on the label, and random_state pins the shuffle so reruns give the
# same partition.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Indicator matrix of the test labels, used for the per-class ROC curves.
# The class list is taken from the training labels because predict_proba
# orders its columns by the classes seen during fit; deriving it from y_test
# would shift the columns out of alignment whenever a rare class happens to
# be absent from the test split.
# NOTE(review): the downstream cells index y_test_bin[:, i] once per class,
# which presumes a target with more than two classes (label_binarize emits a
# single column for a two-class target) - confirm against the dataset.
y_test_bin = label_binarize(y_test, classes=np.unique(y_train))

## 4. Training and Evaluating Models

### 4.1 Logistic Regression model

In [None]:
# Fit a logistic-regression classifier on the training split.
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)

# Hard predictions drive accuracy/precision/recall/F1. The class
# probabilities are computed once here and reused for the AUC score and for
# every ROC curve below, instead of re-scoring the test set on each use.
y_pred = lr.predict(X_test)
lr_proba = lr.predict_proba(X_test)

print("=== Logistic Regression Evaluation ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))
print("AUC Score:", roc_auc_score(y_test, lr_proba, multi_class="ovr"))

# One ROC curve per class: column i of the binarized labels is paired with
# column i of the predicted probabilities.
plt.figure(figsize=(8,6))
for i, cols in enumerate(np.unique(y_test)):
    # The thresholds returned by roc_curve are not needed for plotting.
    fpr, tpr, _thresholds = roc_curve(y_test_bin[:, i], lr_proba[:, i])
    plt.plot(fpr, tpr, label=f"Class {cols} (AUC={auc(fpr, tpr):.2f})")

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1],[0,1],'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Logistic Regression")
plt.legend()
plt.show()

### 4.2 Decision Tree model

In [None]:
# Fit a decision-tree classifier on the training split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

# Hard predictions drive accuracy/precision/recall/F1. The class
# probabilities are computed once here and reused for the AUC score and for
# every ROC curve below, instead of re-scoring the test set on each use.
y_pred = dt.predict(X_test)
dt_proba = dt.predict_proba(X_test)

print("=== Decision Tree Evaluation ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))
print("AUC Score:", roc_auc_score(y_test, dt_proba, multi_class="ovr"))

# One ROC curve per class: column i of the binarized labels is paired with
# column i of the predicted probabilities.
plt.figure(figsize=(8,6))
for i, cols in enumerate(np.unique(y_test)):
    # The thresholds returned by roc_curve are not needed for plotting.
    fpr, tpr, _thresholds = roc_curve(y_test_bin[:, i], dt_proba[:, i])
    plt.plot(fpr, tpr, label=f"Class {cols} (AUC={auc(fpr, tpr):.2f})")

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1],[0,1],'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Decision Tree")
plt.legend()
plt.show()

### 4.3 Random Forest model

In [None]:
# Fit a random-forest classifier on the training split.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Hard predictions drive accuracy/precision/recall/F1. The class
# probabilities are computed once here and reused for the AUC score and for
# every ROC curve below, instead of re-scoring the test set on each use.
y_pred = rf.predict(X_test)
rf_proba = rf.predict_proba(X_test)

print("=== Random Forest Evaluation ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))
print("AUC Score:", roc_auc_score(y_test, rf_proba, multi_class="ovr"))

# One ROC curve per class: column i of the binarized labels is paired with
# column i of the predicted probabilities.
plt.figure(figsize=(8,6))
for i, cols in enumerate(np.unique(y_test)):
    # The thresholds returned by roc_curve are not needed for plotting.
    fpr, tpr, _thresholds = roc_curve(y_test_bin[:, i], rf_proba[:, i])
    plt.plot(fpr, tpr, label=f"Class {cols} (AUC={auc(fpr, tpr):.2f})")

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1],[0,1],'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Random Forest")
plt.legend()
plt.show()

### 4.4 Support Vector Machine model

In [None]:
# Fit a support-vector classifier on the training split. probability=True is
# required for predict_proba to be available on the fitted model.
svm = SVC(probability=True, random_state=42)
svm.fit(X_train, y_train)

# Hard predictions drive accuracy/precision/recall/F1. The class
# probabilities are computed once here and reused for the AUC score and for
# every ROC curve below, instead of re-scoring the test set on each use.
y_pred = svm.predict(X_test)
svm_proba = svm.predict_proba(X_test)

print("=== SVM Evaluation ===")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average="weighted"))
print("Recall:", recall_score(y_test, y_pred, average="weighted"))
print("F1 Score:", f1_score(y_test, y_pred, average="weighted"))
print("AUC Score:", roc_auc_score(y_test, svm_proba, multi_class="ovr"))

# One ROC curve per class: column i of the binarized labels is paired with
# column i of the predicted probabilities.
plt.figure(figsize=(8,6))
for i, cols in enumerate(np.unique(y_test)):
    # The thresholds returned by roc_curve are not needed for plotting.
    fpr, tpr, _thresholds = roc_curve(y_test_bin[:, i], svm_proba[:, i])
    plt.plot(fpr, tpr, label=f"Class {cols} (AUC={auc(fpr, tpr):.2f})")

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1],[0,1],'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for SVM")
plt.legend()
plt.show()

## 5. Comparing the models using ROC curves

In [None]:
# Overlay one ROC curve per fitted model on a single figure. Each curve is
# built by flattening the binarized test labels and the model's probability
# matrix, so every (sample, class) pair contributes one point to the pooled
# curve.
plt.figure(figsize=(10, 6))

flat_truth = y_test_bin.ravel()

# Plot order matches the order the models were trained in, which also fixes
# the colour each curve is drawn with.
fitted_models = (
    ("Logistic Regression", lr),
    ("Decision Tree", dt),
    ("Random Forest", rf),
    ("SVM", svm),
)
for model_name, model in fitted_models:
    flat_scores = model.predict_proba(X_test).ravel()
    fpr, tpr, threshold = roc_curve(flat_truth, flat_scores)
    plt.plot(fpr, tpr, label=f'{model_name} (AUC={auc(fpr, tpr):.2f})')

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0,1], [0,1], "k--")
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve between models')
plt.legend()
plt.show()