# 🩺 Heart Disease Prediction Using Supervised Learning
This notebook demonstrates how to build machine learning models to predict heart disease based on healthcare data.

**Models Implemented:**
- Support Vector Machine (SVM)
- Random Forest
- Gradient Boosting (GBM)

**Evaluation Metrics:** Accuracy, Precision, Recall, F1-score, AUC-ROC

**Dataset Source:** UCI Machine Learning Repository (Cleveland dataset), loaded in this notebook from a CSV copy hosted on GitHub.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Column names applied to the CSV; "target" is the label column.
column_names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"]
url = "https://raw.githubusercontent.com/rahulrajpl/UCI-Heart-Disease-Data-Cleaned/main/heart.csv"

# "?" marks a missing value, so let the parser convert it to NaN directly.
# NOTE(review): passing names= without header=0 makes pandas treat the first
# line of the file as data -- confirm the CSV at this URL has no header row.
raw_df = pd.read_csv(url, names=column_names, na_values="?")

# Build the cleaned frame in one pass instead of mutating in place, so the
# cell is idempotent and the raw download stays available as raw_df.
df = (
    raw_df
    .dropna()  # discard every row that has at least one missing value
    .assign(
        ca=lambda d: pd.to_numeric(d["ca"]),
        thal=lambda d: pd.to_numeric(d["thal"]),
        # Collapse the label to binary: any value above 0 becomes 1.
        target=lambda d: (pd.to_numeric(d["target"]) > 0).astype(int),
    )
)
df.head()

In [None]:
# Separate the feature columns from the binary label.
X = df.drop(columns="target")
y = df["target"]

# One-hot encode the categorical codes; drop_first removes one level per
# feature, and dtype=float guarantees the scaler receives numeric columns.
X = pd.get_dummies(X, columns=["cp", "thal", "slope"], drop_first=True, dtype=float)

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting on the full matrix leaks test-set statistics).
# Same test_size and seed as before, so the same rows land in each subset.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # statistics learned from training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Full scaled matrix kept under its original name for any cell that still
# expects it; it is standardised with the training-set statistics.
X_scaled = scaler.transform(X)

In [None]:
# Hyper-parameter grids. Every list currently holds a single value, so each
# search cross-validates exactly one configuration; add values to widen it.
param_grid_svm = {'C': [1], 'gamma': ['scale'], 'kernel': ['rbf']}
param_grid_rf = {'n_estimators': [100], 'max_depth': [None], 'min_samples_split': [2]}
param_grid_gbm = {'n_estimators': [100], 'learning_rate': [0.1], 'max_depth': [3]}

# All three estimators have a random component (SVC only because
# probability=True is requested), so seed them explicitly: without
# random_state the reported metrics change on every Restart & Run All.
grid_svm = GridSearchCV(SVC(probability=True, random_state=42), param_grid_svm, cv=5)
grid_rf = GridSearchCV(RandomForestClassifier(random_state=42), param_grid_rf, cv=5)
grid_gbm = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid_gbm, cv=5)

# 5-fold cross-validation on the training split for each search.
grid_svm.fit(X_train, y_train)
grid_rf.fit(X_train, y_train)
grid_gbm.fit(X_train, y_train)

In [None]:
def evaluate_model(model, name, X=None, y=None):
    """Score a fitted classifier and return its metrics as one table row.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` and ``predict_proba``.
    name : str
        Label stored under the ``'Model'`` key of the returned row.
    X, y : optional
        Features and true labels to evaluate on. When omitted, the
        notebook-level ``X_test`` / ``y_test`` are used, which matches the
        original two-argument call.

    Returns
    -------
    dict
        Keys ``'Model'``, ``'Accuracy'``, ``'Precision'``, ``'Recall'``,
        ``'F1-score'`` and ``'AUC-ROC'``. Precision, recall and F1 are the
        values reported for the class labelled ``1``.
    """
    X_eval = X_test if X is None else X
    y_eval = y_test if y is None else y

    y_pred = model.predict(X_eval)
    # Column 1 is taken as the positive-class probability.
    # NOTE(review): assumes model.classes_ is ordered [0, 1] -- confirm.
    y_prob = model.predict_proba(X_eval)[:, 1]

    report = classification_report(y_eval, y_pred, output_dict=True)
    positive = report['1']  # per-class entry for label 1 (report keys are strings)
    auc = roc_auc_score(y_eval, y_prob)
    return {
        'Model': name,
        'Accuracy': report['accuracy'],
        'Precision': positive['precision'],
        'Recall': positive['recall'],
        'F1-score': positive['f1-score'],
        'AUC-ROC': auc
    }

# Pair each display label with its fitted search, then score the refitted
# best estimator of every search on the held-out split.
fitted_searches = [
    ("SVM (RBF)", grid_svm),
    ("Random Forest", grid_rf),
    ("Gradient Boosting", grid_gbm),
]
results = [
    evaluate_model(search.best_estimator_, title)
    for title, search in fitted_searches
]

# One row per model; rounded only for display.
results_df = pd.DataFrame(results)
results_df.round(4)

In [None]:
# Overlay the test-set ROC curve of every fitted search on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
models = [grid_svm, grid_rf, grid_gbm]
labels = ['SVM', 'Random Forest', 'Gradient Boosting']

for label, model in zip(labels, models):
    # Column 1 of predict_proba is used as the score that roc_curve thresholds.
    y_prob = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    ax.plot(fpr, tpr, label=label)

# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve Comparison')
ax.legend()
ax.grid(True)
plt.show()