# Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import cross_val_score

In [2]:
# Load feature-selected dataset
df_selected = pd.read_csv("data/heart_disease_selected.csv")

label_col = "target"
X = df_selected.drop(columns=[label_col])
y = df_selected[label_col]

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Grid Search

In [3]:
# Define parameter grid for GridSearch
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": [1, 0.1, 0.01, 0.001],
    "kernel": ["rbf", "poly", "sigmoid"]
}

grid = GridSearchCV(
    SVC(probability=True, random_state=42),
    param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1
)

grid.fit(X_train, y_train)

print("Best Parameters (GridSearch):", grid.best_params_)
print("Best AUC (GridSearch):", grid.best_score_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters (GridSearch): {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
Best AUC (GridSearch): 0.8427721088435375


# Randomized Search

In [4]:
# Define parameter distributions for RandomizedSearch
param_dist = {
    "C": np.logspace(-2, 2, 20),
    "gamma": np.logspace(-3, 1, 20),
    "kernel": ["rbf", "poly", "sigmoid"]
}

random_search = RandomizedSearchCV(
    SVC(probability=True, random_state=42),
    param_distributions=param_dist,
    n_iter=100,
    cv=5,
    random_state=42,
    n_jobs=-1,
    verbose=1
)

random_search.fit(X_train, y_train)

print("Best Parameters (RandomizedSearch):", random_search.best_params_)
print("Best AUC (RandomizedSearch):", random_search.best_score_)

Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best Parameters (RandomizedSearch): {'kernel': 'rbf', 'gamma': np.float64(0.011288378916846888), 'C': np.float64(1.2742749857031335)}
Best AUC (RandomizedSearch): 0.8468537414965986


# Comparing Results

In [5]:
best_svm = random_search.best_estimator_

y_pred = best_svm.predict(X_test)
y_proba = best_svm.predict_proba(X_test)[:,1]

print("Classification Report (Test Data):\n", classification_report(y_test, y_pred))
print("Test AUC:", roc_auc_score(y_test, y_proba))

Classification Report (Test Data):
               precision    recall  f1-score   support

           0       0.94      0.91      0.92        33
           1       0.90      0.93      0.91        28

    accuracy                           0.92        61
   macro avg       0.92      0.92      0.92        61
weighted avg       0.92      0.92      0.92        61

Test AUC: 0.9588744588744589


In [6]:
baseline_svm = SVC(kernel="rbf", probability=True, random_state=42)
scores_base = cross_val_score(baseline_svm, X, y, cv=5, scoring="roc_auc")

tuned_svm = random_search.best_estimator_
scores_tuned = cross_val_score(tuned_svm, X, y, cv=5, scoring="roc_auc")

print("Baseline mean AUC:", scores_base.mean())
print("Tuned mean AUC:", scores_tuned.mean())

Baseline mean AUC: 0.8990886042969375
Tuned mean AUC: 0.9092968374218374


# Thanks