In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.svm import SVC


In [None]:
# Read the cleaned heart-disease table from disk.
df = pd.read_csv("../data/heart_disease_cleaned.csv")

# The label is the "target_bin" column. Both label columns are removed from the
# feature matrix; errors="ignore" tolerates a CSV that lacks one of them.
label_columns = ["target", "target_bin"]
y = df["target_bin"]
X = df.drop(columns=label_columns, errors="ignore")

print("Shape:", X.shape, y.shape)


Shape: (303, 20) (303,)


In [3]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions similar in both partitions; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.2, "stratify": y, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [4]:
# Coarse hyper-parameter search for the SVM.
# C and gamma draw from the same seven log-spaced candidates (1e-3 ... 1e3).
# NOTE(review): no scaling step is applied in this notebook before the SVC -
# confirm the cleaned CSV already holds standardized features.
log_grid = np.logspace(-3, 3, 7)
param_dist = dict(
    C=log_grid,
    gamma=log_grid,
    kernel=["rbf", "poly", "sigmoid"],
)

# probability=True so the fitted model exposes predict_proba.
svm = SVC(probability=True, random_state=42)

# Try 10 sampled settings, each scored by 5-fold cross-validated ROC-AUC,
# with the fits spread over all available cores.
random_search = RandomizedSearchCV(
    svm,
    param_dist,
    n_iter=10,
    scoring="roc_auc",
    cv=5,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

print("Best params (RandomizedSearchCV):", random_search.best_params_)


Best params (RandomizedSearchCV): {'kernel': 'poly', 'gamma': np.float64(1.0), 'C': np.float64(0.01)}


In [5]:
# Grid search: refine around the best setting found by the randomized search.
best_params = random_search.best_params_
best_c = best_params["C"]
best_gamma = best_params["gamma"]

# Probe half, same and double of the best C and gamma; the kernel stays fixed
# to the one the randomized search selected.
param_grid = {
    "kernel": [best_params["kernel"]],
    "C": [best_c / 2, best_c, best_c * 2],
    "gamma": [best_gamma / 2, best_gamma, best_gamma * 2],
}

# Same scoring and fold count as the randomized search.
grid_search = GridSearchCV(
    svm,
    param_grid,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Best params (GridSearchCV):", grid_search.best_params_)


Best params (GridSearchCV): {'C': np.float64(0.005), 'gamma': np.float64(0.5), 'kernel': 'poly'}


In [6]:
# Score the estimator selected by the grid search on the held-out test set.
best_svm = grid_search.best_estimator_

# Hard class predictions, plus the second predict_proba column as the score for ROC-AUC.
y_pred = best_svm.predict(X_test)
y_proba = best_svm.predict_proba(X_test)[:, 1]

# Metrics computed from the hard predictions.
label_metrics = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1:", f1_score),
)
for label, metric in label_metrics:
    print(label, metric(y_test, y_pred))

# ROC-AUC is computed from the probability scores rather than the hard labels.
print("ROC-AUC:", roc_auc_score(y_test, y_proba))


Accuracy: 0.819672131147541
Precision: 1.0
Recall: 0.6071428571428571
F1: 0.7555555555555555
ROC-AUC: 0.9491341991341992


In [7]:
# Persist the tuned SVM model to disk.
MODEL_DIR = "../models"
best_model_path = MODEL_DIR + "/svm_tuned.joblib"

# joblib.dump opens the target path for writing and does not create missing
# parent directories, so make sure the output folder exists first. Without
# this, a fresh checkout fails here with FileNotFoundError after tuning has run.
os.makedirs(MODEL_DIR, exist_ok=True)

joblib.dump(best_svm, best_model_path)
print("Tuned SVM model saved to:", best_model_path)


Tuned SVM model saved to: ../models/svm_tuned.joblib
