# SVM for Heart Disease Dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

X = pd.read_csv("../preprocessing/preprocessed_heart_disease_X.csv")
y = pd.read_csv("../preprocessing/preprocessed_heart_disease_y.csv")

# convert to binary classification
y_binary = (y > 0).astype(int).values   # convert to NumPy array
scaler = StandardScaler()
Xs = scaler.fit_transform(X)  # this becomes a NumPy array

assert len(X) == len(y_binary), "X and y lengths mismatch — check indexes."
print("OK — X and y aligned.")

OK — X and y aligned.


## Training & Evaluation

In [2]:
y = np.ravel(y)  # flatten to 1D
y_binary = (y > 0).astype(int)
scaler = StandardScaler()
Xs = scaler.fit_transform(X)  
k = 5
kfold = KFold(n_splits=k, shuffle=True, random_state=42)
acc_linear = []
acc_rbf = []

# --- K-Fold Cross-Validation loop ---
for fold, (train_index, test_index) in enumerate(kfold.split(Xs)):
    print(f"\n--- Fold {fold + 1} ---")
    
    # Split data
    X_train, X_test = Xs[train_index], Xs[test_index]
    y_train, y_test = y_binary[train_index], y_binary[test_index]
    
    # --- Linear SVM ---
    svm_linear = SVC(kernel='linear', C=1, random_state=42)
    svm_linear.fit(X_train, y_train)
    y_pred_linear = svm_linear.predict(X_test)
    acc_l = accuracy_score(y_test, y_pred_linear)
    print(f"Linear SVM Accuracy: {acc_l:.4f}")
    acc_linear.append(acc_l)
    
    # --- RBF SVM ---
    svm_rbf = SVC(kernel='rbf', C=1, gamma='scale', random_state=42)
    svm_rbf.fit(X_train, y_train)
    y_pred_rbf = svm_rbf.predict(X_test)
    acc_r = accuracy_score(y_test, y_pred_rbf)
    print(f"RBF SVM Accuracy: {acc_r:.4f}")
    acc_rbf.append(acc_r)


print("\n=== Cross-Validation Results ===")
print(f"Linear SVM Average Accuracy: {np.mean(acc_linear):.4f} ± {np.std(acc_linear):.4f}")
print(f"RBF    SVM Average Accuracy: {np.mean(acc_rbf):.4f} ± {np.std(acc_rbf):.4f}")


--- Fold 1 ---
Linear SVM Accuracy: 0.8500
RBF SVM Accuracy: 0.8500

--- Fold 2 ---
Linear SVM Accuracy: 0.7333
RBF SVM Accuracy: 0.7500

--- Fold 3 ---
Linear SVM Accuracy: 0.8305
RBF SVM Accuracy: 0.7966

--- Fold 4 ---
Linear SVM Accuracy: 0.8644
RBF SVM Accuracy: 0.7627

--- Fold 5 ---
Linear SVM Accuracy: 0.8305
RBF SVM Accuracy: 0.8305

=== Cross-Validation Results ===
Linear SVM Average Accuracy: 0.8218 ± 0.0460
RBF    SVM Average Accuracy: 0.7980 ± 0.0382
