In [5]:
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, recall_score, accuracy_score, f1_score
import numpy as np



import pandas as pd

# Load the uploaded dataset
file_path = r"C:\Users\tobia\Desktop\Rensetdata.csv"
data = pd.read_csv(file_path)

# Separate features and target
X = data.drop(columns=["HeartDisease"])
y = data["HeartDisease"]

# Handle missing values using median imputation
imputer = SimpleImputer(strategy="median")
X_imputed = imputer.fit_transform(X)

# Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)

# Define the SVM with RBF kernel
svm_rbf = SVC(kernel="rbf")

# Set up the parameter grid for hyperparameter tuning
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": [0.001, 0.01, 0.1, 1]
}

# Define a custom scoring metric prioritizing recall but balancing accuracy
scoring = {
    "accuracy": make_scorer(accuracy_score),
    "recall": make_scorer(recall_score),
    "f1": make_scorer(f1_score)
}

# Use StratifiedKFold for consistent class distribution across folds
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Grid search with cross-validation
grid_search = GridSearchCV(
    estimator=svm_rbf,
    param_grid=param_grid,
    scoring=scoring,
    refit="f1",  # Optimize for recall
    cv=cv,
    verbose=1
)

# Fit the model
grid_search.fit(X_scaled, y)

# Extract the best parameters and corresponding metrics
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
best_scores = grid_search.cv_results_

# Compile and display results
results = {
    "Best Parameters": best_params,
    "Best Recall": grid_search.best_score_,
    "Accuracy (CV)": cross_val_score(best_model, X_scaled, y, cv=cv, scoring="accuracy").mean(),
    "F1 Score (CV)": cross_val_score(best_model, X_scaled, y, cv=cv, scoring="f1").mean()
}
results


Fitting 10 folds for each of 16 candidates, totalling 160 fits


{'Best Parameters': {'C': 1, 'gamma': 0.1},
 'Best Recall': np.float64(0.8469549287851056),
 'Accuracy (CV)': np.float64(0.8267801251956183),
 'F1 Score (CV)': np.float64(0.8469549287851056)}