# Notebook 04 — Hyperparameter Tuning Experiments

Goal: tune key classical models and compare performance before and after tuning.
We also compare tuning on the full feature set versus the selected feature set.

Models tuned
- Logistic Regression
- SVM with RBF kernel
- Random Forest

Evaluation protocol
- Cross-validated F1 score on the training split for model selection
- Hold-out test metrics on the fixed test split for reporting

Key outputs
- A tuned full versus tuned selected comparison table saved in outputs
- A before versus after tuning table saved in outputs
- Best parameter sets saved in outputs



## Setup and shared utilities

Load the dataset, define the fixed train and test split, and define helper functions.
Artifacts are saved at the project root to keep notebooks clean.



In [1]:
# Import required libraries.
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from pathlib import Path

# Artifact folders sit one level above the notebook's working directory.
ROOT_DIR = Path("..")
FIG_DIR, OUT_DIR = ROOT_DIR / "figures", ROOT_DIR / "outputs"
for artifact_dir in (FIG_DIR, OUT_DIR):
    artifact_dir.mkdir(parents=True, exist_ok=True)

# A single seed drives NumPy's global RNG, the train/test split and the CV folds.
SEED = 42
np.random.seed(SEED)

# Wrap the raw arrays in pandas objects so features can be selected by name.
data = load_breast_cancer()
y = pd.Series(data.target, name="target")
X = pd.DataFrame(data.data, columns=data.feature_names)

# Fixed, stratified 80/20 hold-out split reused by every experiment below.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=SEED,
)

# Shared 5-fold stratified splitter; the fixed seed gives every search and
# every cross-validation call the same folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

## Test evaluation helper

Compute test accuracy, F1, and ROC AUC for a fitted pipeline.
The positive class is class 1, which is the benign class in scikit-learn's breast cancer target encoding (0 = malignant, 1 = benign).



In [2]:
# Score a fitted classifier on the held-out test split.
def eval_on_test(pipe, X_test, y_test):
    """Return test accuracy, F1 and ROC AUC for a fitted binary classifier.

    Parameters
    ----------
    pipe : fitted estimator or pipeline
        Must implement ``predict`` and either ``predict_proba`` or
        ``decision_function``.
    X_test : array-like
        Hold-out features passed straight to ``pipe``.
    y_test : array-like
        Hold-out labels; class 1 is treated as the positive class.

    Returns
    -------
    dict
        Keys ``test_acc``, ``test_f1`` and ``test_auc``.
    """
    y_pred = pipe.predict(X_test)

    if hasattr(pipe, "predict_proba"):
        # Column 1 is the score of the second class (class 1 for 0/1 labels).
        y_score = pipe.predict_proba(X_test)[:, 1]
    else:
        # Margin-only models (e.g. an SVC built without probability=True) expose
        # no probabilities; ROC AUC only needs a ranking score, so the signed
        # decision value is an accepted substitute.
        y_score = pipe.decision_function(X_test)

    return {
        "test_acc": accuracy_score(y_test, y_pred),
        "test_f1": f1_score(y_test, y_pred),
        "test_auc": roc_auc_score(y_test, y_score),
    }

## Baseline models

Define default (untuned) pipelines for each model to serve as the "before tuning" reference.



In [3]:

# Untuned reference models. Each pipeline standardises the features before the
# estimator, which is left at its defaults apart from the seed (and, for
# logistic regression, a generous iteration cap).
lr_before = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", LogisticRegression(max_iter=5000, random_state=SEED)),
])

svm_before = Pipeline(steps=[
    ("scaler", StandardScaler()),
    # probability=True enables predict_proba, which eval_on_test calls.
    ("model", SVC(kernel="rbf", probability=True, random_state=SEED)),
])

rf_before = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(random_state=SEED)),
])

# Fit every baseline on the training split and report its hold-out metrics.
for label, baseline in (("LR", lr_before), ("SVM", svm_before), ("RF", rf_before)):
    fitted = baseline.fit(X_train, y_train)
    print(f"Baseline {label} test:", eval_on_test(fitted, X_test, y_test))


Baseline LR test: {'test_acc': 0.9824561403508771, 'test_f1': 0.9861111111111112, 'test_auc': 0.9953703703703703}
Baseline SVM test: {'test_acc': 0.9824561403508771, 'test_f1': 0.9861111111111112, 'test_auc': 0.9950396825396826}
Baseline RF test: {'test_acc': 0.956140350877193, 'test_f1': 0.9655172413793104, 'test_auc': 0.9938822751322751}


## Hyperparameter tuning on full features

Tune each model on the full feature set using randomized search.



In [4]:
# Tune Logistic Regression on the full feature set. The scaler is a pipeline
# step, so every CV fit learns its scaling from that fit's training folds only.
lr_pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", LogisticRegression(max_iter=5000, random_state=SEED)),
])

# Only the inverse regularisation strength C varies; penalty and solver are pinned.
lr_params = {
    "model__C": np.logspace(-3, 2, num=20),
    "model__penalty": ["l2"],
    "model__solver": ["lbfgs"],
}

# Seeded randomized search scored by F1 on the shared CV folds.
lr_search = RandomizedSearchCV(
    estimator=lr_pipe,
    param_distributions=lr_params,
    n_iter=15,
    scoring="f1",
    cv=cv,
    n_jobs=-1,
    random_state=SEED,
).fit(X_train, y_train)

print("Best LR params:", lr_search.best_params_)
print("Best LR CV F1:", lr_search.best_score_)

# best_estimator_ is the winning pipeline refit on the whole training split.
lr_best = lr_search.best_estimator_
lr_test_metrics = eval_on_test(lr_best, X_test, y_test)
print("LR tuned test:", lr_test_metrics)

Best LR params: {'model__solver': 'lbfgs', 'model__penalty': 'l2', 'model__C': np.float64(0.12742749857031335)}
Best LR CV F1: 0.9878255609037587
LR tuned test: {'test_acc': 0.9736842105263158, 'test_f1': 0.9793103448275862, 'test_auc': 0.9957010582010581}




In [5]:
# Tune the RBF-kernel SVM on the full feature set. Scaling is a pipeline step,
# so it is re-fit inside every CV training fold rather than on all the data.
svm_pipe = Pipeline(steps=[
    ("scaler", StandardScaler()),
    # probability=True enables predict_proba, which eval_on_test calls.
    ("model", SVC(probability=True, random_state=SEED)),
])

# Log-spaced candidates for C and gamma; the kernel is pinned to RBF.
svm_params = {
    "model__kernel": ["rbf"],
    "model__C": np.logspace(-2, 2, num=25),
    "model__gamma": np.logspace(-4, 0, num=25),
}

# Seeded randomized search scored by F1 on the shared CV folds.
svm_search = RandomizedSearchCV(
    estimator=svm_pipe,
    param_distributions=svm_params,
    n_iter=25,
    scoring="f1",
    cv=cv,
    n_jobs=-1,
    random_state=SEED,
).fit(X_train, y_train)

print("Best SVM params:", svm_search.best_params_)
print("Best SVM CV F1:", svm_search.best_score_)

# best_estimator_ is the winning pipeline refit on the whole training split.
svm_best = svm_search.best_estimator_
svm_test_metrics = eval_on_test(svm_best, X_test, y_test)
print("SVM tuned test:", svm_test_metrics)

Best SVM params: {'model__kernel': 'rbf', 'model__gamma': np.float64(0.01), 'model__C': np.float64(6.812920690579608)}
Best SVM CV F1: 0.9810153888265501
SVM tuned test: {'test_acc': 0.9824561403508771, 'test_f1': 0.9861111111111112, 'test_auc': 0.9976851851851851}


In [6]:
# Tune the Random Forest on the full feature set.
rf_pipe = Pipeline(steps=[
    # keep consistent; RF does not strictly need scaling
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(random_state=SEED)),
])

# Discrete candidates for forest size, tree depth, leaf size and the number of
# features considered at each split.
rf_params = {
    "model__n_estimators": [200, 400, 600, 800],
    "model__max_depth": [None, 3, 5, 8, 12],
    "model__min_samples_leaf": [1, 2, 4, 8],
    "model__max_features": ["sqrt", "log2", None],
}

# Seeded randomized search scored by F1 on the shared CV folds.
rf_search = RandomizedSearchCV(
    estimator=rf_pipe,
    param_distributions=rf_params,
    n_iter=25,
    scoring="f1",
    cv=cv,
    n_jobs=-1,
    random_state=SEED,
).fit(X_train, y_train)

print("Best RF params:", rf_search.best_params_)
print("Best RF CV F1:", rf_search.best_score_)

# best_estimator_ is the winning pipeline refit on the whole training split.
rf_best = rf_search.best_estimator_
rf_test_metrics = eval_on_test(rf_best, X_test, y_test)
print("RF tuned test:", rf_test_metrics)


Best RF params: {'model__n_estimators': 600, 'model__min_samples_leaf': 1, 'model__max_features': 'log2', 'model__max_depth': 5}
Best RF CV F1: 0.9701813114156721
RF tuned test: {'test_acc': 0.956140350877193, 'test_f1': 0.9655172413793104, 'test_auc': 0.9920634920634921}


## Selected feature set

Load the selected features from Notebook 03 if available.
If the file is not found, fall back to a default list.



In [7]:

import json

# Fallback subset used when no selection file has been written to outputs.
default_selected_features = [
    "mean texture",
    "mean concave points",
    "radius error",
    "fractal dimension error",
    "worst radius",
    "worst texture",
    "worst area",
    "worst smoothness",
    "worst concave points",
    "worst symmetry",
]

# Prefer the feature list saved on disk; a missing file selects the fallback.
sel_path = OUT_DIR / "03_selected_features_l1.json"
try:
    with open(sel_path, "r") as f:
        selected_features = json.load(f)
except FileNotFoundError:
    selected_features = default_selected_features

# Column subsets of the existing split; the rows (and therefore y) are unchanged.
X_train_sel = X_train[selected_features]
X_test_sel = X_test[selected_features]

print("Selected features:", len(selected_features))
for split_name, full_split, selected_split in (
    ("X_train", X_train, X_train_sel),
    ("X_test", X_test, X_test_sel),
):
    print(f"Full {split_name}:", full_split.shape, f"| Selected {split_name}:", selected_split.shape)


Selected features: 10
Full X_train: (455, 30) | Selected X_train: (455, 10)
Full X_test: (114, 30) | Selected X_test: (114, 10)


## Hyperparameter tuning on selected features

Repeat the tuning procedure using only the selected feature subset.



In [8]:

def _search_selected(pipe, grid, n_iter):
    """Fit a seeded randomized F1 search for `pipe` on the selected features."""
    search = RandomizedSearchCV(
        pipe, grid, n_iter=n_iter, scoring="f1", cv=cv, random_state=SEED, n_jobs=-1
    )
    return search.fit(X_train_sel, y_train)


def _report_selected(tag, search):
    """Print the winning setup for `tag` and return the refit best pipeline."""
    print(f"Best {tag} selected params:", search.best_params_)
    print(f"Best {tag} selected CV F1:", search.best_score_)
    best = search.best_estimator_
    print(f"{tag} tuned selected test:", eval_on_test(best, X_test_sel, y_test))
    return best


# Logistic Regression on the selected features: only C varies.
lr_pipe_sel = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", LogisticRegression(max_iter=5000, random_state=SEED)),
])
lr_params_sel = {
    "model__C": np.logspace(-3, 2, num=20),
    "model__penalty": ["l2"],
    "model__solver": ["lbfgs"],
}
lr_search_sel = _search_selected(lr_pipe_sel, lr_params_sel, n_iter=15)
lr_best_sel = _report_selected("LR", lr_search_sel)


# RBF-kernel SVM on the selected features: C and gamma vary on log scales.
svm_pipe_sel = Pipeline(steps=[
    ("scaler", StandardScaler()),
    # probability=True enables predict_proba, which eval_on_test calls.
    ("model", SVC(kernel="rbf", probability=True, random_state=SEED)),
])
svm_params_sel = {
    "model__kernel": ["rbf"],
    "model__C": np.logspace(-2, 2, num=25),
    "model__gamma": np.logspace(-4, 0, num=25),
}
svm_search_sel = _search_selected(svm_pipe_sel, svm_params_sel, n_iter=25)
svm_best_sel = _report_selected("SVM", svm_search_sel)


# Random Forest on the selected features: size, depth, leaf size, split features.
rf_pipe_sel = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("model", RandomForestClassifier(random_state=SEED)),
])
rf_params_sel = {
    "model__n_estimators": [200, 400, 600, 800],
    "model__max_depth": [None, 3, 5, 8, 12],
    "model__min_samples_leaf": [1, 2, 4, 8],
    "model__max_features": ["sqrt", "log2", None],
}
rf_search_sel = _search_selected(rf_pipe_sel, rf_params_sel, n_iter=25)
rf_best_sel = _report_selected("RF", rf_search_sel)


Best LR selected params: {'model__solver': 'lbfgs', 'model__penalty': 'l2', 'model__C': np.float64(2.636650898730358)}
Best LR selected CV F1: 0.982485315330225
LR tuned selected test: {'test_acc': 0.9736842105263158, 'test_f1': 0.9790209790209791, 'test_auc': 0.9966931216931216}




Best SVM selected params: {'model__kernel': 'rbf', 'model__gamma': np.float64(0.004641588833612777), 'model__C': np.float64(14.67799267622069)}
Best SVM selected CV F1: 0.9826668447152942
SVM tuned selected test: {'test_acc': 0.9824561403508771, 'test_f1': 0.9861111111111112, 'test_auc': 0.9953703703703702}
Best RF selected params: {'model__n_estimators': 800, 'model__min_samples_leaf': 1, 'model__max_features': 'log2', 'model__max_depth': None}
Best RF selected CV F1: 0.9772347819855971
RF tuned selected test: {'test_acc': 0.9649122807017544, 'test_f1': 0.9722222222222222, 'test_auc': 0.996031746031746}


## Tuned full versus tuned selected comparison

Build a single table that compares tuned models trained on full features versus selected features.
Save the table for the report.



In [9]:

def _row(name, cv_f1, model, Xte, yte):
    """Build one table record: label, CV F1 and the model's hold-out metrics."""
    record = {"model": name, "cv_f1": float(cv_f1)}
    record.update(eval_on_test(model, Xte, yte))
    return record


# (label, fitted search, best pipeline, matching test features) per tuned run.
tuned_runs = [
    ("LogReg_tuned_full", lr_search, lr_best, X_test),
    ("SVM_tuned_full", svm_search, svm_best, X_test),
    ("RF_tuned_full", rf_search, rf_best, X_test),
    ("LogReg_tuned_selected", lr_search_sel, lr_best_sel, X_test_sel),
    ("SVM_tuned_selected", svm_search_sel, svm_best_sel, X_test_sel),
    ("RF_tuned_selected", rf_search_sel, rf_best_sel, X_test_sel),
]
comparison = pd.DataFrame([
    _row(label, search.best_score_, fitted, Xte, y_test)
    for label, search, fitted, Xte in tuned_runs
])

# Split the label into model family and feature set so rows can be grouped.
comparison["model_family"] = comparison["model"].str.extract(r"^(LogReg|SVM|RF)", expand=False)
comparison["feature_set"] = comparison["model"].str.extract(r"_(full|selected)$", expand=False)

# Explicit ranks give a LogReg -> SVM -> RF order with full before selected.
family_order = {"LogReg": 0, "SVM": 1, "RF": 2}
feature_order = {"full": 0, "selected": 1}

comparison = (
    comparison
    .assign(
        _fam=comparison["model_family"].map(family_order),
        _fs=comparison["feature_set"].map(feature_order),
    )
    .sort_values(["_fam", "_fs"])
    .drop(columns=["_fam", "_fs"])
)

comparison_path = OUT_DIR / "04_tuned_full_vs_selected.csv"
comparison.to_csv(comparison_path, index=False)
print("Saved:", comparison_path)

comparison


Saved: ../outputs/04_tuned_full_vs_selected.csv


Unnamed: 0,model,cv_f1,test_acc,test_f1,test_auc,model_family,feature_set
0,LogReg_tuned_full,0.987826,0.973684,0.97931,0.995701,LogReg,full
3,LogReg_tuned_selected,0.982485,0.973684,0.979021,0.996693,LogReg,selected
1,SVM_tuned_full,0.981015,0.982456,0.986111,0.997685,SVM,full
4,SVM_tuned_selected,0.982667,0.982456,0.986111,0.99537,SVM,selected
2,RF_tuned_full,0.970181,0.95614,0.965517,0.992063,RF,full
5,RF_tuned_selected,0.977235,0.964912,0.972222,0.996032,RF,selected


## Before versus after tuning

Compute baseline cross-validation metrics and compare them with the tuned results (tuned F1 is the best score reported by each search; AUC is recomputed on the same folds).
Save the table for the report.



In [10]:

from sklearn.model_selection import cross_validate


def cv_f1_auc(pipe, Xtr, ytr):
    """Return (mean F1, mean ROC AUC) of `pipe` over the shared CV folds."""
    fold_scores = cross_validate(pipe, Xtr, ytr, cv=cv, scoring={"f1": "f1", "auc": "roc_auc"})
    mean_f1 = float(np.mean(fold_scores["test_f1"]))
    mean_auc = float(np.mean(fold_scores["test_auc"]))
    return mean_f1, mean_auc


def _tuned_stage(model, stage, search, best, Xtr):
    """Record a tuned stage: F1 from the search, AUC re-scored on the same folds."""
    return {
        "model": model,
        "stage": stage,
        "cv_f1": float(search.best_score_),
        "cv_auc": cv_f1_auc(best, Xtr, y_train)[1],
    }


rows = []

# Logistic Regression: default pipeline first, then both tuned variants.
lr_before_f1, lr_before_auc = cv_f1_auc(lr_before, X_train, y_train)
rows += [
    {"model": "LogReg", "stage": "before (default)", "cv_f1": lr_before_f1, "cv_auc": lr_before_auc},
    _tuned_stage("LogReg", "after tuned (full)", lr_search, lr_best, X_train),
    _tuned_stage("LogReg", "after tuned (selected)", lr_search_sel, lr_best_sel, X_train_sel),
]

# RBF-kernel SVM.
svm_before_f1, svm_before_auc = cv_f1_auc(svm_before, X_train, y_train)
rows += [
    {"model": "SVM(RBF)", "stage": "before (default)", "cv_f1": svm_before_f1, "cv_auc": svm_before_auc},
    _tuned_stage("SVM(RBF)", "after tuned (full)", svm_search, svm_best, X_train),
    _tuned_stage("SVM(RBF)", "after tuned (selected)", svm_search_sel, svm_best_sel, X_train_sel),
]

# Random Forest.
rf_before_f1, rf_before_auc = cv_f1_auc(rf_before, X_train, y_train)
rows += [
    {"model": "RandomForest", "stage": "before (default)", "cv_f1": rf_before_f1, "cv_auc": rf_before_auc},
    _tuned_stage("RandomForest", "after tuned (full)", rf_search, rf_best, X_train),
    _tuned_stage("RandomForest", "after tuned (selected)", rf_search_sel, rf_best_sel, X_train_sel),
]

before_after = pd.DataFrame(rows)

# Look up each model's default-stage scores and subtract them from every row,
# so the default rows themselves get a delta of exactly zero.
baseline = before_after[before_after["stage"] == "before (default)"].set_index("model")
before_after["delta_f1_vs_before"] = before_after["cv_f1"] - before_after["model"].map(baseline["cv_f1"])
before_after["delta_auc_vs_before"] = before_after["cv_auc"] - before_after["model"].map(baseline["cv_auc"])

# Explicit ranks keep the table grouped by model and ordered by stage.
stage_order = {"before (default)": 0, "after tuned (full)": 1, "after tuned (selected)": 2}
model_order = {"LogReg": 0, "SVM(RBF)": 1, "RandomForest": 2}

before_after = (
    before_after
    .assign(
        _m=before_after["model"].map(model_order),
        _s=before_after["stage"].map(stage_order),
    )
    .sort_values(["_m", "_s"])
    .drop(columns=["_m", "_s"])
)

before_after_path = OUT_DIR / "04_before_after_tuning.csv"
before_after.to_csv(before_after_path, index=False)
print("Saved:", before_after_path)

before_after




Saved: ../outputs/04_before_after_tuning.csv


Unnamed: 0,model,stage,cv_f1,cv_auc,delta_f1_vs_before,delta_auc_vs_before
0,LogReg,before (default),0.982544,0.995872,0.0,0.0
1,LogReg,after tuned (full),0.987826,0.995253,0.005281,-0.000619195
2,LogReg,after tuned (selected),0.982485,0.995872,-5.9e-05,1.110223e-16
3,SVM(RBF),before (default),0.975615,0.995562,0.0,0.0
4,SVM(RBF),after tuned (full),0.981015,0.996491,0.0054,0.0009287926
5,SVM(RBF),after tuned (selected),0.982667,0.994324,0.007052,-0.00123839
6,RandomForest,before (default),0.969935,0.989577,0.0,0.0
7,RandomForest,after tuned (full),0.970181,0.990402,0.000246,0.0008255934
8,RandomForest,after tuned (selected),0.977235,0.993292,0.0073,0.00371517


## Best parameter sets

Save the best parameter sets so the final configuration is explicit and reproducible.



In [11]:

import json


def _json_default(value):
    """Convert values the JSON encoder rejects, keeping numbers numeric.

    A blanket ``default=str`` would write a NumPy integer such as
    ``np.int64(600)`` as the string ``"600"``, so the saved configuration could
    not be fed back to an estimator unchanged. NumPy scalars and arrays are
    converted to native Python values instead; anything else still falls back
    to ``str`` so the dump never fails.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# Best hyperparameters per model and feature set; a search without
# best_params_ is recorded as null rather than raising.
params = {
    "logreg_full": getattr(lr_search, "best_params_", None),
    "svm_full": getattr(svm_search, "best_params_", None),
    "rf_full": getattr(rf_search, "best_params_", None),
    "logreg_selected": getattr(lr_search_sel, "best_params_", None),
    "svm_selected": getattr(svm_search_sel, "best_params_", None),
    "rf_selected": getattr(rf_search_sel, "best_params_", None),
}

params_path = OUT_DIR / "04_best_params.json"
with open(params_path, "w", encoding="utf-8") as f:
    json.dump(params, f, indent=2, default=_json_default)

print("Saved:", params_path)


Saved: ../outputs/04_best_params.json
