In [1]:
# Jupyter Notebook Example: SVM Hyperparameter Tuning on Digits Dataset

# ==============================================
# 1. Import necessary libraries
# ==============================================
import numpy as np
import pandas as pd

from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import warnings

# Keep the notebook output readable by silencing deprecation-style noise only.
# A blanket filterwarnings("ignore") would also hide warnings that point at
# real problems, so every other warning category is left visible.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# ==============================================
# 2. Load dataset
# ==============================================
# The loaded object exposes the feature matrix as .data and the labels
# as .target; unpack both in one step.
digits = load_digits()
X, y = digits.data, digits.target

print("Dataset shape:", X.shape)
# Distinct label values == number of classes.
print("Number of classes:", np.unique(y).size)

# ==============================================
# 3. Split the data
# ==============================================
# Hold out 20% of the samples as a test set, stratifying on y and using a
# fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the shape of each feature split.
for split_label, split_features in (
    ("Training set shape:", X_train),
    ("Test set shape:    ", X_test),
):
    print(split_label, split_features.shape)

# ==============================================
# 4. Baseline model (default hyperparameters)
# ==============================================
# Reference point for the tuning step: standardize the features, then fit
# an SVC whose only explicit setting is random_state.
baseline_pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(random_state=42)),
    ]
)

# fit() returns the fitted pipeline, so the prediction is chained onto it.
y_pred_baseline = baseline_pipeline.fit(X_train, y_train).predict(X_test)
baseline_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_baseline)

print("\nBaseline Accuracy:", baseline_accuracy)
print("Classification Report (Baseline):")
print(classification_report(y_true=y_test, y_pred=y_pred_baseline))

# ==============================================
# 5. Hyperparameter tuning with GridSearchCV
# ==============================================
# Same Scaler + SVC structure as the baseline, built as a separate object
# for the search to fit.
tuned_pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svc", SVC(random_state=42)),
    ]
)

# Search space for the "svc" step (keys follow the <step>__<param> naming):
# regularization parameter C, kernel type, and gamma.
param_grid = dict(
    svc__C=[0.1, 1, 10],
    svc__kernel=["rbf", "poly"],
    svc__gamma=["scale", "auto", 0.01, 0.001],
)

# 3 * 2 * 4 = 24 candidates, each scored on accuracy with 5-fold
# cross-validation.
grid_search = GridSearchCV(
    tuned_pipeline,
    param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,  # use all available cores
    verbose=1,
)
grid_search.fit(X_train, y_train)

print("\nBest Hyperparameters:", grid_search.best_params_)
print("Best CV Score:       ", grid_search.best_score_)

# ==============================================
# 6. Evaluate the tuned model on the test set
# ==============================================
# Take the estimator selected by the grid search and score it on the
# held-out test data.
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)
tuned_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_tuned)

print("\nTuned Model Accuracy:", tuned_accuracy)
print("Classification Report (Tuned):")
print(classification_report(y_true=y_test, y_pred=y_pred_tuned))

# ==============================================
# 7. Compare Baseline vs. Tuned
# ==============================================
# Show both test-set accuracies side by side, then the gap between them.
print(f"Baseline Accuracy: {baseline_accuracy:.4f}")
print(f"Tuned Accuracy:    {tuned_accuracy:.4f}")

# The difference of the two accuracy scores times 100 is an absolute change
# in percentage points, not a relative percentage, so it is labelled that
# way. A negative value means the tuned model scored lower on the test set.
improvement = (tuned_accuracy - baseline_accuracy) * 100
print(f"Accuracy Improvement: {improvement:.2f} percentage points")


Dataset shape: (1797, 64)
Number of classes: 10
Training set shape: (1437, 64)
Test set shape:     (360, 64)

Baseline Accuracy: 0.975
Classification Report (Baseline):
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        36
           1       0.95      0.97      0.96        36
           2       1.00      1.00      1.00        35
           3       1.00      1.00      1.00        37
           4       0.95      0.97      0.96        36
           5       0.97      1.00      0.99        37
           6       0.97      1.00      0.99        36
           7       0.92      0.97      0.95        36
           8       1.00      0.91      0.96        35
           9       1.00      0.92      0.96        36

    accuracy                           0.97       360
   macro avg       0.98      0.97      0.97       360
weighted avg       0.98      0.97      0.97       360

Fitting 5 folds for each of 24 candidates, totalling 120 fits

Best Hype