# Q30: k-Fold Cross-Validation and Hyperparameter Tuning

- Implement k-fold cross-validation on a dataset.
- Use GridSearchCV or RandomizedSearchCV to tune hyperparameters for models like SVM or Decision Trees.
- Document improvements.

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
# Load dataset
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# --- k-Fold Cross-Validation (SVM) ---
svm = SVC()
cv_scores = cross_val_score(svm, X_scaled, y, cv=5)
print('SVM mean CV accuracy (default params):', np.mean(cv_scores))

SVM mean CV accuracy (default params): 0.9666666666666666


In [5]:
# --- GridSearchCV for SVM ---
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
grid = GridSearchCV(SVC(), param_grid, cv=5)
grid.fit(X_scaled, y)
print('Best SVM params:', grid.best_params_)
print('Best SVM CV accuracy:', grid.best_score_)

Best SVM params: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
Best SVM CV accuracy: 0.9733333333333334


In [6]:
# --- Decision Tree with GridSearchCV ---
param_grid_tree = {
    'max_depth': [2, 4, 6, None],
    'min_samples_split': [2, 5, 10]
}

In [7]:
grid_tree = GridSearchCV(DecisionTreeClassifier(), param_grid_tree, cv=5)
grid_tree.fit(X_scaled, y)
print('Best Decision Tree params:', grid_tree.best_params_)
print('Best Decision Tree CV accuracy:', grid_tree.best_score_)

Best Decision Tree params: {'max_depth': 4, 'min_samples_split': 2}
Best Decision Tree CV accuracy: 0.9666666666666668


In [8]:
# --- Document Improvements ---
print('GridSearchCV improves model selection by finding optimal hyperparameters, leading to higher cross-validation accuracy compared to default settings.')

GridSearchCV improves model selection by finding optimal hyperparameters, leading to higher cross-validation accuracy compared to default settings.
