# Cross-Validation

This notebook demonstrates K-fold cross-validation and a grid search (`GridSearchCV`) over the `alpha` regularization parameter of a Ridge regression model.

In [None]:
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression
import numpy as np
# Seed NumPy's legacy global RNG so any code drawing from np.random is repeatable.
# NOTE(review): the cells visible here pass explicit random_state values to
# make_regression and KFold, so this global seed looks like a safety net rather
# than something those calls depend on — confirm before removing.
np.random.seed(0)

In [None]:
# Create synthetic regression data
X, y = make_regression(n_samples=200, n_features=10, noise=10.0, random_state=1)

# Shuffled 5-fold splitter; the fixed random_state keeps the folds reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

# scikit-learn scorers follow a "higher is better" convention, so the
# 'neg_mean_squared_error' scorer returns the MSE with its sign flipped.
scores = cross_val_score(Ridge(alpha=1.0), X, y, cv=kf, scoring='neg_mean_squared_error')
print('5-fold CV MSE (neg):', scores)

# Negate the mean so the value printed under "Mean MSE" is an actual
# (non-negative) mean squared error rather than the negated score.
print('Mean MSE:', -np.mean(scores))

In [None]:
# Grid search over alpha for Ridge
param_grid = {'alpha': [0.01, 0.1, 1.0, 10.0]}

# Reuse the shuffled, seeded splitter `kf` from the K-Fold cell instead of cv=5
# (an integer cv builds its own unshuffled folds for a regressor). Every alpha
# is then scored on the same folds as the baseline cross_val_score run, so the
# grid-search scores are directly comparable to it.
grid = GridSearchCV(Ridge(), param_grid, scoring='neg_mean_squared_error', cv=kf)
grid.fit(X, y)
print('Best params:', grid.best_params_)
# best_score_ is the mean cross-validated score of the best candidate; with this
# scorer it is a negated MSE, so values closer to zero are better.
print('Best CV score (neg MSE):', grid.best_score_)

## Notes
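- Use cross-validation to estimate generalization performance.
- `GridSearchCV` performs cross-validation internally for every candidate and selects the best hyperparameters by mean score.
- With `scoring='neg_mean_squared_error'`, scores are negated MSE values (higher is better); negate them to recover the MSE.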