In [3]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# Load the digits data as a (features, labels) pair rather than a Bunch object.
X, y = load_digits(return_X_y=True)

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical across kernel restarts.
X_tr, X_ts, y_tr, y_ts = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [4]:
# Define a pipeline to search for the best combination of PCA truncation and
# classifier regularization: standardize -> project with PCA -> classify.
scaler   = StandardScaler()
# loss='log_loss' makes SGDClassifier fit a logistic-regression model;
# random_state pins the shuffling done by the SGD solver.
logistic = SGDClassifier(loss='log_loss', penalty='l2', max_iter=20000, tol=1e-6, random_state=0)
# Seed PCA as well. With the default svd_solver='auto', some scikit-learn
# versions select the randomized SVD for truncated fits on data of this size,
# which would otherwise let CV scores (and the chosen parameters) drift
# between runs even though everything else is seeded.
pca      = PCA(random_state=0)
pipe     = Pipeline(steps=[('scaler', scaler), ('pca', pca), ('logistic', logistic)])

# Define the parameter grid. Keys use the '<step name>__<parameter>' form so
# GridSearchCV can route each value to the matching pipeline step.
param_grid = {
    # Number of principal components kept; 64 matches the digits feature
    # count, i.e. no truncation.
    'pca__n_components': [5, 20, 30, 40, 50, 64],
    # Regularization strengths 1e-4, 1e-2, 1, 1e2, 1e4.
    'logistic__alpha': np.logspace(-4, 4, 5) }

# Use GridSearchCV to search for the best parameters with 5-fold
# cross-validation on the training split only; the held-out test split is
# not touched here.
search = GridSearchCV(pipe, param_grid, cv=5, return_train_score=False)
search.fit(X_tr, y_tr)

# Report the best mean cross-validated score and the parameters achieving it.
print(f"Best parameter (CV score = {search.best_score_:0.3f}):")
print(f"Best parameters: {search.best_params_}")

Best parameter (CV score = 0.936):
Best parameters: {'logistic__alpha': 0.01, 'pca__n_components': 50}
