In [1]:
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, accuracy_score
import numpy as np
from utils import load_MNIST, save_file
import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [2]:
RANDOM_SEED = 42

Define the parameter grid: This is done in a vertain way because certain solvers are only compatible with certain regularization methods! E.g. LBFGS is only compatible without regularization or L2. 

In [3]:
C_options = [0.01, 0.1, 1.0]
tol_options = [1e-4, 1e-3]
max_iter_options = [50, 100, 150]
param_grid = [
    {"solver": 
         ["sag", "lbfgs", "newton-cg"], "penalty": ["l2"], "C": C_options, "tol": tol_options, "max_iter": max_iter_options},
    {"solver": 
         ["liblinear"], "penalty": ["l1", "l2"], "C": C_options, "tol": tol_options, "max_iter": max_iter_options},
    {"solver": 
         ["saga"], "penalty": ["l1", "l2", "elasticnet"], "C": C_options, "tol": tol_options, "max_iter": max_iter_options, "l1_ratio": [0.25, 0.5, 0.75]},
]

In [4]:
# Cross-validation settings
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Create the Logistic Regression model
log_reg = LogisticRegression(multi_class='auto', random_state=RANDOM_SEED)

# Setup GridSearchCV
grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    scoring=make_scorer(accuracy_score),
    cv=cv,
    n_jobs=-1,
    verbose=3
)

Loading MNIST

In [5]:
X_train, y_train, X_test, y_test, _, _, _, _ = load_MNIST(random_seed=RANDOM_SEED)

In [6]:
# Perform grid search
grid_search.fit(X_train, y_train)

# Output the best parameters and corresponding score
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluate the best model on the test set
best_model = grid_search.best_estimator_
test_accuracy = best_model.score(X_test, y_test)
print("Test Accuracy with Best Model:", test_accuracy)

Fitting 5 folds for each of 252 candidates, totalling 1260 fits
Best Parameters: {'C': 0.1, 'max_iter': 50, 'penalty': 'l2', 'solver': 'newton-cg', 'tol': 0.0001}
Best Cross-Validation Accuracy: 0.9206041666666668
Test Accuracy with Best Model: 0.9239


Save parameters

In [7]:
best_params = grid_search.best_params_
# transform them to the format of dict that I use in the shallow_classifier experiments
model_parameters={
    "max_iterations_per_epoch": best_params['max_iter'],
    "regularization": best_params['penalty'], # l1', 'l2', 'elasticnet' or None
    "regularization_strength": best_params['C'], # smaller values mean stronger regularization
    "solver": best_params['solver'],
    "early_stopping_tol": best_params['tol']
}

save_file("shallow_classifier_parameters.pkl", model_parameters)