# WARNING: This code doesn't work with the torch version that modAL needs. To run this script, do the following:
- Run "pip install --upgrade scikit-learn skorch" to update scikit-learn and skorch
- Run this script
- Run "pip install skorch==0.9.0" to downgrade skorch again to the newest version that is compatible with modAL

In [11]:
from sklearn.model_selection import RandomizedSearchCV, cross_val_score, StratifiedKFold
from cnn_model import CNN
from skorch import NeuralNetClassifier
from sklearn.metrics import make_scorer, accuracy_score
import numpy as np
import torch
from utils import load_CIFAR, save_file
import warnings
from sklearn.exceptions import ConvergenceWarning
import time

# Suppress FutureWarning and ConvergenceWarning messages for the rest of the session.
for _ignored_category in (FutureWarning, ConvergenceWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [12]:
# Seed passed to the randomized search and to the load_CIFAR calls below.
RANDOM_SEED = 42

Define the parameter grid: This is done in a certain way because certain solvers are only compatible with certain regularization methods! E.g. LBFGS is only compatible with L2 regularization or with no regularization at all.

In [13]:
# Candidate values for the CNN constructor arguments. They are listed under
# their plain argument names here and receive skorch's ``module__`` prefix
# below, which is how skorch forwards parameters to the wrapped module.
# NOTE(review): if CNN hands l*_padding / l*_stride straight to
# torch.nn.Conv2d, padding='same' together with stride 2 is rejected by
# PyTorch -- confirm how CNN handles that combination.
_cnn_space = {
    "input_size": [16, 32],
    "num_channels": [3],
    "l1_channels": [32, 64],
    "l1_kernel_size": [3, 5, 7],
    "l1_padding": [1, "same"],
    "l1_stride": [1, 2],
    "l2_channels": [64, 128],
    "l2_kernel_size": [3, 5, 7],
    "l2_max_pool_kernel_size": [3, 5],
    "l2_padding": [1, "same"],
    "l2_stride": [1, 2],
    "l2_dropout": [0.25, 0.35, 0.5],
    "l3_dropout": [0.25, 0.35, 0.5],
    "l4_input": [512, 1024, 2048],
    "l4_dropout": [0.25, 0.35, 0.5],
    "l5_input": [1024, 2048],
    "output_size": [10],
}

# Candidate values for the training-specific parameters.
_training_space = {
    "lr": [1e-4, 1e-3, 5e-3, 1e-2],
    "optimizer__weight_decay": [1e-4, 1e-3, 5e-3],
    "max_epochs": [1000, 1250, 1500],
    "batch_size": [32, 64, 128],
}

# Full search space: prefixed CNN parameters first, training parameters after.
param_grid = {
    f"module__{name}": choices for name, choices in _cnn_space.items()
}
param_grid.update(_training_space)


In [14]:
# Cross-validation settings: stratified 5-fold with shuffling. The shuffle is
# seeded with the shared RANDOM_SEED (instead of a separate hard-coded 42) so
# that every seeded component of this notebook follows the same constant.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)

# Create the skorch classifier that wraps the CNN module
cnn_basic = NeuralNetClassifier(
    module=CNN,
    criterion=torch.nn.CrossEntropyLoss,
    optimizer=torch.optim.Adam,
    train_split=None,  # no internal validation split inside skorch
    verbose=0,
    device="cuda" if torch.cuda.is_available() else "cpu"
)

# Set up RandomizedSearchCV: samples n_iter configurations from param_grid
# and scores each one by cross-validated accuracy.
random_search = RandomizedSearchCV(
    estimator=cnn_basic,
    param_distributions=param_grid,
    n_iter=5,  # Number of configurations to try
    scoring='accuracy',
    cv=cv,
    random_state=RANDOM_SEED,
    verbose=3
)

Loading CIFAR

In [15]:
# load_CIFAR returns eight values; only the first two are kept here and used
# as the training inputs and labels for the search. The rest are discarded.
X_train, y_train, _, _, _, _, _, _ = load_CIFAR(random_seed=RANDOM_SEED)

In [None]:
# Run the randomized hyperparameter search and time it
start = time.time()
random_search.fit(X_train, y_train)

# Report the winning configuration, its cross-validation score and the
# wall-clock time elapsed since the search started.
print("____________")
_search_report = (
    ("Best Parameters:", random_search.best_params_),
    ("Best Cross-Validation Accuracy:", random_search.best_score_),
)
for _label, _value in _search_report:
    print(_label, _value)
print("time:", time.time()-start)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


In [None]:
# Drop the training arrays before the test data is loaded.
del X_train, y_train

# The third and fourth values returned by load_CIFAR are used as the test
# inputs and labels; the other six are discarded.
_, _, X_test, y_test, _, _, _, _ = load_CIFAR(random_seed=RANDOM_SEED)

# Score the best estimator found by the search on the test set
best_model = random_search.best_estimator_
test_accuracy = best_model.score(X_test, y_test)
print("Test Accuracy with Best Model:", test_accuracy)

Save parameters

In [None]:
best_params = random_search.best_params_
print(best_params)

# Convert the search result into the dict layout used in the
# shallow_classifier experiments: the skorch prefixes ("module__",
# "optimizer__") are stripped from the keys, the values are copied unchanged.

# CNN-specific parameters, in the order they are written to the output dict
_cnn_argument_names = (
    "input_size",
    "num_channels",
    "l1_channels",
    "l1_kernel_size",
    "l1_padding",
    "l1_stride",
    "l2_channels",
    "l2_kernel_size",
    "l2_max_pool_kernel_size",
    "l2_padding",
    "l2_stride",
    "l2_dropout",
    "l3_dropout",
    "l4_input",
    "l4_dropout",
    "l5_input",
    "output_size",
)
model_parameters = {
    name: best_params[f"module__{name}"] for name in _cnn_argument_names
}

# Training-specific parameters
model_parameters.update(
    lr=best_params["lr"],
    weight_decay=best_params["optimizer__weight_decay"],
    max_epochs=best_params["max_epochs"],
    batch_size=best_params["batch_size"],
)

save_file("deep_classifier_parameters.pkl", model_parameters)