In [3]:
import numpy as np
import tensorflow as tf
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

# Set the seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

set_seed()

# Load the dataset
train_data = pd.read_csv('sign_mnist_train.csv')
test_data = pd.read_csv('sign_mnist_test.csv')

# Extract labels and images
X_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values
X_test = test_data.iloc[:, 1:].values
y_test = test_data.iloc[:, 0].values

# Remove 'J' (label 9) and 'Z' (label 25)
valid_labels = [i for i in range(26) if i not in [9, 25]]
train_mask = np.isin(y_train, valid_labels)
test_mask = np.isin(y_test, valid_labels)

X_train = X_train[train_mask]
y_train = y_train[train_mask]
X_test = X_test[test_mask]
y_test = y_test[test_mask]

# Adjust labels to be in range 0-23 instead of 0-24
y_train = [i if i < 9 else i - 1 for i in y_train]
y_test = [i if i < 9 else i - 1 for i in y_test]

# Convert lists to numpy arrays
y_train = np.array(y_train)
y_test = np.array(y_test)

# Reshape images to 32x32 (since the images consists of 32x32 pixels)
X_train = X_train.reshape(-1, 32, 32, 1)
X_test = X_test.reshape(-1, 32, 32, 1)

# Normalize pixel values
X_train = X_train / 255.0
X_test = X_test / 255.0

# Create a fixed validation set and a test set from the testing data
X_val, X_final_test, y_val, y_final_test = train_test_split(X_test, y_test, test_size=0.5, stratify=y_test, random_state=42)

# One-hot encode labels
y_train_enc = to_categorical(y_train, num_classes=24)
y_val_enc = to_categorical(y_val, num_classes=24)
y_final_test_enc = to_categorical(y_final_test, num_classes=24)

# Define the data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)
datagen.fit(X_train)

# Define the model creation function
def create_cnn_model_with_regularization(learning_rate=0.001, l2_lambda=0.01, num_conv_layers=2, conv_filter_base=32, num_dense_units=128):
    set_seed(42)
    model = Sequential()
    for i in range(num_conv_layers):
        filters = conv_filter_base * (2 ** i)
        model.add(Conv2D(filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(l2_lambda), input_shape=(32, 32, 1) if i == 0 else ()))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Flatten())
    model.add(Dense(num_dense_units, activation='relu', kernel_regularizer=l2(l2_lambda)))
    model.add(BatchNormalization())
    model.add(Dense(24, activation='softmax', kernel_regularizer=l2(l2_lambda)))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model using KerasClassifier
model = KerasClassifier(build_fn=create_cnn_model_with_regularization, verbose=0)

# Check available parameters
params = model.get_params().keys()
print("Available parameters:", params)

# Set up hyperparameter grid
param_grid = {
    'learning_rate': [0.001, 0.0005, 0.0001],
    'l2_lambda': [0.01, 0.005],
    'num_conv_layers': [2, 3],
    'conv_filter_base': [32, 64],
    'num_dense_units': [128, 256, 512],
    'batch_size': [32, 64],
    'epochs': [10]  # Use fewer epochs for faster grid search, increase after finding the best hyperparameters
}

# Perform Grid Search
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X_train, y_train_enc)

# Print the results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, std, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, std, param))


Available parameters: dict_keys(['model', 'build_fn', 'warm_start', 'random_state', 'optimizer', 'loss', 'metrics', 'batch_size', 'validation_batch_size', 'verbose', 'callbacks', 'validation_split', 'shuffle', 'run_eagerly', 'epochs', 'class_weight'])


ValueError: Invalid parameter conv_filter_base for estimator KerasClassifier.
This issue can likely be resolved by setting this parameter in the KerasClassifier constructor:
`KerasClassifier(conv_filter_base=32)`
Check the list of available parameters with `estimator.get_params().keys()`