In [32]:
# Set the seed for reproducibility
import random
import tensorflow as tf
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Load the dataset
train_data = pd.read_csv('sign_mnist_train.csv')
test_data = pd.read_csv('sign_mnist_test.csv')

# Extract labels and images
X_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values
X_test = test_data.iloc[:, 1:].values
y_test = test_data.iloc[:, 0].values

# Remove 'J' (label 9) and 'Z' (label 25)
valid_labels = [i for i in range(26) if i not in [9, 25]]
train_mask = np.isin(y_train, valid_labels)
test_mask = np.isin(y_test, valid_labels)

X_train = X_train[train_mask]
y_train = y_train[train_mask]
X_test = X_test[test_mask]
y_test = y_test[test_mask]

# Adjust labels to be in range 0-23 instead of 0-24
y_train = [i if i < 9 else i - 1 for i in y_train]
y_test = [i if i < 9 else i - 1 for i in y_test]

# Convert lists to numpy arrays
y_train = np.array(y_train)
y_test = np.array(y_test)

# Reshape images to 32x32 (since the images consists of 32x32 pixels)
X_train = X_train.reshape(-1, 32, 32, 1)
X_test = X_test.reshape(-1, 32, 32, 1)

# Normalize pixel values
X_train = X_train / 255.0
X_test = X_test / 255.0

# Create a fixed validation set and a test set from the testing data
X_val, X_final_test, y_val, y_final_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# One-hot encode labels
y_train_enc = to_categorical(y_train, num_classes=24)
y_val_enc = to_categorical(y_val, num_classes=24)
y_final_test_enc = to_categorical(y_final_test, num_classes=24)


In [34]:
# Let's see how big it is
print(X_train.shape)
print(X_test.shape)
n_total = X_train.shape[0]

(27455, 32, 32, 1)
(7172, 32, 32, 1)


In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Define the densely connected model
dense_model = Sequential([
    Flatten(input_shape=(32, 32, 1)),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(24, activation='softmax')
])

# Compile the model
dense_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history_dense = dense_model.fit(X_train, y_train_enc, epochs=20, validation_data=(X_val, y_val_enc))


Epoch 1/20


  super().__init__(**kwargs)


[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.2827 - loss: 2.3754 - val_accuracy: 0.5700 - val_loss: 1.3219
Epoch 2/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6973 - loss: 0.9249 - val_accuracy: 0.7100 - val_loss: 0.9362
Epoch 3/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8297 - loss: 0.5159 - val_accuracy: 0.7334 - val_loss: 0.9279
Epoch 4/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9095 - loss: 0.2890 - val_accuracy: 0.7437 - val_loss: 0.9216
Epoch 5/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9410 - loss: 0.1866 - val_accuracy: 0.7474 - val_loss: 1.0296
Epoch 6/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9664 - loss: 0.1147 - val_accuracy: 0.7579 - val_loss: 1.0527
Epoch 7/20
[1m858/858[0m [32m━━━━━━━

In [36]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Define the CNN model
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(24, activation='softmax')
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history_cnn = cnn_model.fit(X_train, y_train_enc, epochs=20, validation_data=(X_val, y_val_enc))


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5546 - loss: 1.5292 - val_accuracy: 0.8698 - val_loss: 0.4466
Epoch 2/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9949 - loss: 0.0373 - val_accuracy: 0.8859 - val_loss: 0.4554
Epoch 3/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9984 - loss: 0.0083 - val_accuracy: 0.8751 - val_loss: 0.5138
Epoch 4/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 1.0000 - loss: 0.0023 - val_accuracy: 0.8946 - val_loss: 0.5011
Epoch 5/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 1.0000 - loss: 5.2028e-04 - val_accuracy: 0.8988 - val_loss: 0.5283
Epoch 6/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 1.0000 - loss: 2.7991e-04 - val_accuracy: 0.9007 - val_loss: 0.5485
Epoch 7/20
[1m858/858[0m [32

In [37]:
from sklearn.metrics import confusion_matrix
def evaluate_model(name, model_name, X_final_test, y_final_test_enc, verbose = 2):
    model_eval = model_name.evaluate(X_final_test, y_final_test_enc, verbose = verbose)
    print(f"{name} Model - Test Accuracy: {model_eval[1]}")

    # Detailed evaluation of the model
    y_pred = np.argmax(model_name.predict(X_final_test), axis=1)

    # Compute accuracy per class, skipping index 9 (for J)
    accuracy_per_class = []
    for i in range(24):
        if np.sum(y_final_test == i) > 0:
            accuracy_per_class.append(np.mean(y_pred[y_final_test == i] == i))
        else:
            accuracy_per_class.append(np.nan)  # Handle classes with no samples

    # Filter out NaN values to calculate the median accuracy
    valid_accuracies = [acc for acc in accuracy_per_class if not np.isnan(acc)]
    median_accuracy = np.median(valid_accuracies)

    print(f"Unbiased Median Accuracy: {median_accuracy}")

    # Identify the letter with the highest individual accuracy
    highest_accuracy_class = np.nanargmax(accuracy_per_class)
    print(f"Letter with Highest Accuracy: {chr(highest_accuracy_class + ord('A'))}")

    # Identify the letter with the lowest individual accuracy
    lowest_accuracy_class = np.nanargmin(accuracy_per_class)
    print(f"Letter with Lowest Accuracy: {chr(lowest_accuracy_class + ord('A'))}")

    # Calculate the confusion matrix
  
    conf_matrix = confusion_matrix(y_final_test, y_pred)

    # Set the diagonal elements to zero to exclude correct classifications
    np.fill_diagonal(conf_matrix, 0)

    # Find the indices of the top three errors
    errors = np.unravel_index(np.argsort(-conf_matrix, axis=None), conf_matrix.shape)

    # Get the top three most common errors
    common_errors = [(chr(errors[0][i] + ord('A')), chr(errors[1][i] + ord('A'))) for i in range(3)]
    print(f"Most Common Errors: {common_errors}")

    # Report overall mean accuracy and accuracy per letter
    mean_accuracy = np.nanmean(accuracy_per_class)
    print(f"Overall Mean Accuracy: {mean_accuracy}")

    # Print each letter and its accuracy
    letters = [chr(i + ord('A')) for i in range(26) if i not in [9, 25]]
    for i, acc in enumerate(accuracy_per_class):
        print(f"Letter {letters[i]}: Accuracy {acc}")




In [38]:
evaluate_model("Dense",dense_model, X_val, y_val_enc, 2)

113/113 - 0s - 4ms/step - accuracy: 0.8363 - loss: 0.8788
Dense Model - Test Accuracy: 0.8363078832626343
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Unbiased Median Accuracy: 0.03864824495892457
Letter with Highest Accuracy: B
Letter with Lowest Accuracy: K
Most Common Errors: [('H', 'U'), ('C', 'E'), ('E', 'K')]
Overall Mean Accuracy: 0.04228168717173333
Letter A: Accuracy 0.06707317073170732
Letter B: Accuracy 0.07692307692307693
Letter C: Accuracy 0.019736842105263157
Letter D: Accuracy 0.023255813953488372
Letter E: Accuracy 0.051587301587301584
Letter F: Accuracy 0.038461538461538464
Letter G: Accuracy 0.03125
Letter H: Accuracy 0.07511737089201878
Letter I: Accuracy 0.06802721088435375
Letter K: Accuracy 0.043209876543209874
Letter L: Accuracy 0.009523809523809525
Letter M: Accuracy 0.07
Letter N: Accuracy 0.014184397163120567
Letter O: Accuracy 0.03305785123966942
Letter P: Accuracy 0.02857142857142857
Letter Q: Accuracy 0.02666666666666667
Letter

In [39]:
evaluate_model("CNN", cnn_model, X_val, y_val_enc, 2)

113/113 - 1s - 5ms/step - accuracy: 0.8957 - loss: 0.8947
CNN Model - Test Accuracy: 0.89570552110672
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Unbiased Median Accuracy: 0.04366376180101671
Letter with Highest Accuracy: B
Letter with Lowest Accuracy: Q
Most Common Errors: [('C', 'E'), ('L', 'H'), ('O', 'E')]
Overall Mean Accuracy: 0.03948595416101395
Letter A: Accuracy 0.04878048780487805
Letter B: Accuracy 0.07692307692307693
Letter C: Accuracy 0.013157894736842105
Letter D: Accuracy 0.023255813953488372
Letter E: Accuracy 0.051587301587301584
Letter F: Accuracy 0.038461538461538464
Letter G: Accuracy 0.0375
Letter H: Accuracy 0.07511737089201878
Letter I: Accuracy 0.047619047619047616
Letter K: Accuracy 0.043209876543209874
Letter L: Accuracy 0.009523809523809525
Letter M: Accuracy 0.06
Letter N: Accuracy 0.04964539007092199
Letter O: Accuracy 0.03305785123966942
Letter P: Accuracy 0.02857142857142857
Letter Q: Accuracy 0.013333333333333334
Letter R: 

Part 2

In [40]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=10,        # Randomly rotate images by 10 degrees
    width_shift_range=0.1,    # Randomly translate images horizontally by 10% of the width
    height_shift_range=0.1,   # Randomly translate images vertically by 10% of the height
    zoom_range=0.1,           # Randomly zoom images by 10%
    horizontal_flip=True      # Randomly flip images horizontally
)

# Fit the generator to the training data
datagen.fit(X_train)

In [41]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
# Early stopping and learning rate reduction callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)


In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

def create_dense_model_with_regularization(layer_sizes, learning_rate=0.001, l2_lambda=0.01):
    model = Sequential()
    model.add(Flatten(input_shape=(32, 32, 1)))
    for size in layer_sizes:
        model.add(Dense(size, activation='relu', kernel_regularizer=l2(l2_lambda)))
        model.add(BatchNormalization())
    model.add(Dense(24, activation='softmax', kernel_regularizer=l2(l2_lambda)))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

def create_cnn_model_with_regularization(conv_layers, dense_layers, learning_rate=0.001, l2_lambda=0.01):
    model = Sequential()
    for filters, kernel_size in conv_layers:
        model.add(Conv2D(filters, kernel_size, activation='relu', padding='same', kernel_regularizer=l2(l2_lambda), input_shape=(32, 32, 1)))
        model.add(BatchNormalization())
        model.add(MaxPooling2D((2, 2), padding='same'))
    model.add(Flatten())
    for size in dense_layers:
        model.add(Dense(size, activation='relu', kernel_regularizer=l2(l2_lambda)))
        model.add(BatchNormalization())
    model.add(Dense(24, activation='softmax', kernel_regularizer=l2(l2_lambda)))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [44]:
def return_best_model(group_name, model_group, X_train, y_train_enc, batch_size = 30):
    # Train and evaluate each  model
    model_histories = []
    for model in model_group:
        print(f'Model for {group_name}: {model}')
        history = model.fit(
            datagen.flow(X_train, y_train_enc, batch_size=batch_size),
            steps_per_epoch=len(X_train) // 30,
            epochs=50,
            validation_data=(X_val, y_val_enc),
            callbacks=[early_stopping, reduce_lr],
            verbose=1
        )
        val_accuracy = history.history['val_accuracy'][-1]
        model_histories.append((val_accuracy, model))

    # Determine the best model based on validation accuracy
    best_val_accuracy, best_model = max(model_histories, key=lambda item: item[0])

    print(f'The best model is: {best_model} with a validation accuracy of: {best_val_accuracy}')
    return model_histories

In [45]:
# Define different dense models to experiment with
dense_models = [
    create_dense_model_with_regularization([512, 256], learning_rate=0.001),
    create_dense_model_with_regularization([1024, 512, 256], learning_rate=0.001),
    create_dense_model_with_regularization([1024, 512, 256, 128], learning_rate=0.001)
]

  super().__init__(**kwargs)


In [46]:
# Define different CNN models with regularization to experiment with
cnn_models = [
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3))], [128],learning_rate=0.001),
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3)), (128, (3, 3))], [256], learning_rate=0.001),
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3)), (128, (3, 3)), (256, (3, 3))], [512], learning_rate=0.001)
]



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [47]:
dense_histories = return_best_model("Dense", dense_models, X_train, y_train_enc, 30)

Model for Dense: <Sequential name=sequential_18, built=True>
Epoch 1/50
[1m  1/915[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33:40[0m 2s/step - accuracy: 0.0333 - loss: 14.2928

  self._warn_if_super_not_called()


[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - accuracy: 0.2369 - loss: 6.9747 - val_accuracy: 0.0510 - val_loss: 16.1490 - learning_rate: 0.0010
Epoch 2/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153us/step - accuracy: 0.4000 - loss: 2.4090 - val_accuracy: 0.0508 - val_loss: 15.3376 - learning_rate: 0.0010
Epoch 3/50
[1m  1/915[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m42s[0m 47ms/step - accuracy: 0.3000 - loss: 2.7853

  self.gen.throw(value)


[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.3815 - loss: 2.4453 - val_accuracy: 0.1065 - val_loss: 5.9742 - learning_rate: 0.0010
Epoch 4/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135us/step - accuracy: 0.5333 - loss: 1.9002 - val_accuracy: 0.1576 - val_loss: 5.2179 - learning_rate: 0.0010
Epoch 5/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.4153 - loss: 2.2274 - val_accuracy: 0.0739 - val_loss: 7.9362 - learning_rate: 0.0010
Epoch 6/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133us/step - accuracy: 0.4667 - loss: 1.9804 - val_accuracy: 0.0661 - val_loss: 8.0234 - learning_rate: 0.0010
Epoch 7/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.4434 - loss: 2.1271 - val_accuracy: 0.2315 - val_loss: 3.0373 - learning_rate: 0.0010
Epoch 8/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

In [48]:
cnn_histories = return_best_model("CNN", dense_models, X_train, y_train_enc, 30)

Model for CNN: <Sequential name=sequential_18, built=True>
Epoch 1/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6668 - loss: 1.3747 - val_accuracy: 0.7705 - val_loss: 1.1157 - learning_rate: 1.0000e-05
Epoch 2/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139us/step - accuracy: 0.6333 - loss: 1.4292 - val_accuracy: 0.7722 - val_loss: 1.1150 - learning_rate: 1.0000e-05
Epoch 3/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6639 - loss: 1.3817 - val_accuracy: 0.7730 - val_loss: 1.0972 - learning_rate: 1.0000e-05
Epoch 4/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142us/step - accuracy: 0.8000 - loss: 1.2854 - val_accuracy: 0.7724 - val_loss: 1.0977 - learning_rate: 1.0000e-05
Epoch 5/50
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6642 - loss: 1.3863 - val_accuracy: 0.7446 - val_loss: 1.1388 - learning_rate

In [49]:
# Combine all histories and models
all_histories = dense_histories + cnn_histories

# Select the best model based on validation accuracy
best_model = max(all_histories, key=lambda x: x[0])[1]


# Evaluate the best model on the final test set
final_eval = best_model.evaluate(X_final_test, y_final_test_enc, verbose=0)
print(f"Best Model: {best_model} - Final Test Accuracy: {final_eval[1]}")


Best Model: <Sequential name=sequential_18, built=True> - Final Test Accuracy: 0.7640825510025024


In [50]:
evaluate_model("CNN", best_model, X_final_test, y_final_test_enc)

113/113 - 0s - 1ms/step - accuracy: 0.7641 - loss: 1.1075
CNN Model - Test Accuracy: 0.7640825510025024
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Unbiased Median Accuracy: 0.7863119834710743
Letter with Highest Accuracy: K
Letter with Lowest Accuracy: Q
Most Common Errors: [('E', 'R'), ('L', 'R'), ('T', 'U')]
Overall Mean Accuracy: 0.7597572908084921
Letter A: Accuracy 0.7560975609756098
Letter B: Accuracy 0.9230769230769231
Letter C: Accuracy 0.9276315789473685
Letter D: Accuracy 0.689922480620155
Letter E: Accuracy 0.7182539682539683
Letter F: Accuracy 0.9153846153846154
Letter G: Accuracy 0.7875
Letter H: Accuracy 0.7934272300469484
Letter I: Accuracy 0.8571428571428571
Letter K: Accuracy 0.5987654320987654
Letter L: Accuracy 1.0
Letter M: Accuracy 0.47
Letter N: Accuracy 0.8865248226950354
Letter O: Accuracy 0.7851239669421488
Letter P: Accuracy 0.9257142857142857
Letter Q: Accuracy 0.8533333333333334
Letter R: Accuracy 0.36619718309859156
Letter S: