In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Load the dataset
train_data = pd.read_csv('sign_mnist_train.csv')
test_data = pd.read_csv('sign_mnist_test.csv')

# Extract labels and images
X_train = train_data.iloc[:, 1:].values
y_train = train_data.iloc[:, 0].values
X_test = test_data.iloc[:, 1:].values
y_test = test_data.iloc[:, 0].values

# Remove 'J' (label 9) and 'Z' (label 25)
valid_labels = [i for i in range(26) if i not in [9, 25]]
train_mask = np.isin(y_train, valid_labels)
test_mask = np.isin(y_test, valid_labels)

X_train = X_train[train_mask]
y_train = y_train[train_mask]
X_test = X_test[test_mask]
y_test = y_test[test_mask]

# Adjust labels to be in range 0-23 instead of 0-24
y_train = [i if i < 9 else i - 1 for i in y_train]
y_test = [i if i < 9 else i - 1 for i in y_test]

# Convert lists to numpy arrays
y_train = np.array(y_train)
y_test = np.array(y_test)

# Reshape images to 32x32 (since the images consists of 32x32 pixels)
X_train = X_train.reshape(-1, 32, 32, 1)
X_test = X_test.reshape(-1, 32, 32, 1)

# Normalize pixel values
X_train = X_train / 255.0
X_test = X_test / 255.0

# Create a fixed validation set and a test set from the testing data
X_val, X_final_test, y_val, y_final_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

# One-hot encode labels
y_train_enc = to_categorical(y_train, num_classes=24)
y_val_enc = to_categorical(y_val, num_classes=24)
y_final_test_enc = to_categorical(y_final_test, num_classes=24)


In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Define the densely connected model
dense_model = Sequential([
    Flatten(input_shape=(32, 32, 1)),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(24, activation='softmax')
])

# Compile the model
dense_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history_dense = dense_model.fit(X_train, y_train_enc, epochs=20, validation_data=(X_val, y_val_enc))


  super().__init__(**kwargs)


Epoch 1/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 14ms/step - accuracy: 0.2865 - loss: 2.3323 - val_accuracy: 0.5591 - val_loss: 1.3532
Epoch 2/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step - accuracy: 0.7035 - loss: 0.8978 - val_accuracy: 0.6539 - val_loss: 1.0439
Epoch 3/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - accuracy: 0.8377 - loss: 0.4874 - val_accuracy: 0.6439 - val_loss: 1.3930
Epoch 4/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - accuracy: 0.9163 - loss: 0.2651 - val_accuracy: 0.7507 - val_loss: 0.9535
Epoch 5/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step - accuracy: 0.9522 - loss: 0.1582 - val_accuracy: 0.7705 - val_loss: 0.8952
Epoch 6/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - accuracy: 0.9640 - loss: 0.1203 - val_accuracy: 0.7607 - val_loss: 1.0375
Epoch 7/20
[1m8

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Define the CNN model
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(24, activation='softmax')
])

# Compile the model
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history_cnn = cnn_model.fit(X_train, y_train_enc, epochs=20, validation_data=(X_val, y_val_enc))


  super().__init__(


Epoch 1/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 18ms/step - accuracy: 0.5604 - loss: 1.4881 - val_accuracy: 0.8670 - val_loss: 0.4635
Epoch 2/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.9952 - loss: 0.0320 - val_accuracy: 0.8890 - val_loss: 0.4206
Epoch 3/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 0.9978 - loss: 0.0109 - val_accuracy: 0.8940 - val_loss: 0.4951
Epoch 4/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 17ms/step - accuracy: 1.0000 - loss: 8.8514e-04 - val_accuracy: 0.9032 - val_loss: 0.5021
Epoch 5/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 20ms/step - accuracy: 1.0000 - loss: 3.7818e-04 - val_accuracy: 0.9004 - val_loss: 0.5507
Epoch 6/20
[1m858/858[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 19ms/step - accuracy: 1.0000 - loss: 2.0534e-04 - val_accuracy: 0.9046 - val_loss: 0.5338
Epoc

In [20]:
dense_eval = dense_model.evaluate(X_final_test, y_final_test_enc, verbose=2)
print(dense_eval)

113/113 - 0s - 2ms/step - accuracy: 0.8249 - loss: 0.8499
[0.8499348163604736, 0.8248745203018188]


In [21]:
def evaluate_model(model_name, X_final_test, y_final_test_enc, verbose = 2):
    model_eval = model_name.evaluate(X_final_test, y_final_test_enc, verbose = verbose)
    print(f"Dense Model - Test Accuracy: {model_eval[1]}")

    # Detailed evaluation of the model
    y_pred = np.argmax(model_name.predict(X_final_test), axis=1)

    # Compute accuracy per class, skipping index 9 (for J)
    accuracy_per_class = []
    for i in range(24):
        if np.sum(y_final_test == i) > 0:
            accuracy_per_class.append(np.mean(y_pred[y_final_test == i] == i))
        else:
            accuracy_per_class.append(np.nan)  # Handle classes with no samples

    # Filter out NaN values to calculate the median accuracy
    valid_accuracies = [acc for acc in accuracy_per_class if not np.isnan(acc)]
    median_accuracy = np.median(valid_accuracies)

    print(f"Unbiased Median Accuracy: {median_accuracy}")

    # Identify the letter with the highest individual accuracy
    highest_accuracy_class = np.nanargmax(accuracy_per_class)
    print(f"Letter with Highest Accuracy: {chr(highest_accuracy_class + ord('A'))}")

    # Identify the letter with the lowest individual accuracy
    lowest_accuracy_class = np.nanargmin(accuracy_per_class)
    print(f"Letter with Lowest Accuracy: {chr(lowest_accuracy_class + ord('A'))}")

    # Calculate the confusion matrix
    from sklearn.metrics import confusion_matrix
    conf_matrix = confusion_matrix(y_final_test, y_pred)

    # Set the diagonal elements to zero to exclude correct classifications
    np.fill_diagonal(conf_matrix, 0)

    # Find the indices of the top three errors
    errors = np.unravel_index(np.argsort(-conf_matrix, axis=None), conf_matrix.shape)

    # Get the top three most common errors
    common_errors = [(chr(errors[0][i] + ord('A')), chr(errors[1][i] + ord('A'))) for i in range(3)]
    print(f"Most Common Errors: {common_errors}")

    # Report overall mean accuracy and accuracy per letter
    mean_accuracy = np.nanmean(accuracy_per_class)
    print(f"Overall Mean Accuracy: {mean_accuracy}")

    # Print each letter and its accuracy
    letters = [chr(i + ord('A')) for i in range(26) if i not in [9, 25]]
    for i, acc in enumerate(accuracy_per_class):
        print(f"Letter {letters[i]}: Accuracy {acc}")




In [22]:
evaluate_model(dense_model, X_final_test, y_final_test_enc, 2)

113/113 - 0s - 2ms/step - accuracy: 0.8249 - loss: 0.8499
Dense Model - Test Accuracy: 0.8248745203018188
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Unbiased Median Accuracy: 0.7914728682170542
Letter with Highest Accuracy: A
Letter with Lowest Accuracy: T
Most Common Errors: [('W', 'V'), ('J', 'T'), ('R', 'L')]
Overall Mean Accuracy: 0.809177981499935
Letter A: Accuracy 1.0
Letter B: Accuracy 0.9140271493212669
Letter C: Accuracy 0.9407894736842105
Letter D: Accuracy 0.7829457364341085
Letter E: Accuracy 1.0
Letter F: Accuracy 0.9
Letter G: Accuracy 0.94375
Letter H: Accuracy 0.9624413145539906
Letter I: Accuracy 0.9319727891156463
Letter K: Accuracy 0.5987654320987654
Letter L: Accuracy 1.0
Letter M: Accuracy 0.8
Letter N: Accuracy 0.7659574468085106
Letter O: Accuracy 0.6528925619834711
Letter P: Accuracy 1.0
Letter Q: Accuracy 0.8533333333333334
Letter R: Accuracy 0.7464788732394366
Letter S: Accuracy 0.5677966101694916
Letter T: Accuracy 0.651162790

In [23]:
evaluate_model(cnn_model, X_final_test, y_final_test_enc, 2)

113/113 - 1s - 6ms/step - accuracy: 0.9359 - loss: 0.4460
Dense Model - Test Accuracy: 0.9358617067337036
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Unbiased Median Accuracy: 0.958125
Letter with Highest Accuracy: A
Letter with Lowest Accuracy: Q
Most Common Errors: [('S', 'G'), ('I', 'X'), ('T', 'J')]
Overall Mean Accuracy: 0.9253847631816399
Letter A: Accuracy 1.0
Letter B: Accuracy 0.9547511312217195
Letter C: Accuracy 0.993421052631579
Letter D: Accuracy 0.9922480620155039
Letter E: Accuracy 1.0
Letter F: Accuracy 1.0
Letter G: Accuracy 0.95625
Letter H: Accuracy 0.9859154929577465
Letter I: Accuracy 0.8231292517006803
Letter K: Accuracy 0.8827160493827161
Letter L: Accuracy 1.0
Letter M: Accuracy 0.96
Letter N: Accuracy 0.851063829787234
Letter O: Accuracy 0.8347107438016529
Letter P: Accuracy 1.0
Letter Q: Accuracy 1.0
Letter R: Accuracy 0.5774647887323944
Letter S: Accuracy 1.0
Letter T: Accuracy 0.7286821705426356
Letter U: Accuracy 0.86764705882

Part 2

In [36]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=10,        # Randomly rotate images by 10 degrees
    width_shift_range=0.1,    # Randomly translate images horizontally by 10% of the width
    height_shift_range=0.1,   # Randomly translate images vertically by 10% of the height
    zoom_range=0.1,           # Randomly zoom images by 10%
    horizontal_flip=True      # Randomly flip images horizontally
)

# Fit the generator to the training data
datagen.fit(X_train)

In [25]:
# from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
# # Early stopping and learning rate reduction callbacks
# early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)


In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

def create_dense_model_with_regularization(layer_sizes, dropout_rate=0.5, learning_rate=0.001, l2_lambda=0.01):
    model = Sequential()
    model.add(Flatten(input_shape=(32, 32, 1)))
    for size in layer_sizes:
        model.add(Dense(size, activation='relu', kernel_regularizer=l2(l2_lambda)))
        model.add(Dropout(dropout_rate))
        model.add(BatchNormalization())
    model.add(Dense(24, activation='softmax', kernel_regularizer=l2(l2_lambda)))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [42]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

def create_cnn_model_with_regularization(conv_layers, dense_layers, dropout_rate=0.3, learning_rate=0.001, l2_lambda=0.01):
    model = Sequential()
    for filters, kernel_size in conv_layers:
        model.add(Conv2D(filters, kernel_size, activation='relu', kernel_regularizer=l2(l2_lambda), input_shape=(32, 32, 1)))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(dropout_rate))
        model.add(BatchNormalization())
    model.add(Flatten())
    for size in dense_layers:
        model.add(Dense(size, activation='relu', kernel_regularizer=l2(l2_lambda)))
        model.add(Dropout(dropout_rate))
        model.add(BatchNormalization())
    model.add(Dense(24, activation='softmax', kernel_regularizer=l2(l2_lambda)))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [43]:
# Define different dense models to experiment with
dense_models = [
    create_dense_model_with_regularization([512, 256], dropout_rate=0.3, learning_rate=0.001),
    create_dense_model_with_regularization([1024, 512, 256], dropout_rate=0.3, learning_rate=0.001),
    create_dense_model_with_regularization([1024, 512, 256, 128], dropout_rate=0.3, learning_rate=0.001)
]

# Train and evaluate each dense model
dense_histories = []
for dense_model in dense_models:
    print(f'Dense model: {dense_model}')
    history = dense_model.fit(
        datagen.flow(X_train, y_train_enc, batch_size=30),
        steps_per_epoch=len(X_train) // 30,
        epochs=20,
        validation_data=(X_val, y_val_enc),
        # callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    val_accuracy = history.history['val_accuracy'][-1]
    dense_histories.append((val_accuracy, dense_model))

Dense model: <Sequential name=sequential_28, built=True>
Epoch 1/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 27ms/step - accuracy: 0.0597 - loss: 7.5303 - val_accuracy: 0.0480 - val_loss: 3.9478
Epoch 2/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 590us/step - accuracy: 0.0000e+00 - loss: 1.7902 - val_accuracy: 0.0480 - val_loss: 3.9419
Epoch 3/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 28ms/step - accuracy: 0.0536 - loss: 3.4457 - val_accuracy: 0.0257 - val_loss: 3.2948
Epoch 4/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 453us/step - accuracy: 0.0333 - loss: 1.6270 - val_accuracy: 0.0257 - val_loss: 3.2946
Epoch 5/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 27ms/step - accuracy: 0.0573 - loss: 3.2035 - val_accuracy: 0.0349 - val_loss: 3.2226
Epoch 6/20
[1m915/915[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 475us/step - accuracy: 0.0333 - loss: 1.60

KeyboardInterrupt: 

In [29]:
# Define different CNN models with regularization to experiment with
cnn_models = [
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3))], [128], dropout_rate=0.3, learning_rate=0.001),
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3)), (128, (3, 3))], [256], dropout_rate=0.3, learning_rate=0.001),
    create_cnn_model_with_regularization([(32, (3, 3)), (64, (3, 3)), (128, (3, 3)), (256, (3, 3))], [512], dropout_rate=0.3, learning_rate=0.001)
]

# Train and evaluate each CNN model
cnn_histories = []
for cnn_model in cnn_models:
    print(f'CNN model: {cnn_model}')
    history = cnn_model.fit(
        datagen.flow(X_train, y_train_enc, batch_size=30),
        steps_per_epoch=len(X_train) // 30,
        epochs=20,
        validation_data=(X_val, y_val_enc),
        # callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    val_accuracy = history.history['val_accuracy'][-1]
    cnn_histories.append((val_accuracy, cnn_model))


  super().__init__(


CNN model: <Sequential name=sequential_13, built=True>
Epoch 1/50
[1m857/857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 47ms/step - accuracy: 0.1280 - loss: 5.5636 - val_accuracy: 0.5583 - val_loss: 2.4991 - learning_rate: 0.0010
Epoch 2/50
[1m857/857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 823us/step - accuracy: 0.3125 - loss: 1.4754 - val_accuracy: 0.5697 - val_loss: 2.4557 - learning_rate: 0.0010
Epoch 3/50
[1m857/857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 33ms/step - accuracy: 0.4303 - loss: 2.6974 - val_accuracy: 0.5951 - val_loss: 2.2233 - learning_rate: 0.0010
Epoch 4/50
[1m857/857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.3438 - loss: 1.4228 - val_accuracy: 0.5706 - val_loss: 2.2568 - learning_rate: 0.0010
Epoch 5/50
[1m857/857[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 35ms/step - accuracy: 0.5061 - loss: 2.5087 - val_accuracy: 0.7362 - val_loss: 1.9534 - learning_rate: 0.0010
Epoch 6

KeyboardInterrupt: 

In [31]:
# Let's see how big it is
print(X_train.shape)
print(X_test.shape)
n_total = X_train.shape[0]

(27455, 32, 32, 1)
(7172, 32, 32, 1)


In [None]:
# Combine all histories and models
all_histories = dense_histories + cnn_histories

# Select the best model based on validation accuracy
best_model = max(all_histories, key=lambda x: x[0])[1]

# Evaluate the best model on the final test set
final_eval = best_model.evaluate(X_final_test, y_final_test_enc, verbose=0)
print(f"Best Model - Final Test Accuracy: {final_eval[1]}")


In [None]:
# Predict on the final test set
y_pred = np.argmax(best_model.predict(X_final_test), axis=1)

# Compute accuracy per class
accuracy_per_class = []
for i in range(24):
    if np.sum(y_final_test == i) > 0:
        accuracy = np.mean(y_pred[y_final_test == i] == i)
        accuracy_per_class.append(accuracy)
    else:
        accuracy_per_class.append(np.nan)  # Handle classes with no samples

# Filter out NaN values to calculate the median accuracy
valid_accuracies = [acc for acc in accuracy_per_class if not np.isnan(acc)]
median_accuracy = np.median(valid_accuracies)

print(f"Unbiased Median Accuracy: {median_accuracy}")

# Identify the letter with the highest individual accuracy
highest_accuracy_class = np.nanargmax(accuracy_per_class)
print(f"Letter with Highest Accuracy: {chr(highest_accuracy_class + ord('A'))}")

# Identify the letter with the lowest individual accuracy
lowest_accuracy_class = np.nanargmin(accuracy_per_class)
print(f"Letter with Lowest Accuracy: {chr(lowest_accuracy_class + ord('A'))}")

# Calculate the confusion matrix
conf_matrix = confusion_matrix(y_final_test, y_pred)

# Set the diagonal elements to zero to exclude correct classifications
np.fill_diagonal(conf_matrix, 0)

# Find the indices of the top three errors
errors = np.unravel_index(np.argsort(-conf_matrix, axis=None), conf_matrix.shape)

# Get the top three most common errors
common_errors = [(chr(errors[0][i] + ord('A')), chr(errors[1][i] + ord('A'))) for i in range(3)]
print(f"Most Common Errors: {common_errors}")


# Report overall mean accuracy and accuracy per letter
mean_accuracy = np.nanmean(accuracy_per_class)
print(f"Overall Mean Accuracy: {mean_accuracy}")

# Print each letter and its accuracy
letters = [chr(i + ord('A')) for i in range(26) if i not in [9, 25]]
for i, acc in enumerate(accuracy_per_class):
    print(f"Letter {letters[i]}: Accuracy {acc}")
