# Imports

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

# LOAD EMNIST LETTERS DATASET

In [None]:
# Fetch the EMNIST "letters" configuration through TensorFlow Datasets.
# Both splits are requested in one call and come back in the order listed;
# as_supervised=True yields (image, label) pairs and with_info=True also
# returns the dataset metadata object.
splits, ds_info = tfds.load(
    name='emnist/letters',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
ds_train, ds_test = splits

# PREPROCESS DATA

In [3]:
def preprocess(image, label):
    """Scale one image to [0, 1] and shift its label to start at zero.

    Args:
        image: Image tensor; cast to float32 and divided by 255
            (assumes pixel values in the 0-255 range).
        label: Integer class label; 1 is subtracted so that labels 1-26
            become 0-25, as required by a 26-way sparse classifier.

    Returns:
        The ``(image, label)`` pair after both adjustments. The image keeps
        whatever shape it arrived with; no channel axis is added here.
    """
    # NOTE(review): the TFDS catalog states EMNIST images are stored
    # transposed relative to the upright letter. Training is unaffected, but
    # inference inputs must use the same orientation -- confirm downstream.
    image = tf.cast(image, tf.float32) / 255.0
    label = label - 1
    return image, label


# Elements are independent, so let tf.data parallelize the map; element order
# is still preserved because map() is deterministic by default.
ds_train = ds_train.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
ds_test = ds_test.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)

# Convert TF datasets to NumPy arrays

In [None]:
def _dataset_to_numpy(dataset, batch_size=4096):
    """Materialize a dataset of (image, label) pairs as two NumPy arrays.

    The dataset is consumed in batches rather than one element at a time, so
    the number of Python-level iterations (and tensor-to-NumPy conversions)
    drops by roughly a factor of ``batch_size``.

    Args:
        dataset: A ``tf.data.Dataset`` yielding ``(image, label)`` pairs whose
            images all share one shape.
        batch_size: Number of elements converted per iteration.

    Returns:
        ``(images, labels)`` with the sample axis first, in dataset order.
    """
    image_chunks = []
    label_chunks = []
    for image_batch, label_batch in dataset.batch(batch_size):
        image_chunks.append(image_batch.numpy())
        label_chunks.append(label_batch.numpy())
    return np.concatenate(image_chunks), np.concatenate(label_chunks)


def _ensure_channel_axis(images):
    """Return images shaped (N, H, W, C), appending a channel axis if absent.

    An explicit rank check replaces the earlier squeeze-then-expand round
    trip: a bare ``np.squeeze`` removes *every* size-1 axis, so it would also
    drop the sample axis when only one image is present.
    """
    if images.ndim == 3:
        return np.expand_dims(images, axis=-1)
    return images


# 1. Pull both splits into memory as NumPy arrays.
X_train, y_train = _dataset_to_numpy(ds_train)
X_test, y_test = _dataset_to_numpy(ds_test)

# 2. Make sure the images carry a trailing channel dimension for Conv2D.
X_train = _ensure_channel_axis(X_train)
X_test = _ensure_channel_axis(X_test)

print(f"Training samples: {X_train.shape}, Test samples: {X_test.shape}")

# BUILD CNN MODEL

In [None]:

# CNN classifier: two convolutional stages followed by a dense head.
# Layers are listed in the order they are applied.
model = Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters, 2x2 max-pooling, and
    # 25% dropout. The first layer declares the 28x28 single-channel input.
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: same layout with 64 filters.
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, one hidden dense layer with 50% dropout,
    # then a softmax over the 26 letter classes (A-Z).
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(26, activation='softmax'),
])

# The sparse cross-entropy loss consumes integer class ids directly, so the
# labels do not need one-hot encoding.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

# DATA AUGMENTATION

In [6]:
# Light geometric augmentation applied on the fly to training batches.
datagen = ImageDataGenerator(
    rotation_range=10,        # random rotation of up to 10 degrees
    zoom_range=0.1,           # random zoom factor in [0.9, 1.1]
    width_shift_range=0.1,    # horizontal shift of up to 10% of the width
    height_shift_range=0.1,   # vertical shift of up to 10% of the height
)

# No datagen.fit(...) call is needed: fit() only computes the dataset
# statistics used by featurewise_center, featurewise_std_normalization and
# zca_whitening, and none of those options is enabled here.

# TRAIN MODEL

In [None]:
# Train for 30 epochs on augmented mini-batches of 32 images.
# The test split is passed as validation data, so the per-epoch val_* metrics
# are computed on the same samples that are evaluated after training.
train_batches = datagen.flow(X_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=30,
)

# SAVE TRAINED MODEL

In [None]:
# Write the trained model to disk; the ".h5" suffix selects the HDF5 format.
saved_model_path = 'emnist_letters_advanced.h5'
model.save(saved_model_path)

# EVALUATE MODEL

In [None]:
# evaluate() returns the loss followed by the compiled metrics; the model is
# compiled with a single metric, so the result unpacks into two values.
evaluation = model.evaluate(X_test, y_test)
test_loss, test_acc = evaluation
print(f"Test Accuracy: {test_acc*100:.2f}%")

# PLOT TRAINING CURVES

In [None]:
# Draw the training and validation curves side by side: accuracy on the
# left, loss on the right. Each row below describes one panel as
# (train key, validation key, train label, validation label, panel title).
panels = (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Loss'),
)

plt.figure(figsize=(12, 5))

for position, (train_key, val_key, train_label, val_label, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.legend()

plt.show()