Import the necessary modules:
- NumPy for numerical operations on the data
- TensorFlow for the input pipeline, preprocessing, and the CNN itself
- Matplotlib for previewing images and plotting the CNN's training metrics
- PIL (Pillow) for opening and inspecting individual image files
- Keras for defining the overall model architecture

In [38]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import keras
from keras import layers
from keras import ops
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.callbacks import EarlyStopping

In [39]:
# Dataset locations, all relative to one root folder so the root can be
# changed in a single place.
root_data_dir = "datasets"
train_data_dir = f"{root_data_dir}/asl_alphabet_train"
test_data_dir = f"{root_data_dir}/asl_alphabet_test"

In [40]:
# Input pipeline settings: number of images per batch and the size (in
# pixels) every image is resized to when it is loaded.
batch_size = 32
img_width = 128
img_height = 128

In [41]:
# Build the training, validation and test datasets from image folders.
# Class labels are inferred from the sub-directory names.

# `image_size` is documented as (height, width); pass the dimensions in that
# order so a non-square configuration is not silently transposed.
image_size = (img_height, img_width)

# Training and validation are the two halves of one 80/20 split over the
# same directory, so both calls use the same seed and split fraction.
train_ds = tf.keras.utils.image_dataset_from_directory(
  train_data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=image_size,
  batch_size=batch_size
)

val_ds = tf.keras.utils.image_dataset_from_directory(
  train_data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=image_size,
  # Set explicitly so validation follows `batch_size` instead of the
  # library default.
  batch_size=batch_size
)

# NOTE(review): the recorded output reports 29 classes for the training
# directory but only 28 for the test directory, so the label indices inferred
# for test_ds may not line up with the model's output indices — confirm the
# class mapping before evaluating on test_ds.
test_ds = tf.keras.utils.image_dataset_from_directory(
  test_data_dir,
  image_size=image_size,
  batch_size=batch_size,
  shuffle=False  # keep file order stable so predictions can be matched to files
)

Found 87000 files belonging to 29 classes.
Using 69600 files for training.
Found 87000 files belonging to 29 classes.
Using 17400 files for validation.
Found 28 files belonging to 28 classes.


In [42]:
# The number of output units of the classifier equals the number of class
# folders discovered in the training directory.
class_names = train_ds.class_names
num_classes = len(class_names)

In [43]:
# Layer definitions for the CNN, grouped by stage. Nothing is connected here;
# the lists are only declared so a later cell can chain them in order.

# Derive the input shape from the same constants used to resize the images,
# so the model and the datasets cannot drift apart. 3 = colour channels.
inputs = keras.Input(shape=(img_height, img_width, 3))

# Scale pixel values by 1/255.
preprocessing = [
  layers.Rescaling(1./255)
]

# Random image perturbations applied to the rescaled input.
augmentation = [
  layers.RandomFlip("horizontal"),
  layers.RandomRotation(0.1),
  layers.RandomZoom(0.1),
  layers.RandomContrast(0.1)
]

# Three Conv2D + MaxPooling2D stages with growing filter counts (32, 64, 128),
# followed by a dense classifier head.
convlayers = [
  layers.Conv2D(32, (3,3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  layers.Conv2D(64, (3,3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  layers.Conv2D(128, (3,3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dropout(0.5),
  # Softmax output: one probability per class.
  layers.Dense(num_classes, activation='softmax')
]

In [44]:
# Wire the model: thread the input tensor through every stage in order
# (preprocessing -> augmentation -> convolutional stack and classifier).
x = inputs
for layer in [*preprocessing, *augmentation, *convlayers]:
  x = layer(x)

cnn_model = tf.keras.Model(inputs=inputs, outputs=x, name="ASL-Detector")

In [45]:
# Print the summary of the assembled model as a sanity check before training.
cnn_model.summary()

In [46]:
# The final Dense layer already applies softmax, so the loss is told to
# expect probabilities rather than raw logits (from_logits=False).
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

cnn_model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

In [47]:
# Stop training once val_loss has gone 3 consecutive epochs without
# improving; restore_best_weights=True asks Keras to put back the best
# weights seen instead of keeping the last ones.
early_stop = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

# Train for at most 20 epochs, validating on val_ds after each one.
history = cnn_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[early_stop],
)

Epoch 1/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 282ms/step - accuracy: 0.3162 - loss: 2.2519 - val_accuracy: 0.7041 - val_loss: 0.9715
Epoch 2/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m446s[0m 205ms/step - accuracy: 0.5734 - loss: 1.2758 - val_accuracy: 0.8224 - val_loss: 0.5981
Epoch 3/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m421s[0m 194ms/step - accuracy: 0.6790 - loss: 0.9275 - val_accuracy: 0.8839 - val_loss: 0.3810
Epoch 4/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m458s[0m 211ms/step - accuracy: 0.7395 - loss: 0.7484 - val_accuracy: 0.9254 - val_loss: 0.2538
Epoch 5/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m417s[0m 192ms/step - accuracy: 0.7776 - loss: 0.6342 - val_accuracy: 0.9387 - val_loss: 0.2048
Epoch 6/20
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 190ms/step - accuracy: 0.8050 - loss: 0.5507 - val_accuracy: 0.9313 - val_loss:

In [49]:
# Save the model to "asl_detector.keras" (a relative path) so it can be
# reloaded without retraining.
cnn_model.save("asl_detector.keras")