In [24]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os

# Define paths
TRAIN_DIR = '/kaggle/input/asl-alphabet/asl_alphabet_train/asl_alphabet_train'

# Image parameters
# `image_dataset_from_directory` resizes every file to one (height, width) so
# that images can be stacked into batches; the size therefore has to be given
# explicitly (None is not a valid value). 200x200 matches the default
# `input_shape` of `create_asl_cnn_model`.
# NOTE(review): presumably also the native resolution of the dataset - confirm.
IMAGE_SIZE = (200, 200)
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.2
SEED = 42

# Define the AUTOTUNE constant
AUTOTUNE = tf.data.AUTOTUNE


def _load_split(subset):
    """Load one side of the train/validation split of TRAIN_DIR.

    Args:
        subset: "training" or "validation", forwarded to
            `image_dataset_from_directory`.

    Returns:
        A batched `tf.data.Dataset` of (images, one-hot labels).

    Both splits go through this single helper so they always share the same
    `seed` and `validation_split`; if those differed between the two calls the
    splits would no longer be guaranteed to be disjoint.
    """
    return tf.keras.utils.image_dataset_from_directory(
        TRAIN_DIR,
        validation_split=VALIDATION_SPLIT,
        subset=subset,
        seed=SEED,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical'
    )


# Load Training and Validation Datasets
print("Loading training data...")
train_generator = _load_split("training")

print("Loading validation data...")
validation_generator = _load_split("validation")

# Read the class names now: the attribute lives on the dataset object returned
# by the loader and is not carried over by later `.map()` / `.prefetch()` calls.
class_names = train_generator.class_names
num_classes = len(class_names)

print(f"Number of classes: {num_classes}")
print(f"Class labels: {class_names}")


# Data Augmentation Layers
# Random geometric perturbations, listed in the order they are applied.
# Every layer is seeded with SEED.
_augmentation_steps = [
    layers.RandomFlip(mode="horizontal", seed=SEED),
    layers.RandomRotation(factor=0.1, seed=SEED),
    layers.RandomZoom(height_factor=0.2, seed=SEED),
    layers.RandomTranslation(0.2, 0.2, seed=SEED),
]
data_augmentation = keras.Sequential(_augmentation_steps, name="data_augmentation")

def preprocess(image, label):
    """Cast `image` to float32 and divide it by 255; `label` passes through unchanged.

    Returns:
        The `(scaled_image, label)` pair, in the shape `Dataset.map` expects.
    """
    # Assumes incoming pixel values span 0-255, which maps them onto 0-1 - TODO confirm.
    as_float = tf.cast(image, tf.float32)
    return as_float / 255.0, label

def _augment_batch(images, labels):
    """Run the augmentation stack in training mode on one batch; labels are untouched."""
    return data_augmentation(images, training=True), labels


# Training pipeline: augment -> rescale -> prefetch.
train_generator = train_generator.map(_augment_batch, num_parallel_calls=AUTOTUNE)
train_generator = train_generator.map(preprocess, num_parallel_calls=AUTOTUNE)
train_generator = train_generator.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: rescale -> prefetch (no augmentation).
validation_generator = (
    validation_generator
    .map(preprocess, num_parallel_calls=AUTOTUNE)
    .prefetch(buffer_size=AUTOTUNE)
)

Loading training data...
Found 87000 files belonging to 29 classes.
Using 69600 files for training.
Loading validation data...
Found 87000 files belonging to 29 classes.
Using 17400 files for validation.
Number of classes: 29
Class labels: ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']


In [25]:
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def _conv_block(filters):
    """Return the layers of one convolutional stage with `filters` channels.

    A stage is two (3x3 same-padded ReLU Conv2D -> BatchNormalization) pairs,
    followed by 2x2 max-pooling and 25% dropout.
    """
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
    ]


def create_asl_cnn_model(input_shape=(200, 200, 3), num_classes=29):
    """
    Creates a CNN model for ASL alphabet recognition.

    Architecture:
    - 4 Convolutional blocks with increasing filters (32, 64, 128, 256)
    - Batch Normalization for training stability
    - MaxPooling for spatial dimension reduction
    - Dropout for regularization
    - Global average pooling followed by Dense layers for classification

    Args:
        input_shape: Shape of one input image without the batch axis, e.g.
            (height, width, channels). It is handed to `layers.Input`.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled `Sequential` model.
    """
    # Declare the input with an explicit Input object rather than passing
    # `input_shape=` to the first Conv2D, which Keras 3 flags as deprecated.
    stack = [layers.Input(shape=input_shape)]

    # Four convolutional stages, doubling the filter count each time.
    for filters in (32, 64, 128, 256):
        stack.extend(_conv_block(filters))

    # Classification head: global average pooling collapses the spatial axes,
    # so the head sees one 256-value vector per image.
    stack.extend([
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    return models.Sequential(stack)

# Training schedule.
# Both patience windows are kept shorter than EPOCHS. A patience equal to the
# epoch budget can never be exhausted, so EarlyStopping would never stop the
# run. The LR window is shorter than the early-stop window so the learning
# rate is lowered before training is abandoned.
EPOCHS = 10
EARLY_STOP_PATIENCE = 4
LR_PLATEAU_PATIENCE = 2
INITIAL_LEARNING_RATE = 0.001

# Create the model
# Height and width are left as None: the network ends in global average
# pooling, so it does not depend on a fixed spatial size.
model = create_asl_cnn_model(
    input_shape=(None, None, 3),
    num_classes=num_classes
)

# Compile the model
# categorical_crossentropy pairs with the one-hot labels produced by
# label_mode='categorical' in the dataset loader.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=INITIAL_LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Display model architecture
model.summary()

# Define callbacks for training
callbacks = [
    # Stop training when validation loss stops improving
    EarlyStopping(
        monitor='val_loss',
        patience=EARLY_STOP_PATIENCE,
        restore_best_weights=True,
        verbose=1
    ),

    # Reduce learning rate when validation loss plateaus
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=LR_PLATEAU_PATIENCE,
        min_lr=1e-7,
        verbose=1
    ),

    # Save the best model (selected on validation accuracy, not loss)
    ModelCheckpoint(
        'best_asl_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    )
]

# Train the model
print("\n" + "="*50)
print("STARTING MODEL TRAINING")
print("="*50 + "\n")

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
    verbose=1
)


STARTING MODEL TRAINING

Epoch 1/10
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615ms/step - accuracy: 0.2302 - loss: 2.8703
Epoch 1: val_accuracy improved from -inf to 0.31080, saving model to best_asl_model.keras
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m361s[0m 633ms/step - accuracy: 0.2305 - loss: 2.8689 - val_accuracy: 0.3108 - val_loss: 2.7634 - learning_rate: 0.0010
Epoch 2/10
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611ms/step - accuracy: 0.7133 - loss: 0.8695
Epoch 2: val_accuracy improved from 0.31080 to 0.86425, saving model to best_asl_model.keras
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 627ms/step - accuracy: 0.7134 - loss: 0.8691 - val_accuracy: 0.8643 - val_loss: 0.3839 - learning_rate: 0.0010
Epoch 3/10
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 609ms/step - accuracy: 0.8780 - loss: 0.3786
Epoch 3: val_accuracy did not improve from 0.86425
[1m544/544[0m 

In [27]:
# `Model.save` takes the full destination path as its first argument; its
# second positional parameter is `overwrite`, so a directory passed there is
# treated as a truthy flag and ignored as a location. The directory therefore
# has to be part of the path itself.
# NOTE(review): Kaggle normally persists files written under /kaggle/working -
# confirm that /kaggle/output/ is the intended destination.
MODEL_OUTPUT_DIR = '/kaggle/output/'
os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
model.save(os.path.join(MODEL_OUTPUT_DIR, 'modelASL.keras'))