### Data Preprocessing (Code - Keras)

In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight

# ✅ Path to dataset
DATA_PATH = r'C:\Users\User\Desktop\SIGN LANGUAGE PROJECT\Dataset\asl_alphabet_train'

# ✅ Image size and batch size
IMG_HEIGHT, IMG_WIDTH = 64, 64
BATCH_SIZE = 64

# ✅ Image augmentation + preprocessing
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    horizontal_flip=False,  # horizontal flip is NOT safe for ASL
    fill_mode='nearest'
)

# ✅ Training generator
train_generator = datagen.flow_from_directory(
    DATA_PATH,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# ✅ Validation generator
val_generator = datagen.flow_from_directory(
    DATA_PATH,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# ✅ Class info
class_indices = train_generator.class_indices
class_labels = list(class_indices.keys())
num_classes = len(class_labels)
print("Detected Classes:", class_indices)
print("Number of classes:", num_classes)

# ✅ Save class labels for inference use
import pickle
with open("class_labels.pkl", "wb") as f:
    pickle.dump(class_labels, f)

# ✅ Compute class weights to balance dataset
labels = train_generator.classes
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(labels),
    y=labels
)
class_weights_dict = dict(enumerate(class_weights))

print("\nClass weights:")
for idx, weight in class_weights_dict.items():
    print(f"{class_labels[idx]}: {weight:.4f}")


Found 66886 images belonging to 28 classes.
Found 16721 images belonging to 28 classes.
Detected Classes: {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25, 'nothing': 26, 'space': 27}
Number of classes: 28

Class weights:
A: 0.9953
B: 0.9953
C: 0.9953
D: 0.9953
E: 0.9953
F: 0.9953
G: 0.9953
H: 0.9953
I: 0.9953
J: 1.1452
K: 0.9953
L: 0.9953
M: 0.9953
N: 0.9953
O: 0.9953
P: 0.9953
Q: 0.9953
R: 0.9953
S: 0.9953
T: 0.9953
U: 0.9953
V: 0.9953
W: 0.9953
X: 0.9953
Y: 0.9953
Z: 0.9953
nothing: 0.9953
space: 0.9953


### CNN Model Architecture

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# ✅ CNN Model
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    Dropout(0.3),

    Conv2D(128, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# ✅ Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# ✅ Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=1, factor=0.5, verbose=1)
checkpoint = ModelCheckpoint('best_asl_model.h5', monitor='val_loss', save_best_only=True, verbose=1)

# ✅ Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stop, reduce_lr, checkpoint],
    class_weight=class_weights_dict,  # ensures balanced training
    verbose=1
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.1084 - loss: 3.1193
Epoch 1: val_loss improved from None to 4.03071, saving model to best_asl_model.h5




[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2219s[0m 2s/step - accuracy: 0.1599 - loss: 2.8468 - val_accuracy: 0.1393 - val_loss: 4.0307 - learning_rate: 0.0010
Epoch 2/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.3080 - loss: 2.1962
Epoch 2: val_loss improved from 4.03071 to 2.54190, saving model to best_asl_model.h5




[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2072s[0m 2s/step - accuracy: 0.3528 - loss: 2.0080 - val_accuracy: 0.3952 - val_loss: 2.5419 - learning_rate: 0.0010
Epoch 3/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4899 - loss: 1.5185
Epoch 3: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 3: val_loss did not improve from 2.54190
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1737s[0m 2s/step - accuracy: 0.5306 - loss: 1.3827 - val_accuracy: 0.2903 - val_loss: 4.2332 - learning_rate: 0.0010
Epoch 4/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 617ms/step - accuracy: 0.6479 - loss: 1.0000
Epoch 4: val_loss improved from 2.54190 to 1.20817, saving model to best_asl_model.h5




[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m727s[0m 695ms/step - accuracy: 0.6766 - loss: 0.9146 - val_accuracy: 0.6619 - val_loss: 1.2082 - learning_rate: 5.0000e-04
Epoch 5/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 417ms/step - accuracy: 0.7415 - loss: 0.7331
Epoch 5: val_loss improved from 1.20817 to 1.05069, saving model to best_asl_model.h5




[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m482s[0m 461ms/step - accuracy: 0.7575 - loss: 0.6865 - val_accuracy: 0.7100 - val_loss: 1.0507 - learning_rate: 5.0000e-04
Epoch 6/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 352ms/step - accuracy: 0.8035 - loss: 0.5622
Epoch 6: val_loss improved from 1.05069 to 0.86369, saving model to best_asl_model.h5




[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 395ms/step - accuracy: 0.8137 - loss: 0.5361 - val_accuracy: 0.7396 - val_loss: 0.8637 - learning_rate: 5.0000e-04
Epoch 7/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433ms/step - accuracy: 0.8392 - loss: 0.4618
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 7: val_loss did not improve from 0.86369
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m656s[0m 627ms/step - accuracy: 0.8481 - loss: 0.4360 - val_accuracy: 0.7342 - val_loss: 1.1174 - learning_rate: 5.0000e-04
Epoch 8/10
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8735 - loss: 0.3556
Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 8: val_loss did not improve from 0.86369
[1m1046/1046[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1424s[0m 1s/step - accuracy: 0.8832 - loss: 0.3383 - val_accuracy: 0.7902 -