<a href="https://colab.research.google.com/github/ShreyaVats23/Sign_Language_Recognition/blob/main/Sign_Language_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Import libraries
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Step 2: Set dataset path from kagglehub
import kagglehub
dataset_path = kagglehub.dataset_download("harshvardhan21/sign-language-detection-using-images")

print("Path to dataset files:", dataset_path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/harshvardhan21/sign-language-detection-using-images?dataset_version_number=1...


100%|██████████| 268M/268M [00:01<00:00, 163MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/harshvardhan21/sign-language-detection-using-images/versions/1


In [3]:
# Step 3: Preprocess the data

# Define paths
train_dir = os.path.join(dataset_path, "data", "train")
test_dir = os.path.join(dataset_path, "data", "test")


**Purpose of ImageDataGenerator**


ImageDataGenerator is used to preprocess and augment image data on the fly (as it’s fed to the model).

This helps prevent overfitting and improves the model’s ability to generalize to new, unseen images.



In [4]:
# Set correct base directory
base_dir = os.path.join(dataset_path, "data")

# Augment only training data
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    validation_split=0.2
)

# Data generators with augmentation for training
train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(28, 28),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

val_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(28, 28),
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

Found 33600 images belonging to 35 classes.
Found 8400 images belonging to 35 classes.


In [5]:
# Step 4: Build the model
from tensorflow.keras.layers import Dropout

model = Sequential([
    Flatten(input_shape=(28, 28, 1)),
    Dense(64, activation='relu'),
    Dropout(0.5),  # 50% neurons dropped
    Dense(35, activation='softmax')
])

  super().__init__(**kwargs)


In [6]:
# Step 5: Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Step 6: Train the model
model.fit(train_generator, epochs=20, validation_data=val_generator)



  self._warn_if_super_not_called()


Epoch 1/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 43ms/step - accuracy: 0.3725 - loss: 2.2362 - val_accuracy: 0.8743 - val_loss: 0.5614
Epoch 2/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 43ms/step - accuracy: 0.7517 - loss: 0.8176 - val_accuracy: 0.9325 - val_loss: 0.3076
Epoch 3/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 41ms/step - accuracy: 0.8231 - loss: 0.5853 - val_accuracy: 0.9494 - val_loss: 0.2266
Epoch 4/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 42ms/step - accuracy: 0.8545 - loss: 0.4732 - val_accuracy: 0.9496 - val_loss: 0.1931
Epoch 5/20
[1m 724/1050[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m11s[0m 34ms/step - accuracy: 0.8709 - loss: 0.4190

In [None]:
# Step 7: Evaluate the model
test_loss, test_acc = model.evaluate(val_generator)
print(f'Test accuracy: {test_acc}')


In [None]:
# Map class indices to label names
class_indices = val_generator.class_indices  # e.g., {'A': 0, 'B': 1, ...}
index_to_label = {v: k for k, v in class_indices.items()}



In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Build class index map
class_indices = val_generator.class_indices
index_to_label = {v: k for k, v in class_indices.items()}

# Store first seen example of each class
seen = {}
batch_count = len(val_generator)

# Loop through all batches
for batch_num in range(batch_count):
    images, labels = val_generator[batch_num]
    for i in range(len(images)):
        label_index = np.argmax(labels[i])
        if label_index not in seen:
            seen[label_index] = (images[i], batch_num, i)
        if len(seen) == len(class_indices):
            break
    if len(seen) == len(class_indices):
        break

# Plot one sample per class with its batch:index and label
cols = 10
rows = int(np.ceil(len(seen) / cols))
fig, axes = plt.subplots(rows, cols, figsize=(18, 3 * rows))
axes = axes.flatten()

for idx, (label_index, (img, b, i)) in enumerate(sorted(seen.items())):
    axes[idx].imshow(img.reshape(28, 28), cmap='gray')
    axes[idx].set_title(f'Batch:{b}, Index:{i}\nLabel: {index_to_label[label_index]}')
    axes[idx].axis('off')

# Hide unused axes
for j in range(idx + 1, len(axes)):
    axes[j].axis('off')

plt.tight_layout()
plt.suptitle("One Sample per Sign — Use Index to Choose", fontsize=18, y=1.02)
plt.show()


In [None]:
# change batch_num and img_index according to the above image you want
batch_num = 255
img_index = 0

# Extract image and true label
img = val_generator[batch_num][0][img_index]
true_index = np.argmax(val_generator[batch_num][1][img_index])

# Predict
pred = model.predict(np.expand_dims(img, axis=0))
pred_index = np.argmax(pred)

# Map to labels
class_indices = val_generator.class_indices
index_to_label = {v: k for k, v in class_indices.items()}
pred_label = index_to_label[pred_index]
true_label = index_to_label[true_index]
print(f"Predicted class: {pred_label}, Actual class: {true_label}")



val_generator[0] returns a tuple of (images, labels) for batch 0

val_generator[0][0] → just the images in that batch

val_generator[0][1] → the labels in that batch

val_generator[0][0][3] → the 4th image in the batch

val_generator[0][1][3] → the 4th label

In [None]:
# Step 9: Visualize the sign with labels
plt.imshow(img.reshape(28, 28), cmap='gray')
plt.title(f"Predicted: {pred_label}, Actual: {true_label}")
plt.axis('off')
plt.show()
