In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Set image size and dataset path
IMG_SIZE = 128
DATASET_PATH = "ISL"

# Load dataset
X, y = [], []
class_labels = sorted(os.listdir(DATASET_PATH))  # Ensure consistent order

for label in class_labels:
    label_path = os.path.join(DATASET_PATH, label)
    for img_name in os.listdir(label_path):
        img_path = os.path.join(label_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load in grayscale
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE)) / 255.0  # Normalize
        X.append(img)
        y.append(label)

# Convert to NumPy arrays
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y = np.array(y)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_one_hot = to_categorical(y_encoded)

# Save label encoder for later use
np.save("label_classes.npy", label_encoder.classes_)

# Build CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(class_labels), activation='softmax')
])

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X, y_one_hot, epochs=20, batch_size=32, validation_split=0.2)

# Save model
model.save("ISL_HandSign_Model.h5")

print("Model trained and saved successfully.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 161ms/step - accuracy: 0.7860 - loss: 0.8302 - val_accuracy: 0.0000e+00 - val_loss: 16.4079
Epoch 2/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 169ms/step - accuracy: 0.9975 - loss: 0.0109 - val_accuracy: 0.0000e+00 - val_loss: 17.9079
Epoch 3/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 171ms/step - accuracy: 1.0000 - loss: 4.8702e-04 - val_accuracy: 0.0000e+00 - val_loss: 14.4737
Epoch 4/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 173ms/step - accuracy: 0.9999 - loss: 2.7671e-04 - val_accuracy: 0.0000e+00 - val_loss: 12.2145
Epoch 5/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 175ms/step - accuracy: 0.9999 - loss: 9.2184e-04 - val_accuracy: 0.0000e+00 - val_loss: 18.4365
Epoch 6/20
[1m176/176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 175ms/step - accuracy: 0.9992 - loss: 0.0062 - val_accuracy:



Model trained and saved successfully.


In [5]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import time

# Load trained model
model = tf.keras.models.load_model("ISL_HandSign_Model.h5")
class_names = np.load("label_classes.npy")

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_draw = mp.solutions.drawing_utils

# Open webcam
cap = cv2.VideoCapture(0)

sentence = ""
last_char_time = time.time()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame = cv2.flip(frame, 1)  # Mirror effect
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    results = hands.process(rgb_frame)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Get bounding box
            h, w, _ = frame.shape
            x_min, y_min = w, h
            x_max, y_max = 0, 0
            
            for lm in hand_landmarks.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                x_min, y_min = min(x, x_min), min(y, y_min)
                x_max, y_max = max(x, x_max), max(y, y_max)

            offset = 20
            x_min = max(0, x_min - offset)
            y_min = max(0, y_min - offset)
            x_max = min(w, x_max + offset)
            y_max = min(h, y_max + offset)

            hand_img = frame[y_min:y_max, x_min:x_max]
            if hand_img.size == 0:
                continue
            
            hand_img = cv2.cvtColor(hand_img, cv2.COLOR_BGR2GRAY)
            hand_img = cv2.resize(hand_img, (128, 128)) / 255.0
            hand_img = np.reshape(hand_img, (1, 128, 128, 1))

            # Predict hand sign
            prediction = model.predict(hand_img)
            predicted_class = np.argmax(prediction)
            confidence = prediction[0][predicted_class]

            if confidence > 0.8:
                detected_char = class_names[predicted_class]
                current_time = time.time()
                
                if current_time - last_char_time > 1:  # Prevent rapid repeating
                    sentence += detected_char
                    last_char_time = current_time
                
                cv2.putText(frame, f"Sign: {detected_char} ({confidence:.2f})", 
                            (x_min, y_min - 10), cv2.FONT_HERSHEY_SIMPLEX, 
                            1, (0, 255, 0), 2)
    
    # Display the sentence at the top
    cv2.putText(frame, f"Sentence: {sentence}", (50, 50), 
                cv2.FONT_HERSHEY_SIMPLEX, 1.2, (255, 0, 0), 3)

    # Show result
    cv2.imshow("ISL Hand Sign Recognition", frame)
    
    # If no hands detected for 5 seconds, finalize sentence
    if time.time() - last_char_time > 5 and sentence:
        print(f"Final Sentence: {sentence}")
        sentence = ""

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33

In [None]:
import cv2
import numpy as np
import os
import mediapipe as mp

# Paths
input_path = "ISL"  # Folder containing the collected images
processed_path = "processed_hand_signs"  # Folder to save processed images
os.makedirs(processed_path, exist_ok=True)

# Initialize Mediapipe Hand Detection
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

# Process each label (folder)
for label in os.listdir(input_path):
    input_label_path = os.path.join(input_path, label)
    output_label_path = os.path.join(processed_path, label)
    os.makedirs(output_label_path, exist_ok=True)

    # Skip if not a folder
    if not os.path.isdir(input_label_path):
        continue

    # Process each image
    for image_name in os.listdir(input_label_path):
        image_path = os.path.join(input_label_path, image_name)
        image = cv2.imread(image_path)

        if image is None:
            continue  # Skip if image not found

        # Convert to RGB (Mediapipe requires RGB format)
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Detect hands
        result = hands.process(rgb_image)

        # Create a blank mask
        mask = np.zeros(image.shape[:2], dtype=np.uint8)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Draw hand landmarks on the mask
                for landmark in hand_landmarks.landmark:
                    x = int(landmark.x * image.shape[1])
                    y = int(landmark.y * image.shape[0])
                    cv2.circle(mask, (x, y), 20, 255, -1)

                # Fill the mask
                cv2.fillPoly(mask, [np.array([[int(l.x * image.shape[1]), int(l.y * image.shape[0])] for l in hand_landmarks.landmark], np.int32)], 255)

        # Apply mask to original image
        masked_image = cv2.bitwise_and(image, image, mask=mask)

        # Convert to grayscale
        gray = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)

        # Resize to 128x128 for training
        resized = cv2.resize(gray, (128, 128))

        # Save processed image
        output_image_path = os.path.join(output_label_path, image_name)
        cv2.imwrite(output_image_path, resized)

        print(f"Processed & saved: {output_image_path}")

print("✅ Hand sign preprocessing completed!")


In [18]:
import os
import shutil

def duplicate_image(input_image_path, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for i in range(1, 100):
        output_image_path = os.path.join(output_folder, f"{i}.jpg")
        shutil.copy(input_image_path, output_image_path)
    
    print("100 images generated successfully.")

# Example usage
input_image_path = "ISL/s/2.jpg"  # Change this to your input image path
output_folder = "dataset/s"      # Change this to your desired output folder
duplicate_image(input_image_path, output_folder)

100 images generated successfully.
