In [None]:
!pip install opencv-python matplotlib tensorflow keras

In [None]:
import cv2
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Load the trained model
# The path is relative to the notebook's working directory. The webcam loop
# further down feeds this model inputs reshaped to (1, 64, 64, 1): a single
# 64x64 grayscale image whose pixel values were divided by 255.
model = load_model('asl_model.keras')

In [None]:
# Load class labels from folder names.
# Only sub-directories are treated as classes. Stray files in the dataset
# folder (e.g. .DS_Store, Thumbs.db) would otherwise be sorted into the list
# and shift the index -> label lookup away from the class indices Keras
# derives from the sub-directories alone.
dataset_path = 'dataset/asl_alphabet_train/'
class_names = sorted(
    entry for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
)  # A-Z, del, nothing, space
print("Class Labels:", class_names)

In [None]:
# State shared with the sentence-building webcam loop
delay_frames = 20            # consecutive frames a label must hold before it is added
frames_since_last_add = 0    # length of the current run of identical confident labels
last_predicted_label = ""    # label seen on the previous frame
sentence = ""                # text assembled so far

In [None]:
# Optional sanity check: build the training-split generator only to print the
# label -> index mapping Keras derives from the dataset folders.
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    directory=dataset_path,
    subset='training',
    class_mode='categorical',
    color_mode='grayscale',
    batch_size=32,
    target_size=(64, 64),
)
print("Class Indices:", train_generator.class_indices)

In [None]:
# Start webcam
# Relies on state defined in earlier cells: model, class_names, sentence,
# last_predicted_label, frames_since_last_add and delay_frames.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0).")

# Region of interest (ROI): the fixed square of each frame that is classified.
x1, y1, x2, y2 = 100, 100, 300, 300

# try/finally guarantees the camera and the preview window are released even
# if the loop is interrupted or a frame fails to process.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        roi = frame[y1:y2, x1:x2]

        # Preprocess ROI: 64x64 grayscale scaled to [0, 1], shaped as a batch of one
        roi_resized = cv2.resize(roi, (64, 64))
        roi_gray = cv2.cvtColor(roi_resized, cv2.COLOR_BGR2GRAY)
        roi_normalized = roi_gray / 255.0
        roi_reshaped = np.reshape(roi_normalized, (1, 64, 64, 1))

        # Predict
        predictions = model.predict(roi_reshaped, verbose=0)
        confidence = np.max(predictions)
        class_id = np.argmax(predictions)

        # Get label from class_id
        predicted_label = class_names[class_id]

        # Debug info: log only when the prediction changes, so the notebook
        # output is not flooded with one line per frame.
        if predicted_label != last_predicted_label:
            print(f"Predicted: {predicted_label}, Confidence: {confidence:.2f}")

        # Filter by confidence threshold
        if confidence >= 0.75 and predicted_label != "nothing":
            if predicted_label == last_predicted_label:
                frames_since_last_add += 1
            else:
                frames_since_last_add = 1  # start fresh for new label

            # Add to sentence if label is stable for required frames
            if frames_since_last_add == delay_frames:
                if predicted_label == "space":
                    sentence += " "
                elif predicted_label == "del":
                    sentence = sentence[:-1]
                else:
                    sentence += predicted_label

                frames_since_last_add = 0  # reset after adding
        else:
            frames_since_last_add = 0  # reset if not confident

        # Update last label
        last_predicted_label = predicted_label

        # Draw ROI box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

        # Show current prediction
        cv2.putText(frame, f"Predicted: {predicted_label}", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show full sentence
        cv2.putText(frame, f"Sentence: {sentence}", (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)

        # Show the frame
        cv2.imshow("Sign Language Detection", frame)

        # Quit ('q') or clear the sentence ('c')
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('c'):
            sentence = ""
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import cv2
import numpy as np
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import time
import pyttsx3

# ---------------- CONFIG ---------------- #
# Files on disk
DATASET_PATH = 'dataset/asl_alphabet_train/'
MODEL_PATH = 'asl_model.keras'

# Capture and recognition settings
WIDTH = 640
HEIGHT = 480
DELAY_FRAMES = 20       # stable frames needed before a label is committed
CONF_THRESH = 0.75      # minimum confidence; adjusted at runtime with +/-
roi = [100, 100, 300, 300]  # x1, y1, x2, y2 (moved with the arrow keys)

# Mutable loop state
paused = False
frame_count = 0
last_label = ""
sentence = ""

# TTS engine
# The recognition loop below calls tts.say(sentence) followed by
# tts.runAndWait() each time a stable prediction is committed.
tts = pyttsx3.init()

# ---------------- LOAD MODEL ---------------- #
model = load_model(MODEL_PATH)
# Class labels are the dataset's sub-directory names in sorted order. Plain
# files in the folder (e.g. .DS_Store) are skipped: they would shift the
# index -> label lookup away from the class indices Keras builds from the
# sub-directories alone.
classes = sorted(
    name for name in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, name))
)
print("Classes:", classes)

# Optional: print the label -> index mapping Keras infers from the dataset
# folders, to compare against the `classes` list.
gen = ImageDataGenerator(validation_split=0.2, rescale=1./255)
train_gen = gen.flow_from_directory(
    directory=DATASET_PATH,
    subset='training',
    class_mode='categorical',
    color_mode='grayscale',
    batch_size=32,
    target_size=(64, 64),
)
print("Class indices:", train_gen.class_indices)

# ---------------- START CAMERA ---------------- #
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail loudly: otherwise the loop below exits on its first read and the
    # cell appears to do nothing.
    cap.release()
    raise RuntimeError("Could not open webcam (device index 0).")

# Requested capture size; the driver may pick a different resolution.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, WIDTH)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, HEIGHT)

print("Press 'q' to quit, 'c' to clear, 'u' to undo word, 'p' to pause, +/- to adjust threshold.")

# Timestamp of the previous frame, used by the loop for the FPS overlay.
prev_time = time.time()

# Main recognition loop.
# Reads frames from `cap`, classifies the ROI with `model`, builds `sentence`
# from predictions that stay stable for DELAY_FRAMES frames, speaks and saves
# the sentence on every commit, and draws an overlay on the preview window.
# Keys: q quit, c clear, u undo word, p pause, +/- threshold, arrows move ROI.

ROI_SIZE = 200  # side length of the square ROI, in pixels

# Defaults for the overlay so drawing never depends on inference having run.
label, conf = "", 0.0

# try/finally guarantees the camera and the preview window are released even
# if the loop is interrupted or a frame fails to process.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        key = cv2.waitKey(1) & 0xFF

        # handle key presses
        if key == ord('q'):
            break
        elif key == ord('c'):
            sentence = ""
        elif key == ord('u'):
            # Drop the last word. Trailing spaces are stripped first; without
            # that, "hello world " would only lose its trailing space.
            sentence = sentence.rstrip(" ").rpartition(" ")[0]
        elif key == ord('p'):
            paused = not paused
        elif key == ord('+'):
            CONF_THRESH = min(1.0, CONF_THRESH + 0.05)
        elif key == ord('-'):
            CONF_THRESH = max(0.5, CONF_THRESH - 0.05)

        # Clamp against the size of the frame actually delivered: the capture
        # size requested via cap.set() is not guaranteed to be honoured.
        frame_h, frame_w = frame.shape[:2]
        max_x = max(0, frame_w - ROI_SIZE)
        max_y = max(0, frame_h - ROI_SIZE)

        # move ROI with arrows
        # NOTE(review): 81-84 are the arrow codes seen after the 0xFF mask on
        # some OpenCV GUI backends; other platforms may report different
        # values -- confirm on the target machine.
        if key == 82:  # up
            roi[1] -= 10
        elif key == 84:  # down
            roi[1] += 10
        elif key == 81:  # left
            roi[0] -= 10
        elif key == 83:  # right
            roi[0] += 10

        # Keep the square fully inside the frame.
        roi[0] = min(max(0, roi[0]), max_x)
        roi[1] = min(max(0, roi[1]), max_y)
        roi[2] = roi[0] + ROI_SIZE
        roi[3] = roi[1] + ROI_SIZE

        if not paused:
            # get ROI and turn it into a (1, 64, 64, 1) batch scaled to [0, 1]
            x1, y1, x2, y2 = roi
            hand = frame[y1:y2, x1:x2]
            hand_gray = cv2.cvtColor(cv2.resize(hand, (64, 64)), cv2.COLOR_BGR2GRAY)
            hand_norm = hand_gray / 255.0
            hand_input = np.expand_dims(hand_norm, axis=(0, -1))

            # predict
            pred = model.predict(hand_input, verbose=0)
            conf = np.max(pred)
            label = classes[np.argmax(pred)]

            # add to sentence if confident
            if conf >= CONF_THRESH and label != "nothing":
                if label == last_label:
                    frame_count += 1
                else:
                    frame_count = 1

                if frame_count >= DELAY_FRAMES:
                    if label == "space":
                        sentence += " "
                    elif label == "del":
                        sentence = sentence[:-1]
                    elif not sentence or sentence[-1] == " ":
                        # first letter of a word is capitalised
                        sentence += label.upper()
                    else:
                        # Remaining letters are lower-cased. The folder labels
                        # are upper-case, so appending `label` unchanged made
                        # this branch identical to the one above.
                        sentence += label.lower()

                    frame_count = 0
                    # speak updated sentence
                    # NOTE: runAndWait() blocks until speech has finished, so
                    # the video preview pauses while the sentence is spoken.
                    tts.say(sentence)
                    tts.runAndWait()
                    # save sentence to file
                    with open("sentence.txt", "w") as f:
                        f.write(sentence)
                    # append history
                    with open("sentence_history.txt", "a") as f:
                        f.write(sentence + "\n")

            last_label = label

        # draw stuff on screen
        cv2.rectangle(frame, (roi[0], roi[1]), (roi[2], roi[3]), (255, 0, 0), 2)

        # Guard against a zero interval between two clock readings.
        now = time.time()
        elapsed = now - prev_time
        prev_time = now
        fps = 1.0 / elapsed if elapsed > 0 else 0.0

        cv2.putText(frame, f"Predicted: {label} ({conf:.2f})", (10, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f"Sentence: {sentence}", (10, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
        cv2.putText(frame, f"FPS: {fps:.1f} | Thresh: {CONF_THRESH:.2f}", (10, 130),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

        cv2.imshow("ASL Recognition", frame)
finally:
    cap.release()
    cv2.destroyAllWindows()
