In [None]:
!pip install opencv-python tensorflow

In [1]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model
from collections import deque, Counter  # For smoothing predictions

# Load trained model
model = load_model('model/sign_speak_model.h5')

# Clean label list (digits + letters only)
labels = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
    'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
    'u', 'v', 'w', 'x', 'y', 'z']

IMG_SIZE = 64  # Must match training size

cap = cv2.VideoCapture(0)
current_text = ""

# Prediction buffer for smoothing
prediction_buffer = deque(maxlen=15)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Define ROI
    x1, y1, x2, y2 = 100, 100, 300, 300
    roi = frame[y1:y2, x1:x2]

    # Preprocess ROI
    img = cv2.resize(roi, (IMG_SIZE, IMG_SIZE))
    img = img / 255.0
    img = np.expand_dims(img, axis=0)

    # Predict
    prediction = model.predict(img)
    confidence = np.max(prediction)
    class_index = np.argmax(prediction)

    if class_index < len(labels):
        predicted_label = labels[class_index]
    else:
        predicted_label = "Unknown"

    # Only add to buffer if confident
    if confidence > 0.85:
        prediction_buffer.append(predicted_label)

    # Stability check (majority vote from buffer)
    if len(prediction_buffer) == prediction_buffer.maxlen:
        most_common = Counter(prediction_buffer).most_common(1)[0]
        label, count = most_common
        if count > 9:  # majority must agree
            current_text += label
            prediction_buffer.clear()  # clear buffer after accepting

    # Draw rectangle and prediction text
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.putText(frame, f"Prediction: {predicted_label}", (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.putText(frame, f"Text: {current_text}", (50, 400),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # Show frame
    cv2.imshow("SignSpeak AI - Live", frame)

    # Break loop with 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 585ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 134ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 169ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 148ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

KeyboardInterrupt: 

In [None]:
import os

data_dir = 'data/asl_dataset_train'  # change if your path is different
labels = sorted(os.listdir(data_dir))
print("Labels:", labels)
print("Total labels:", len(labels))
