In [1]:

import cv2
import numpy as np
import time
from collections import deque
from tensorflow.keras.models import load_model

# === Config ===
# Number of preprocessed frames buffered before the first prediction is attempted.
SEQUENCE_LENGTH = 15
# Frames are resized to IMG_SIZE x IMG_SIZE pixels before being fed to the model.
IMG_SIZE = 224
# A prediction is only reported when its top class score exceeds this value.
CONFIDENCE_THRESHOLD = 0.8
COOLDOWN_SECONDS = 2.0  # Cooldown between valid predictions
# Class names, indexed by the model's output position.
# NOTE(review): order must match the label encoding used at training time — confirm.
CLASSES = ['bed', 'before', 'candy', 'cool', 'drink', 'go', 'help', 'thin']
# Path to the saved Keras model file; kept here so it can be changed with the other settings.
MODEL_PATH = 'asl_mobilenetv2_finetuned.h5'

# --- Load Model ---
model = load_model(MODEL_PATH)


2025-04-08 20:40:33.774463: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# === Init webcam and sequence queue ===
# Real-time loop: read webcam frames, classify the most recent frame with the
# loaded model, and overlay the last confident label as a subtitle.
# Press 'q' in the display window to stop.
cap = cv2.VideoCapture(0)
# Rolling buffer of preprocessed frames. Only the newest frame is passed to the
# model; the buffer's fill level just delays predictions until SEQUENCE_LENGTH
# frames have been captured.
sequence = deque(maxlen=SEQUENCE_LENGTH)
label = ""
confidence = 0.0
last_prediction_time = 0

# === Prediction log list ===
# Each entry is a (HH:MM:SS timestamp, class name, confidence) tuple.
prediction_log = []

if not cap.isOpened():
    # Previously this case was silent: the loop below simply never ran.
    print("Could not open webcam (device 0); no frames will be processed.")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the image so the on-screen view behaves like a mirror.
        frame = cv2.flip(frame, 1)
        resized_frame = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
        # NOTE(review): OpenCV delivers frames in BGR channel order — confirm the
        # model was trained on BGR input; otherwise convert to RGB before predicting.
        normalized_frame = resized_frame.astype("float32") / 255.0

        sequence.append(normalized_frame)

        # Predict only if sequence is full AND cooldown passed
        current_time = time.time()
        if len(sequence) == SEQUENCE_LENGTH and (current_time - last_prediction_time > COOLDOWN_SECONDS):
            # Single-frame batch built from the newest frame.
            # Shape: (1, IMG_SIZE, IMG_SIZE, 3) for a 3-channel camera frame.
            input_data = np.expand_dims(sequence[-1], axis=0)
            prediction = model.predict(input_data, verbose=0)[0]
            confidence = np.max(prediction)
            predicted_index = np.argmax(prediction)

            if confidence > CONFIDENCE_THRESHOLD:
                label = f"{CLASSES[predicted_index]} ({confidence*100:.1f}%)"
                # The cooldown restarts only on a confident prediction, so
                # low-confidence frames are re-evaluated on every iteration.
                last_prediction_time = current_time

                # Take the timestamp once so the printed line and the log entry agree.
                timestamp = time.strftime('%H:%M:%S')

                # Print to notebook
                print(f"[{timestamp}] Prediction: {CLASSES[predicted_index]} ({confidence*100:.1f}%)")
                prediction_log.append((timestamp, CLASSES[predicted_index], confidence))

        # === Subtitle display ===
        height, width, _ = frame.shape
        if label:
            # Solid black strip along the bottom edge as a background for the text.
            cv2.rectangle(frame, (0, height - 40), (width, height), (0, 0, 0), -1)
            cv2.putText(frame, label, (10, height - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        cv2.imshow("ASL Real-Time Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if the kernel is
    # interrupted or an exception is raised inside the loop.
    cap.release()
    cv2.destroyAllWindows()



[20:41:02] Prediction: before (91.3%)
[20:41:04] Prediction: before (92.8%)
[20:41:06] Prediction: before (90.0%)
[20:41:08] Prediction: before (88.9%)
[20:41:12] Prediction: before (83.5%)
[20:41:14] Prediction: before (86.9%)
[20:41:18] Prediction: before (82.2%)
[20:41:23] Prediction: before (82.7%)
[20:41:26] Prediction: before (88.4%)
[20:41:29] Prediction: before (94.5%)
[20:41:32] Prediction: before (86.3%)
[20:41:38] Prediction: drink (83.6%)
[20:41:40] Prediction: thin (93.8%)
[20:41:43] Prediction: drink (80.9%)
[20:41:45] Prediction: thin (82.8%)
[20:41:47] Prediction: drink (93.2%)
[20:41:49] Prediction: drink (95.0%)
[20:41:51] Prediction: go (83.3%)
[20:41:53] Prediction: go (91.5%)
[20:41:55] Prediction: thin (83.1%)
[20:42:00] Prediction: help (86.4%)
[20:42:03] Prediction: help (81.8%)
[20:42:07] Prediction: help (85.4%)
[20:42:09] Prediction: thin (90.2%)
[20:42:16] Prediction: bed (95.6%)
[20:42:18] Prediction: thin (81.6%)
[20:42:21] Prediction: bed (99.9%)
[20:42:2

: 