In [1]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
from collections import deque

# --- Model and configuration -------------------------------------------------
# Sequence classifier loaded from disk; below it is fed batches shaped
# (1, SEQ_LENGTH, 63).
model = load_model("model_lstm.h5")

WORDS = ["book", "hello", "no", "thankyou", "yes"]  # class index -> displayed word
SEQ_LENGTH = 30        # number of consecutive frames in one model input
CONF_THRESHOLD = 0.6   # top-class score must exceed this to count as a vote

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

sequence = deque(maxlen=SEQ_LENGTH)  # rolling window of per-frame keypoint vectors
pred_buffer = deque(maxlen=5)        # recent votes: a class index, or None for "no gesture"


def _majority_vote(votes):
    """Return the most frequent entry of ``votes`` (None if ``votes`` is empty).

    Ties go to the most recently added entry: ``max`` returns the first
    maximal item it meets and the votes are scanned newest-first. This keeps
    the result deterministic, unlike iterating over a ``set`` of the votes.
    """
    if not votes:
        return None
    return max(reversed(votes), key=votes.count)


cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        # Fail loudly instead of silently skipping the whole loop.
        raise RuntimeError("Could not open webcam (device 0)")

    with mp_hands.Hands(min_detection_confidence=0.6, min_tracking_confidence=0.6) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # Retrying here would spin forever without ever reaching
                # waitKey(), so the cell could not be stopped with "q".
                break

            frame = cv2.flip(frame, 1)  # mirror so the preview behaves like a mirror
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = hands.process(img)

            # All-zero vector stands for "no hand in this frame".
            keypoints = np.zeros(63)

            if res.multi_hand_landmarks:
                # NOTE(review): if more than one hand is detected, the last
                # hand's coordinates overwrite the earlier ones (unchanged
                # from the original) -- confirm this matches how the training
                # sequences were extracted.
                for hand_landmarks in res.multi_hand_landmarks:
                    keypoints = np.array(
                        [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                    ).flatten()
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            sequence.append(keypoints)

            # Shown until SEQ_LENGTH frames have been collected.
            prediction_text = "Waiting..."

            if len(sequence) == SEQ_LENGTH:
                input_data = np.array(sequence).reshape(1, SEQ_LENGTH, 63)
                pred = model.predict(input_data, verbose=0)[0]
                idx = int(np.argmax(pred))

                # Low-confidence frames vote for "no gesture" (None).
                pred_buffer.append(idx if pred[idx] > CONF_THRESHOLD else None)

                # Smooth over the last few votes; a None winner means that
                # "no gesture" is the plurality, so say so explicitly rather
                # than leaving the overlay on "Waiting...".
                winner = _majority_vote(pred_buffer)
                prediction_text = "No Gesture" if winner is None else WORDS[winner]

            cv2.putText(frame, prediction_text, (10, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow("ASL Word Recognition", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
finally:
    # Always free the camera and close the window, even if a frame raised;
    # otherwise the device stays locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()




In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf

# Image classifier loaded from disk; the capture loop feeds it
# (1, 28, 28, 1) inputs.
model = tf.keras.models.load_model("asl_mnist_model.h5")

# Class index -> letter: 25 entries, A..Z with "J" left out.
# NOTE(review): this table skips J but still contains Z at index 24 -- confirm
# it matches the label encoding the model was trained with.
label_map = dict(enumerate("ABCDEFGHIKLMNOPQRSTUVWXYZ"))

# MediaPipe handles; the detector is limited to a single hand.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1)

# Webcam at device index 0.
cap = cv2.VideoCapture(0)

smooth_preds = []   # most recent accepted letters, oldest first
frames_avg = 6      # how many recent letters take part in the vote
conf_th = 0.75      # minimum top-class score for a prediction to be accepted

def smooth(p):
    """Record prediction ``p`` and return the majority over the recent window.

    ``p`` is appended to the module-level ``smooth_preds`` list, which is
    trimmed to the newest ``frames_avg`` entries. The entry occurring most
    often in that window is returned.

    Ties are resolved in favour of the most recently added entry: ``max``
    returns the first maximal item it encounters and the window is scanned
    newest-first. (Picking from ``set(smooth_preds)`` made ties depend on set
    iteration order, which is not stable between runs for strings.)
    """
    smooth_preds.append(p)
    if len(smooth_preds) > frames_avg:
        smooth_preds.pop(0)
    return max(reversed(smooth_preds), key=smooth_preds.count)

# Capture loop: find one hand, crop and binarize it, classify the 28x28 crop
# and overlay the (smoothed) letter. Press "q" in the window to stop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame available (camera missing or disconnected); carrying on
            # would pass None to cv2.flip and crash.
            break

        frame = cv2.flip(frame, 1)  # mirror the preview
        imgRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        result = hands.process(imgRGB)

        text = "No Hand"

        if result.multi_hand_landmarks:
            hand = result.multi_hand_landmarks[0]
            h, w, _ = frame.shape

            # Landmark coordinates are scaled by the frame size to get pixels.
            xs = [int(lm.x * w) for lm in hand.landmark]
            ys = [int(lm.y * h) for lm in hand.landmark]

            # Bounding box with 20 px padding, clamped to the frame on BOTH
            # sides: landmarks can fall outside the image, and a one-sided
            # clamp can produce an empty or negative-index slice.
            x1 = min(max(min(xs) - 20, 0), w)
            y1 = min(max(min(ys) - 20, 0), h)
            x2 = min(max(max(xs) + 20, 0), w)
            y2 = min(max(max(ys) + 20, 0), h)

            # A hand was detected; stays "Unknown" unless a confident letter
            # is produced below.
            text = "Unknown"

            if x2 > x1 and y2 > y1:  # skip degenerate (empty) crops
                roi = frame[y1:y2, x1:x2]
                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

                # Binarize with Otsu's threshold (intent: hand white,
                # background black).
                # NOTE(review): THRESH_BINARY turns the brighter side white,
                # so a hand darker than its background comes out inverted --
                # confirm this preprocessing matches the training data.
                blur = cv2.GaussianBlur(gray, (5, 5), 0)
                _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

                # Scale to the 28x28x1, [0, 1] input passed to the model.
                img = cv2.resize(thresh, (28, 28))
                img = img / 255.0
                img = img.reshape(1, 28, 28, 1)

                probs = model.predict(img, verbose=0)[0]
                pred = int(np.argmax(probs))
                conf = float(probs[pred])

                # .get() keeps an out-of-table class index from raising
                # KeyError and killing the loop.
                letter = label_map.get(pred)
                if conf > conf_th and letter is not None:
                    letter = smooth(letter)
                    text = f"{letter} ({conf:.2f})"

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(frame, text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.imshow("ASL Detection", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, the MediaPipe detector and the window even when an
    # exception interrupts the loop; otherwise the webcam stays locked until
    # the kernel is restarted.
    cap.release()
    hands.close()
    cv2.destroyAllWindows()
