In [1]:
# %%capture
# (uncomment the above line if you don't want OpenCV debug prints)

import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import joblib
import time

# ── CONFIG ────────────────────────────────────────────────────────────────
MODEL_PATH      = "models/sequence_gesture_model3.keras"
ENCODER_PATH    = "models/sequence_label_encoder3.pkl"
SEQUENCE_LENGTH = 120   # number of consecutive frames fed to the model at once
NUM_FEATURES    = 63    # values per frame: (x, y, z) for every hand landmark
CAMERA_INDEX    = 0     # OpenCV device index of the webcam
QUIT_KEY        = ord('q')

# ── LOAD MODEL & LABEL ENCODER ────────────────────────────────────────────
model = tf.keras.models.load_model(MODEL_PATH)
# NOTE: joblib.load unpickles the file, so only load encoders you trust.
le    = joblib.load(ENCODER_PATH)

# ── SETUP MEDIAPIPE & OPENCV ──────────────────────────────────────────────
mp_hands   = mp.solutions.hands
mp_draw    = mp.solutions.drawing_utils
hands      = mp_hands.Hands(static_image_mode=False,
                           max_num_hands=1,
                           min_detection_confidence=0.7,
                           min_tracking_confidence=0.5)

cap = cv2.VideoCapture(CAMERA_INDEX)
sequence = []       # rolling buffer of the last SEQUENCE_LENGTH feature vectors
pred_text = ""      # last predicted label (kept on screen until replaced)

# Everything that touches the camera runs inside try/finally so the device,
# the preview window and the MediaPipe graph are released even when the loop
# is aborted by an exception or a kernel interrupt.
try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open camera at index {CAMERA_INDEX}.")

    print("Starting live gesture recognition. Press 'q' to quit.")

    while True:
        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames; leave the loop quietly.
            break

        # Mirror the image so on-screen movement matches the user's movement,
        # then convert to RGB because that is what hands.process() is given.
        frame = cv2.flip(frame, 1)
        rgb   = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res   = hands.process(rgb)

        if res.multi_hand_landmarks:
            for handLms in res.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, handLms, mp_hands.HAND_CONNECTIONS)

                # Flatten the landmarks into one feature vector: x, y, z per landmark.
                feats = [coord
                         for lm in handLms.landmark
                         for coord in (lm.x, lm.y, lm.z)]
                sequence.append(feats)

                # Keep only the last SEQUENCE_LENGTH frames.
                # NOTE(review): the buffer is not cleared while no hand is
                # visible, so a window can mix frames from before and after a
                # gap — confirm this matches how the model was trained.
                if len(sequence) > SEQUENCE_LENGTH:
                    sequence.pop(0)

                # Once the window is full, classify it on every new frame.
                if len(sequence) == SEQUENCE_LENGTH:
                    X = np.array(sequence, dtype=np.float32).reshape(
                        1, SEQUENCE_LENGTH, NUM_FEATURES)
                    probs = model.predict(X, verbose=0)[0]
                    idx   = np.argmax(probs)
                    pred_text = le.inverse_transform([idx])[0]

        # Overlay the most recent prediction (empty until the first full window).
        cv2.putText(frame, f"Gesture: {pred_text}", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 2)

        cv2.imshow("Live Gesture Recognition", frame)
        if cv2.waitKey(1) & 0xFF == QUIT_KEY:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


Starting live gesture recognition. Press 'q' to quit.


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import joblib

# ── CONFIG ────────────────────────────────────────────────────────────────
# NOTE: this cell rebinds MODEL_PATH, ENCODER_PATH, model and le, replacing
# the sequence-model objects loaded by the first cell.
MODEL_PATH      = "models/static_gesture_model.keras"
ENCODER_PATH    = "models/static_label_encoder.pkl"
NUM_FEATURES    = 63    # values per frame: (x, y, z) for every hand landmark
CAMERA_INDEX    = 0     # OpenCV device index of the webcam
QUIT_KEY        = ord('q')

# ── LOAD MODEL & LABEL ENCODER ────────────────────────────────────────────
model = tf.keras.models.load_model(MODEL_PATH)
# NOTE: joblib.load unpickles the file, so only load encoders you trust.
le    = joblib.load(ENCODER_PATH)

# ── SETUP MEDIAPIPE & OPENCV ──────────────────────────────────────────────
mp_hands   = mp.solutions.hands
mp_draw    = mp.solutions.drawing_utils
hands      = mp_hands.Hands(static_image_mode=False,
                           max_num_hands=1,
                           min_detection_confidence=0.7,
                           min_tracking_confidence=0.5)

cap = cv2.VideoCapture(CAMERA_INDEX)
pred_text = ""      # last predicted label (kept on screen until replaced)

# Everything that touches the camera runs inside try/finally so the device,
# the preview window and the MediaPipe graph are released even when the loop
# is aborted by an exception or a kernel interrupt.
try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open camera at index {CAMERA_INDEX}.")

    print("Starting live static gesture recognition. Press 'q' to quit.")

    while True:
        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames; leave the loop quietly.
            break

        # Mirror the image so on-screen movement matches the user's movement,
        # then convert to RGB because that is what hands.process() is given.
        frame = cv2.flip(frame, 1)
        rgb   = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        res   = hands.process(rgb)

        if res.multi_hand_landmarks:
            for handLms in res.multi_hand_landmarks:
                mp_draw.draw_landmarks(frame, handLms, mp_hands.HAND_CONNECTIONS)

                # Flatten the landmarks into one feature vector: x, y, z per landmark.
                feats = [coord
                         for lm in handLms.landmark
                         for coord in (lm.x, lm.y, lm.z)]

                # Classify this single frame directly from its feature vector.
                X = np.array(feats, dtype=np.float32).reshape(1, NUM_FEATURES)
                probs = model.predict(X, verbose=0)[0]
                idx   = np.argmax(probs)
                pred_text = le.inverse_transform([idx])[0]

        # Overlay the most recent prediction (empty until a hand is first seen).
        cv2.putText(frame, f"Gesture: {pred_text}", (10, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,255,0), 2)

        cv2.imshow("Live Static Gesture Recognition", frame)
        if cv2.waitKey(1) & 0xFF == QUIT_KEY:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
