In [None]:
import cv2

# Open the default camera (0 usuammlly refers to the first camera)
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open camera.")
    exit()

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    
    if not ret:
        print("Failed to grab frame")
        break
    
    # Display the resulting frame
    cv2.imshow('Camera', frame)
    
    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close windows
cap.release()
cv2.destroyAllWindows()


In [None]:
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'Nothing', 'O', 'P', 'Q', 'R', 'S', 'Space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

In [4]:
import cv2
import numpy as np
import mediapipe as mp

import tflite_runtime.interpreter as tflite

class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'Nothing', 'O', 'P', 'Q', 'R', 'S', 'Space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
TARGET_SIZE = 224
MODEL_PATH = "/home/rafayahmadraza/SignBuddy/Models/asl_model_1.tflite"   # your trained model file


# -------------------------------------------------------
# Resize the cropped hand to 224×224 while keeping aspect
# -------------------------------------------------------
def resize_with_padding(img, target=TARGET_SIZE):
    h, w = img.shape[:2]
    aspect = w / h

    if aspect > 1:  # wide
        new_w = target
        new_h = int(target / aspect)
    else:  # tall
        new_h = target
        new_w = int(target * aspect)

    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Add black padding
    top = (target - new_h) // 2
    bottom = target - new_h - top
    left = (target - new_w) // 2
    right = target - new_w - left

    padded = cv2.copyMakeBorder(
        resized, top, bottom, left, right,
        cv2.BORDER_CONSTANT, value=(0, 0, 0)
    )
    return padded


# -------------------------------------------------------
# Load TFLite Model
# -------------------------------------------------------
interpreter = tflite.Interpreter(model_path=MODEL_PATH)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Model input:", input_details[0]["shape"])


# -------------------------------------------------------
# Initialize MediaPipe Hands
# -------------------------------------------------------
mp_draw = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

cap = cv2.VideoCapture(0)


# -------------------------------------------------------
# Main loop
# -------------------------------------------------------
while True:
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:

            # Extract bounding box
            xs = [lm.x for lm in hand_landmarks.landmark]
            ys = [lm.y for lm in hand_landmarks.landmark]

            xmin = int(min(xs) * w)
            xmax = int(max(xs) * w)
            ymin = int(min(ys) * h)
            ymax = int(max(ys) * h)

            pad = 30
            xmin = max(0, xmin - pad)
            ymin = max(0, ymin - pad)
            xmax = min(w, xmax + pad)
            ymax = min(h, ymax + pad)

            # Draw box
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # Crop
            crop = frame[ymin:ymax, xmin:xmax]

            if crop.size > 0:
                crop_96 = resize_with_padding(crop)

                # Show resized hand
                cv2.imshow("Hand 96x96", crop_96)

                # Prepare input for model
                inp = cv2.cvtColor(crop_96, cv2.COLOR_BGR2RGB)
                inp = inp.astype(np.float32) / 255.0
                inp = np.expand_dims(inp, axis=0)

                # Run inference
                interpreter.set_tensor(input_details[0]["index"], inp)
                interpreter.invoke()
                prediction = interpreter.get_tensor(output_details[0]["index"])

                # Get predicted class
                cls = np.argmax(prediction)
                conf = np.max(prediction)

                # Display prediction
                text = f"Class: {class_names[np.argmax(prediction)]}  ({conf:.2f})"
                cv2.putText(frame, text, (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

                # Draw landmarks
                mp_draw.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS
                )

    cv2.imshow("Live", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
I0000 00:00:1765744265.677002  125109 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1765744265.679147  129888 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.1.5-1pop0~1753463422~24.04~8af185e), renderer: Mesa Intel(R) UHD Graphics (CML GT2)
W0000 00:00:1765744265.703547  129880 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1765744265.722953  129880 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Model input: [  1 224 224   3]


In [6]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model

# -------------------------------------------------------
# SETTINGS
# -------------------------------------------------------
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
               'Nothing', 'O', 'P', 'Q', 'R', 'S', 'Space', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z','del']
print(len(class_names))

TARGET_SIZE = 96
MODEL_PATH = "/home/rafayahmadraza/SignBuddy/Models/asl_model_6.h5"   # 👈 now .h5 file


# -------------------------------------------------------
# Resize the cropped hand to 224×224 while keeping aspect
# -------------------------------------------------------
def resize_with_padding(img, target=TARGET_SIZE):
    h, w = img.shape[:2]
    aspect = w / h

    if aspect > 1:  # wide
        new_w = target
        new_h = int(target / aspect)
    else:  # tall
        new_h = target
        new_w = int(target * aspect)

    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Add black padding
    top = (target - new_h) // 2
    bottom = target - new_h - top
    left = (target - new_w) // 2
    right = target - new_w - left

    padded = cv2.copyMakeBorder(
        resized, top, bottom, left, right,
        cv2.BORDER_CONSTANT, value=(0, 0, 0)
    )
    return padded


# -------------------------------------------------------
# Load Keras Model
# -------------------------------------------------------
print("Loading Keras model...")
model = load_model(MODEL_PATH)
print("Model loaded!")


# -------------------------------------------------------
# Initialize MediaPipe Hands
# -------------------------------------------------------
mp_draw = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6
)

cap = cv2.VideoCapture(0)


# -------------------------------------------------------
# Main loop
# -------------------------------------------------------
while True:
    ret, frame = cap.read()
    if not ret:
        break

    h, w, _ = frame.shape

    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(rgb)

    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:

            # Extract bounding box
            xs = [lm.x for lm in hand_landmarks.landmark]
            ys = [lm.y for lm in hand_landmarks.landmark]

            xmin = int(min(xs) * w)
            xmax = int(max(xs) * w)
            ymin = int(min(ys) * h)
            ymax = int(max(ys) * h)

            pad = 30
            xmin = max(0, xmin - pad)
            ymin = max(0, ymin - pad)
            xmax = min(w, xmax + pad)
            ymax = min(h, ymax + pad)

            # Draw box
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

            # Crop
            crop = frame[ymin:ymax, xmin:xmax]

            if crop.size > 0:
                crop_96 = resize_with_padding(crop)

                # Show resized hand
                cv2.imshow("Hand 224x224", crop_96)

                # Prepare input for model
                inp = cv2.cvtColor(crop_96, cv2.COLOR_BGR2RGB)
                inp = inp.astype(np.float32) / 255.0
                inp = np.expand_dims(inp, axis=0)

                # Predict
                prediction = model.predict(inp)[0]

                # Get predicted class
                cls = np.argmax(prediction)
                conf = prediction[cls]

                # Display prediction
                text = f"Class: {class_names[cls]}  ({conf:.2f})"
                cv2.putText(frame, text, (xmin, ymin - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

                # Draw landmarks
                mp_draw.draw_landmarks(
                    frame, hand_landmarks, mp_hands.HAND_CONNECTIONS
                )
            

    cv2.imshow("Live", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


29
Loading Keras model...


I0000 00:00:1766080902.949868   30210 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1766080902.951658  102747 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.1.5-1pop0~1753463422~24.04~8af185e), renderer: Mesa Intel(R) UHD Graphics (CML GT2)
W0000 00:00:1766080902.974823  102742 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766080902.989356  102738 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Model loaded!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 989ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0