In [1]:
# Image processing libraries
import cv2
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# GUI libraries
import tkinter as tk
from tkinter import ttk

# Image conversion for Tk, file system access, unique IDs, JSON and timestamps
from PIL import Image, ImageTk
import os
import uuid
import json
from datetime import datetime, timezone

In [2]:
# Resolve the project root from the LSM_BASE environment variable.
# During local development the variable can come from a .env file (loaded via
# python-dotenv when that package is installed); otherwise it must already be
# present in the process environment.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    # python-dotenv is optional: without it, rely on the real environment only
    pass

base_path = os.environ.get("LSM_BASE")
if base_path is None or base_path == "":
    raise ValueError("❌ Environment variable 'LSM_BASE' is not set!")

In [3]:
# Setup MediaPipe HandLandmarker
# The model bundle is expected at <LSM_BASE>/models/hand_landmarker.task
model_path = os.path.join(base_path, "models", "hand_landmarker.task")

# Short aliases for the MediaPipe Tasks classes
# (`python` and `vision` are the mediapipe.tasks modules imported at the top)
BaseOptions = python.BaseOptions
HandLandmarker = vision.HandLandmarker
HandLandmarkerOptions = vision.HandLandmarkerOptions
VisionRunningMode = vision.RunningMode

# IMAGE running mode: every detect() call receives a single image;
# up to two hands are reported per image.
options = HandLandmarkerOptions(
    num_hands=2,
    running_mode=VisionRunningMode.IMAGE,
    base_options=BaseOptions(model_asset_path=model_path),
)

hand_landmarker = HandLandmarker.create_from_options(options)


In [4]:
# Webcam setup (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Surface the problem right away: otherwise every cap.read() fails quietly
    # and the window just shows an empty preview with no explanation.
    print("⚠️ Could not open webcam (device index 0); the preview will stay empty.")

# Tkinter window
root = tk.Tk()
root.title("LSM Webcam UI")

# Dropdown to select the letter that captured images are tagged with (A-Z)
selected_letter = tk.StringVar(value="A")
letter_dropdown = ttk.Combobox(
    root,
    textvariable=selected_letter,
    values=[chr(code) for code in range(ord("A"), ord("Z") + 1)],
)
letter_dropdown.pack()

# Webcam display: an empty label whose image is replaced on every preview update
label = tk.Label(root)
label.pack()

In [5]:
# Button callback
def capture_frame():
    """Capture one webcam frame and store it together with its hand landmarks.

    Reads a frame from the module-level ``cap``, writes the unmodified frame to
    ``<base_path>/data/images/<uuid>.png``, runs ``hand_landmarker`` on it and
    writes a JSON document to ``<base_path>/data/metadata/<uuid>.json``
    containing the image file name, the letter currently selected in the
    dropdown, a UTC ISO-8601 timestamp and the detected landmarks.

    Landmarks are stored per hand under ``"left_hand"`` / ``"right_hand"`` as a
    list of ``{"x", "y", "z"}`` dicts, using the values reported by MediaPipe.
    If two detected hands receive the same handedness label, the later one is
    stored under a suffixed key (e.g. ``"left_hand_2"``) instead of overwriting
    the first.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    ret, frame = cap.read()
    if not ret:
        print("⚠️ Could not read a frame from the webcam; nothing was saved.")
        return

    image_id = str(uuid.uuid4())
    letter = selected_letter.get()
    filename = f"{image_id}.png"

    images_dir = os.path.join(base_path, "data/images")
    metadata_dir = os.path.join(base_path, "data/metadata")
    # Create the output folders on first use: cv2.imwrite does not create
    # missing directories, and open() raises if the folder does not exist.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(metadata_dir, exist_ok=True)

    filepath = os.path.join(images_dir, filename)
    json_path = os.path.join(metadata_dir, f"{image_id}.json")

    # Save original (raw) image without drawing.
    # imwrite reports failure through its return value, so check it before
    # claiming success or writing metadata that points at a missing image.
    if not cv2.imwrite(filepath, frame):
        print(f"❌ Could not write image: {filepath}")
        return
    print(f"✅ Saved: {filepath}")

    # Convert frame to RGB and process with MediaPipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
    result = hand_landmarker.detect(mp_image)

    # Extract landmarks
    landmarks = {}
    if result.hand_landmarks and result.handedness:
        detections = zip(result.hand_landmarks, result.handedness)
        for hand_number, (hand_lms, handedness) in enumerate(detections, start=1):
            # "left" or "right"; named `side` so it cannot be confused with
            # the module-level Tk widget called `label`.
            side = handedness[0].category_name.lower()
            key = f"{side}_hand"
            if key in landmarks:
                # Both hands were given the same label: keep both instead of
                # silently overwriting the first one.
                key = f"{key}_{hand_number}"
                print(f"⚠️ Two hands classified as '{side}'; storing the extra one as '{key}'.")
            landmarks[key] = [{"x": lm.x, "y": lm.y, "z": lm.z} for lm in hand_lms]
    else:
        print("⚠️ No hands detected.")

    # Save metadata
    metadata = {
        "image": filename,
        "letter": letter,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "landmarks": landmarks
    }

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    # Determine which hands were stored
    hands_detected = list(landmarks.keys())
    if hands_detected:
        print(f"📝 Metadata saved for letter '{letter}' with: {', '.join(hands_detected)}.")
    else:
        print(f"📝 Metadata saved for letter '{letter}', but no hands were detected.")


In [None]:
# Capture button: clicking it runs capture_frame once
btn = tk.Button(master=root, command=capture_frame, text="Capture")
btn.pack()

# Drawing connections
# Each chain lists landmark indices in drawing order; consecutive indices in a
# chain are joined by a line segment in the preview.
_LANDMARK_CHAINS = (
    (0, 1, 2, 3, 4),       # thumb
    (0, 5, 6, 7, 8),       # index
    (5, 9, 10, 11, 12),    # middle
    (9, 13, 14, 15, 16),   # ring
    (13, 17, 18, 19, 20),  # pinky
    (0, 17),               # palm span
)

# Flat list of (start_index, end_index) pairs derived from the chains above
HAND_CONNECTIONS = [
    (start, end)
    for chain in _LANDMARK_CHAINS
    for start, end in zip(chain, chain[1:])
]

# Update loop for webcam with landmarks preview
def show_frame():
    """Render one preview frame with hand landmarks and schedule the next one.

    Reads a frame from the module-level ``cap``, runs ``hand_landmarker`` on
    it, and draws the handedness label, the ``HAND_CONNECTIONS`` segments and
    the landmark points for every detected hand onto a copy of the frame. The
    annotated copy is shown in the Tk ``label`` widget. The function then
    re-registers itself with ``root.after`` so it runs again about 10 ms
    later, whether or not a frame could be read.

    Returns:
        None.
    """
    ret, frame = cap.read()
    if ret:
        # Draw on a copy so the captured frame itself stays untouched
        draw_frame = frame.copy()
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        result = hand_landmarker.detect(mp_image)

        if result.hand_landmarks and result.handedness:
            h, w = frame.shape[:2]
            # Visit each detected hand exactly once. (Nesting this loop inside
            # another loop over the same hands would redraw every hand once
            # per detected hand without changing the picture.)
            for hand_lms, handedness in zip(result.hand_landmarks, result.handedness):
                label_text = handedness[0].category_name
                color = (0, 255, 0) if label_text == "Right" else (0, 128, 255)

                # Landmark coordinates are scaled by the frame width/height
                # to obtain pixel positions.
                points = [(int(lm.x * w), int(lm.y * h)) for lm in hand_lms]

                # Draw label near wrist (landmark 0)
                wx, wy = points[0]
                cv2.putText(draw_frame, label_text, (wx - 10, wy + 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

                # Draw connections first, then landmark dots on top of them
                for start_idx, end_idx in HAND_CONNECTIONS:
                    cv2.line(draw_frame, points[start_idx], points[end_idx], color, 2)

                for point in points:
                    cv2.circle(draw_frame, point, 4, (255, 255, 255), -1)

        # Convert to display
        display_frame = cv2.cvtColor(draw_frame, cv2.COLOR_BGR2RGB)
        img = ImageTk.PhotoImage(Image.fromarray(display_frame))
        label.config(image=img)
        # Keep a reference on the widget so the PhotoImage is not
        # garbage-collected while it is being displayed.
        label.image = img

    root.after(10, show_frame)

In [7]:
# Start the preview loop and block until the window is closed.
# The webcam is released in `finally` so the device is freed even if the
# preview or the Tk main loop raises.
try:
    show_frame()
    root.mainloop()
finally:
    cap.release()

✅ Saved: D:/Dev/Python/MNA/Mexican Sign Language\data/images\f57b2b9b-9d5b-4c85-8bd2-955a3e37cac1.png
📝 Metadata saved for letter 'A' with: left_hand.
✅ Saved: D:/Dev/Python/MNA/Mexican Sign Language\data/images\ca9b47b9-c78e-4c73-99a6-31cc9b6ad5eb.png
📝 Metadata saved for letter 'C' with: right_hand.
✅ Saved: D:/Dev/Python/MNA/Mexican Sign Language\data/images\07bd687f-69d1-45c8-99d0-69885796ce31.png
📝 Metadata saved for letter 'F' with: right_hand, left_hand.
✅ Saved: D:/Dev/Python/MNA/Mexican Sign Language\data/images\c8728045-a9f5-4f5e-ace0-cf4ce9420881.png
⚠️ No hands detected.
📝 Metadata saved for letter 'F', but no hands were detected.
