In [1]:
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import GestureRecognizer, GestureRecognizerOptions, RunningMode, GestureRecognizerResult
from mediapipe.tasks.python.core.base_options import BaseOptions
from mediapipe.framework.formats import landmark_pb2

In [2]:
# Path of the gesture-recognizer model bundle handed to BaseOptions later on.
model_path = "gesture_recognizer.task"

# Display labels for hand landmarks; list position == landmark index.
LANDMARK_NAMES = [
    "WRIST",              # 0
    "THUMB CMC",          # 1
    "THUMB MCP",          # 2
    "THUMB IP",           # 3
    "THUMB TIP",          # 4
    "INDEX FINGER MCP",   # 5
    "INDEX FINGER PIP",   # 6
    "INDEX FINGER DIP",   # 7
    "INDEX FINGER TIP",   # 8
    "MIDDLE FINGER MCP",  # 9
    "MIDDLE FINGER PIP",  # 10
    "MIDDLE FINGER DIP",  # 11
    "MIDDLE FINGER TIP",  # 12
    "RING FINGER MCP",    # 13
    "RING FINGER PIP",    # 14
    "RING FINGER DIP",    # 15
    "RING FINGER TIP",    # 16
    "PINKY MCP",          # 17
    "PINKY PIP",          # 18
    "PINKY DIP",          # 19
    "PINKY TIP",          # 20
]

In [9]:
def landmarkdisplay(result, fheight=900, fwidth=1000):
    """Render a gesture-recognition result as text on a white image.

    The panel shows, top to bottom: a centred "DATA" title, the handedness of
    each hand, the gesture candidates of each hand, and two side-by-side
    columns listing the normalized and the world landmarks. A footer hint is
    drawn near the bottom edge.

    Args:
        result: Object exposing ``handedness``, ``gestures``,
            ``hand_landmarks`` and ``hand_world_landmarks``. Each is a
            per-hand sequence; handedness/gesture entries provide
            ``category_name`` and ``score``, landmark entries provide
            ``x``, ``y`` and ``z``. Empty sections are skipped.
        fheight: Height of the returned image in pixels.
        fwidth: Width of the returned image in pixels.

    Returns:
        A ``uint8`` array of shape ``(fheight, fwidth, 3)``.
    """
    img = np.full((fheight, fwidth, 3), 255, dtype=np.uint8)

    line_height = 16
    col1_x = 10
    col2_x = 450

    font = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1.0
    small_font = 0.8

    # Channel order presumably BGR (OpenCV convention), matching the names.
    colors = {
        "Maroon": (0, 0, 128),
        "Dblue": (139, 0, 0),
        "Dgreen": (0, 100, 0),
        "Black": (0, 0, 0),
    }

    # Centred title.
    y_offset = 20
    (title_width, _), _ = cv2.getTextSize("DATA", font, 1.2, 2)
    cv2.putText(img, "DATA", ((fwidth - title_width) // 2, y_offset), font, 1.2, colors["Black"], 2)
    y_offset += 35

    if result.handedness:
        cv2.putText(img, "Handedness:", (col1_x, y_offset), font, font_scale, colors["Black"], 1)
        y_offset += 20
        x_cursor = col1_x + 20
        for i, hand in enumerate(result.handedness):
            for h in hand:
                text = f"Hand {i+1}: {h.category_name} ({h.score:.4f})"
                cv2.putText(img, text, (x_cursor, y_offset), font, small_font, colors["Maroon"], 1)
                # Advance the cursor so the next entry is drawn beside this
                # one instead of on top of it (same layout as the gestures).
                (text_width, _), _ = cv2.getTextSize(text, font, small_font, 1)
                x_cursor += text_width + 20
        y_offset += 30

    if result.gestures:
        cv2.putText(img, "Gestures:", (col1_x, y_offset), font, font_scale, colors["Black"], 1)
        y_offset += 20
        for i, gesture_list in enumerate(result.gestures):
            if gesture_list:
                x_cursor = col1_x + 20
                cv2.putText(img, f"Hand {i+1}:", (col1_x + 10, y_offset), font, small_font, colors["Dblue"], 1)
                y_offset += 20
                # All candidates of one hand share a row, left to right.
                for gesture in gesture_list:
                    gesture_str = f"{gesture.category_name}: {gesture.score:.4f}"
                    cv2.putText(img, gesture_str, (x_cursor, y_offset), font, small_font, colors["Dgreen"], 1)
                    (text_width, _), _ = cv2.getTextSize(gesture_str, font, small_font, 1)
                    x_cursor += text_width + 20
                y_offset += 20

    # Pad names to the longest label rather than cutting them at 12 chars,
    # which used to drop the joint suffix ("INDEX FINGER MCP" -> "INDEX FINGER").
    name_width = max((len(name) for name in LANDMARK_NAMES), default=0)

    def draw_landmark_column(title, hands, x, y):
        """Draw one titled column with the landmarks of every hand in `hands`."""
        cv2.putText(img, title, (x, y), font, font_scale, colors["Black"], 1)
        y += 20
        for hand_idx, landmarks in enumerate(hands):
            cv2.putText(img, f"Hand {hand_idx + 1}:", (x, y), font, small_font, colors["Dblue"], 1)
            y += 15
            for i, landmark in enumerate(landmarks):
                # Fall back to a generic label if more points arrive than names exist.
                name = LANDMARK_NAMES[i] if i < len(LANDMARK_NAMES) else f"LANDMARK {i}"
                text = (
                    f"{i:02d}: {name.ljust(name_width)} "
                    f"x={landmark.x:7.4f} y={landmark.y:7.4f} z={landmark.z:7.4f}"
                )
                cv2.putText(img, text, (x + 10, y), font, small_font, colors["Dgreen"], 1)
                y += line_height

    # Both columns start at the same height, below whatever was drawn above.
    # NOTE(review): with the default 900 px height, two hands' worth of rows
    # (2 x 21 x 16 px plus headers) extend past the bottom edge and over the
    # footer; consider a taller canvas when num_hands > 1.
    columns_y = max(y_offset + 10, 150)
    if result.hand_landmarks:
        draw_landmark_column("Normalized Landmarks:", result.hand_landmarks, col1_x, columns_y)
    if result.hand_world_landmarks:
        draw_landmark_column("World Landmarks:", result.hand_world_landmarks, col2_x, columns_y)

    # Centred footer hint.
    footer_text = "Press ESC to exit"
    (footer_width, _), _ = cv2.getTextSize(footer_text, font, small_font, 1)
    footer_x = (fwidth - footer_width) // 2
    cv2.putText(img, footer_text, (footer_x, fheight - 20), font, small_font, colors["Maroon"], 1)

    return img


In [10]:
# Drawing helpers and hand-skeleton topology from the MediaPipe solutions API.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Named colour palette used for the overlays (presumably BGR, as OpenCV expects).
colors = dict(
    Maroon=(0, 0, 128),
    Dblue=(139, 0, 0),
    Dgreen=(0, 100, 0),
    Golden=(0, 215, 255),
    Burnt=(0, 85, 165),
    Black=(0, 0, 0),
    White=(255, 255, 255),
    Olive=(80, 80, 0),
)
font = cv2.FONT_HERSHEY_DUPLEX

# Recognizer configured for single-image inference on up to two hands.
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path=model_path),
    running_mode=RunningMode.IMAGE,
    num_hands=2,
    min_hand_detection_confidence=0.6,
    min_hand_presence_confidence=0.5,
    min_tracking_confidence=0.5,
)
recognizer = GestureRecognizer.create_from_options(options)

# Open capture device 0 only after the recognizer was created successfully.
cap = cv2.VideoCapture(0)

# Tell the user why nothing happens instead of silently skipping the loop.
if not cap.isOpened():
    print("Camera could not be opened; nothing to display.")

# Capture loop: recognize gestures frame by frame until ESC is pressed or the
# camera stops delivering frames. The try/finally guarantees the camera and
# the windows are released even if a frame fails or the kernel is interrupted.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame, then convert BGR -> RGB for MediaPipe.
        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

        result = recognizer.recognize(mp_image)

        # draw_landmarks expects a NormalizedLandmarkList proto, so repackage
        # each hand's landmarks before drawing the skeleton onto the frame.
        if result.hand_landmarks:
            for landmarks in result.hand_landmarks:
                mp_landmarks = landmark_pb2.NormalizedLandmarkList()
                mp_landmarks.landmark.extend([
                    landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
                    for landmark in landmarks
                ])

                mp_drawing.draw_landmarks(
                    frame,
                    mp_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=colors["Golden"], thickness=2, circle_radius=3),
                    mp_drawing.DrawingSpec(color=colors["Dgreen"], thickness=2)
                )

        # Overlay only the first gesture entry of the first hand.
        if result.gestures and result.gestures[0]:
            gesture = result.gestures[0][0].category_name
            score = result.gestures[0][0].score
            cv2.putText(frame, f"Gesture: {gesture} ({score:.2f})", (10, 40),
                        font, 1, (0, 255, 255), 2)

        # One handedness label per hand, stacked 40 px apart.
        if result.handedness:
            for i, hand in enumerate(result.handedness):
                label = hand[0].category_name
                score = hand[0].score
                cv2.putText(frame, f"{label} ({score:.2f})", (10, 80 + 40 * i),
                            font, 1, (255, 255, 0), 2)

        detailed_display = landmarkdisplay(result)

        cv2.imshow("Gesture Recognizer - Camera Feed", frame)
        cv2.imshow("Detailed Landmarks Data", detailed_display)

        # 27 is the ESC key code.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()