In [12]:
import os
import cv2 
import time
import numpy as np
import matplotlib.pyplot as plt

import mediapipe as mp
from mediapipe.tasks.python.vision import GestureRecognizer , GestureRecognizerOptions , RunningMode

In [13]:
# Location of the MediaPipe gesture-recognizer task bundle -- replace with your model path.
# Written as a raw string so the Windows backslashes are never read as escape sequences
# (sequences such as "\O" or "\G" are invalid escapes and trigger warnings on newer Pythons).
model_path = r"F:\Omar 3amora\Gesture Recognition\Mediapipe_solution\gesture_recognizer.task"


# Recognizer configured for VIDEO running mode (frames are submitted one at a time
# together with a timestamp) and limited to a single hand.
options = GestureRecognizerOptions(
    base_options=mp.tasks.BaseOptions(model_asset_path=model_path),
    running_mode=RunningMode.VIDEO,
    num_hands=1
)
recognizer = GestureRecognizer.create_from_options(options)


In [14]:
# Live webcam demo: feed every captured frame to `recognizer` and overlay the
# top-ranked gesture label on the preview window. Press 'q' to quit.
cap = cv2.VideoCapture(0)
last_timestamp_ms = -1  # Timestamp passed to the recognizer for the previous frame

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera missing or stream ended)
            break

        # Convert BGR to RGB and wrap the pixels as a MediaPipe image
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # VIDEO mode expects timestamps that keep increasing for the lifetime of the
        # recognizer. A frame counter restarts at 0 whenever this cell is re-run while
        # `recognizer` lives on from the previous cell, so use the monotonic clock and
        # force each value to be strictly greater than the last one.
        timestamp_ms = max(int(time.monotonic() * 1000), last_timestamp_ms + 1)
        last_timestamp_ms = timestamp_ms

        # Recognize gestures
        recognition_result = recognizer.recognize_for_video(mp_image, timestamp_ms)

        # Report and draw the first category of the first entry in `gestures`, if any
        if recognition_result.gestures:
            top_gesture = recognition_result.gestures[0][0].category_name
            print(f"Gesture: {top_gesture}")
            cv2.putText(frame, f"Gesture: {top_gesture}", (30, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the frame
        cv2.imshow("Gesture Recognition", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview, even after an error or interrupt
    cap.release()
    cv2.destroyAllWindows()

Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up
Gesture: Thumb_Up


# Custom Gestures

In [None]:
import os
import cv2 
import time
import numpy as np
import matplotlib.pyplot as plt
from collections import deque

import mediapipe as mp
from mediapipe.tasks.python.vision import GestureRecognizer , GestureRecognizerOptions , RunningMode

In [2]:

# Build the MediaPipe Hands solution object used by the capture loop:
# one hand at most, static-image mode off, 0.5 confidence for detection and tracking.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
)


In [8]:

# Webcam demo: label the wrist's dominant direction of travel and classify a simple
# finger pose from the hand landmarks produced by `hands`. Press 'q' to quit.

motion_history = deque(maxlen=5)  # Most recent wrist positions, in pixels
motion_threshold = 30  # pixels of travel across the history window before motion is reported

# Tip and PIP-joint landmarks for the four non-thumb fingers, in matching order
finger_tips = [
    mp_hands.HandLandmark.INDEX_FINGER_TIP,
    mp_hands.HandLandmark.MIDDLE_FINGER_TIP,
    mp_hands.HandLandmark.RING_FINGER_TIP,
    mp_hands.HandLandmark.PINKY_TIP
]
finger_pips = [
    mp_hands.HandLandmark.INDEX_FINGER_PIP,
    mp_hands.HandLandmark.MIDDLE_FINGER_PIP,
    mp_hands.HandLandmark.RING_FINGER_PIP,
    mp_hands.HandLandmark.PINKY_PIP
]

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip horizontally so the preview behaves like a mirror
        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        h, w, _ = frame.shape
        motion_text = ""
        gesture_text = ""

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            lm = hand_landmarks.landmark

            # Wrist for motion tracking (landmark coordinates scaled to pixels)
            wrist = lm[mp_hands.HandLandmark.WRIST]
            motion_history.append(np.array([wrist.x * w, wrist.y * h]))

            # Motion direction: displacement between the oldest and newest stored
            # positions; the axis with the larger displacement decides the label
            if len(motion_history) >= 2:
                dx, dy = motion_history[-1] - motion_history[0]
                if abs(dx) > abs(dy):
                    if dx > motion_threshold:
                        motion_text = "Moving Right"
                    elif dx < -motion_threshold:
                        motion_text = "Moving Left"
                else:
                    if dy > motion_threshold:
                        motion_text = "Moving Down"
                    elif dy < -motion_threshold:
                        motion_text = "Moving Up"

            # A finger counts as extended when its tip has a smaller y than its PIP
            # joint (presumably assumes an upright hand -- verify for other poses)
            fingers_extended = [
                lm[tip].y < lm[pip].y for tip, pip in zip(finger_tips, finger_pips)
            ]

            # Determine gestures
            if all(fingers_extended):
                gesture_text = "STOP"
            elif not any(fingers_extended):
                gesture_text = "MOVE BACKWARD"
            elif fingers_extended == [True, True, False, False]:
                gesture_text = "MOVE FORWARD"  # Victory sign

            # Draw landmarks
            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Display texts
            cv2.putText(frame, motion_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, gesture_text, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        else:
            # Hand lost: forget old positions so a hand that reappears somewhere else
            # is not reported as having moved there
            motion_history.clear()

        cv2.imshow("Hand Motion and Gestures", frame)

        # Exit on 'q'
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the preview, even after an error or interrupt
    cap.release()
    cv2.destroyAllWindows()

# Simple Simulation

In [None]:
import cv2
import numpy as np
from collections import deque
import mediapipe as mp

# Gesture-driven robot simulation: hand pose and wrist motion read from the webcam
# move a dot around a separate "Robot Simulation" window. Press 'q' to quit.

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# Tunable constants
SIM_WIDTH, SIM_HEIGHT = 640, 480  # Simulation canvas size in pixels
ROBOT_STEP = 5                    # Pixels the robot moves per processed frame
motion_threshold = 30             # Pixels of wrist travel before motion is reported

# Tip and PIP-joint landmarks for the four non-thumb fingers, in matching order
finger_tips = [
    mp_hands.HandLandmark.INDEX_FINGER_TIP,
    mp_hands.HandLandmark.MIDDLE_FINGER_TIP,
    mp_hands.HandLandmark.RING_FINGER_TIP,
    mp_hands.HandLandmark.PINKY_TIP
]
finger_pips = [
    mp_hands.HandLandmark.INDEX_FINGER_PIP,
    mp_hands.HandLandmark.MIDDLE_FINGER_PIP,
    mp_hands.HandLandmark.RING_FINGER_PIP,
    mp_hands.HandLandmark.PINKY_PIP
]

# Motion Analyzer: most recent wrist positions, in pixels
motion_history = deque(maxlen=5)

# Video capture
cap = cv2.VideoCapture(0)

# Robot simulation initial position (centre of the canvas)
robot_x, robot_y = SIM_WIDTH // 2, SIM_HEIGHT // 2

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Flip horizontally so the preview behaves like a mirror
        frame = cv2.flip(frame, 1)
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        h, w, _ = frame.shape
        motion_text = ""
        gesture_text = ""
        command = ""

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            lm = hand_landmarks.landmark

            # Wrist for motion (landmark coordinates scaled to pixels)
            wrist = lm[mp_hands.HandLandmark.WRIST]
            motion_history.append(np.array([wrist.x * w, wrist.y * h]))

            # Displacement between the oldest and newest stored positions; the axis
            # with the larger displacement decides the label
            if len(motion_history) >= 2:
                dx, dy = motion_history[-1] - motion_history[0]
                if abs(dx) > abs(dy):
                    if dx > motion_threshold:
                        motion_text = "Moving Right"
                    elif dx < -motion_threshold:
                        motion_text = "Moving Left"
                else:
                    if dy > motion_threshold:
                        motion_text = "Moving Down"
                    elif dy < -motion_threshold:
                        motion_text = "Moving Up"

            # A finger counts as extended when its tip has a smaller y than its PIP
            # joint (presumably assumes an upright hand -- verify for other poses)
            fingers_extended = [
                lm[tip].y < lm[pip].y for tip, pip in zip(finger_tips, finger_pips)
            ]

            if all(fingers_extended):
                gesture_text = "STOP"
                command = "STOP"
            elif not any(fingers_extended):
                gesture_text = "MOVE BACKWARD"
                command = "MOVE_BACKWARD"
            elif fingers_extended == [True, True, False, False]:
                gesture_text = "MOVE FORWARD"
                command = "MOVE_FORWARD"

            mp.solutions.drawing_utils.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        else:
            # Hand lost: forget old positions so a hand that reappears somewhere else
            # does not register as motion and push the robot sideways
            motion_history.clear()

        # Update robot position
        # NOTE(review): the STOP check comes after the left/right motion checks, so an
        # open palm that is moving sideways still moves the robot. Order kept as in the
        # original -- confirm whether STOP was meant to take priority.
        if command == "MOVE_FORWARD":
            robot_y -= ROBOT_STEP
        elif command == "MOVE_BACKWARD":
            robot_y += ROBOT_STEP
        elif motion_text == "Moving Left":
            robot_x -= ROBOT_STEP
        elif motion_text == "Moving Right":
            robot_x += ROBOT_STEP
        elif command == "STOP":
            pass  # No movement

        # Clamp position with min/max so the coordinates stay built-in ints
        # (np.clip would turn them into NumPy scalars before they reach cv2.circle)
        robot_x = min(max(robot_x, 0), SIM_WIDTH)
        robot_y = min(max(robot_y, 0), SIM_HEIGHT)

        # Display camera frame
        cv2.putText(frame, motion_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, gesture_text, (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow("Hand Motion and Gestures", frame)

        # Create simulation window: black canvas with the robot drawn as a filled circle
        sim_frame = np.zeros((SIM_HEIGHT, SIM_WIDTH, 3), dtype=np.uint8)
        cv2.circle(sim_frame, (robot_x, robot_y), 20, (0, 255, 255), -1)
        cv2.putText(sim_frame, f"X:{robot_x} Y:{robot_y}", (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)
        cv2.imshow("Robot Simulation", sim_frame)

        # Exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close both windows, even after an error or interrupt
    cap.release()
    cv2.destroyAllWindows()
