In [1]:
!pip install -q mediapipe


[notice] A new release of pip is available: 23.2.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

'wget' is not recognized as an internal or external command,
operable program or batch file.


In [3]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import time
import random
import numpy

In [4]:
show_finger_dots = True  # Flag to toggle the display of green dot on index fingertip

# Import necessary MediaPipe classes for hand tracking
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode

index_finger_coords = None  # Variable to store the index finger coordinates of a hand

# Callback function to handle hand detection results
def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    global index_finger_coords
    if result.hand_landmarks:
        for hand_idx, hand_landmarks in enumerate(result.hand_landmarks):
            index_finger_tip = hand_landmarks[8]  # Index finger tip (landmark 8)
            index_finger_coords = (index_finger_tip.x, index_finger_tip.y)

# Configure the hand landmarker for live video input and callback on detection
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

# Create the hand landmarker instance
landmarker = HandLandmarker.create_from_options(options)

# Start video capture from the webcam
cap = cv2.VideoCapture(0)

# Exit if webcam is not found or cannot be opened
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Read a single frame to get image dimensions
ret, frame = cap.read()
h, w, _ = frame.shape

# Randomize target objective position
x_pix_objective = int(random.uniform(0.1, 0.9) * w) 
y_pix_objective = int(random.uniform(0.1, 0.9) * h)

points = 0  # Game score

last_toggle_time = 0  # Timestamp of the last toggle action
hover_start_time = None  # Tracks when user started hovering over the "Quit" dot

# Main loop to process video frames
while True:
    ret, frame = cap.read()  # Capture new frame from webcam
    frame = cv2.flip(frame, 1)  # Mirror the image

    # CConvert OpenCV frame to MediaPipe Image format
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
    timestamp_ms = int(time.time() * 1000)

    # Perform asynchronous hand landmark detection
    landmarker.detect_async(mp_image, timestamp_ms)

    if index_finger_coords:
        # Convert normalized coordinates to pixel coordinates
        x_pix_finger = int(index_finger_coords[0] * w)
        y_pix_finger = int(index_finger_coords[1] * h)

        # Draw a green dot on the fingertip if toggled on
        if show_finger_dots:
            cv2.circle(frame, (x_pix_finger, y_pix_finger), 10, (0, 255, 0), -1)

        # If fingertip touches red target, award a point and generate a new target
        if numpy.sqrt((x_pix_finger - x_pix_objective)**2 + (y_pix_finger - y_pix_objective)**2) < 10:
            points += 1
            x_pix_objective = int(random.uniform(0.1, 0.9) * w) 
            y_pix_objective = int(random.uniform(0.1, 0.9) * h)

        # If fingertip hovers over the toggle dot, toggle visibility of dot on fingertip with a 1-second cooldown
        if numpy.sqrt((x_pix_finger - 10)**2 + (y_pix_finger - 50)**2) < 10:
            current_time = time.time()
            if current_time - last_toggle_time >= 1:
                show_finger_dots = not show_finger_dots
                last_toggle_time = current_time

        # If fingertip hovers over the quit dot for 2+ seconds, break loop
        if numpy.sqrt((x_pix_finger - 10)**2 + (y_pix_finger - 80)**2) < 10:
            current_time = time.time()
            if hover_start_time is None:
                hover_start_time = current_time
            elif current_time - hover_start_time >= 2:
                break
        else:
            hover_start_time = None  # Reset timer if finger leaves the dot

    # Draw red target dot, toggle dot, and quit dot
    cv2.circle(frame, (x_pix_objective, y_pix_objective), 10, (0, 0, 255), -1)  # Target
    cv2.circle(frame, (10, 50), 10, (0, 0, 0), -1)  # Toggle display dot
    cv2.circle(frame, (10, 80), 10, (0, 0, 0), -1)  # Quit dot

    # Draw score text
    score_text = f"Score: {points}"
    cv2.putText(frame, score_text, org=(5, 30), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1, color=(0, 0, 0), thickness=2, lineType=cv2.LINE_AA)

    # Draw hitbox for toggle dot
    score_text = "Toggle dot"
    cv2.putText(frame, score_text, org=(25, 60), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1, color=(0, 0, 0), thickness=2, lineType=cv2.LINE_AA)

    # Draw hitbox for quit dot
    score_text = "Quit"
    cv2.putText(frame, score_text, org=(25, 90), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1, color=(0, 0, 0), thickness=2, lineType=cv2.LINE_AA)

    # Show the annotated frame
    cv2.imshow("Hand Landmarker", frame)

    # Allow early exit using 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Clean up resources
cap.release()
cv2.destroyAllWindows()