##### Copyright 2023 The MediaPipe Authors. All Rights Reserved.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Hand Landmarks Detection with MediaPipe Tasks

This notebook shows you how to use MediaPipe Tasks Python API to detect hand landmarks from images.

## Preparation

Let's start with installing MediaPipe.

In [2]:
import random
!pip install -q mediapipe

Then download an off-the-shelf model bundle. Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker#models) for more information about this model bundle.

In [3]:
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

zsh:1: command not found: wget


## Visualization utilities

In [2]:
#@markdown We implemented some functions to visualize the hand landmark detection results. <br/> Run the following cell to activate the functions.

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  hand_landmarks_list = detection_result.hand_landmarks
  handedness_list = detection_result.handedness
  annotated_image = np.copy(rgb_image)

  # Loop through the detected hands to visualize.
  for idx in range(len(hand_landmarks_list)):
    hand_landmarks = hand_landmarks_list[idx]
    handedness = handedness_list[idx]

    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Get the top left corner of the detected hand's bounding box.
    height, width, _ = annotated_image.shape
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]
    text_x = int(min(x_coordinates) * width)
    text_y = int(min(y_coordinates) * height) - MARGIN

    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, f"{handedness[0].category_name}",
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

## Download test image

Let's grab a test image that we'll use later. The image is from [Unsplash](https://unsplash.com/photos/mt2fyrdXxzk).

In [5]:
!wget -q -O image.jpg https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/woman_hands.jpg

import cv2
from google.colab.patches import cv2_imshow

img = cv2.imread("image.jpg")
cv2_imshow(img)

zsh:1: command not found: wget


ModuleNotFoundError: No module named 'google.colab'

Optionally, you can upload your own image. If you want to do so, uncomment and run the cell below.

In [None]:
# from google.colab import files
# uploaded = files.upload()

# for filename in uploaded:
#   content = uploaded[filename]
#   with open(filename, 'wb') as f:
#     f.write(content)

# if len(uploaded.keys()):
#   IMAGE_FILE = next(iter(uploaded))
#   print('Uploaded file:', IMAGE_FILE)

## Running inference and visualizing the results

Here are the steps to run hand landmark detection using MediaPipe.

Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker/python) to learn more about configuration options that this solution supports.


In [1]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create an HandLandmarker object.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

# STEP 3: Load the input image.
image = mp.Image.create_from_file("image.jpg")

# STEP 4: Detect hand landmarks from the input image.
detection_result = detector.detect(image)

# STEP 5: Process the classification result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
cv2_imshow(cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

I0000 00:00:1721871203.571017 1128745 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 76.3), renderer: Apple M1


RuntimeError: Unable to open file at /Users/apple/PycharmProjects/hand_landmarker/hand_landmarker.task

In [5]:

11122211212353521325

11122211212353521325

I0000 00:00:1723170204.721669 3861222 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 76.3), renderer: Apple M1
W0000 00:00:1723170204.728736 3867526 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1723170204.747830 3867528 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


KeyboardInterrupt: 

In [2]:
import math
from math import atan2, degrees
import numpy as np
import cv2
import mediapipe as mp

# Initialize video capture and MediaPipe
cap = cv2.VideoCapture(0)
mpHands = mp.solutions.hands
hands = mpHands.Hands()
mpDraw = mp.solutions.drawing_utils

# Define font settings for displaying text on the image
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.8
color = (255, 255, 255)
thickness = 2



# variables for thumb tip detection and distance calculation
thumb_tip = False
index_finger_tip = False
middle_finger_tip = False
ring_finger_tip = False
pinky_finger_tip = False


while True:
    success, image = cap.read()
    imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = hands.process(imageRGB)
    
    # Draw a random dots on the image
    h, w, c = image.shape
    
    
    rx1 = 500
    ry1 = 300
    cv2.circle(image, (rx1, ry1), 5, (255, 0, 0), cv2.FILLED)
    
    rx2 = 300
    ry2 = 350
    cv2.circle(image, (rx2, ry2), 5, (255, 0, 0), cv2.FILLED)
    
    rx3 = 800
    ry3 = 100
    cv2.circle(image, (rx3, ry3), 5, (255, 0, 0), cv2.FILLED)
    
    rx4 = 650
    ry4 = 250
    cv2.circle(image, (rx4, ry4), 5, (255, 0, 0), cv2.FILLED)
    
    rx5 = 700
    ry5 = 400
    cv2.circle(image, (rx5, ry5), 5, (255, 0, 0), cv2.FILLED)
    
    # Check whether a hand is detected
    if results.multi_hand_landmarks:
        for handLms in results.multi_hand_landmarks: # Working with each hand
            for id, lm in enumerate(handLms.landmark):
                h, w, c = image.shape
                cx, cy = int(lm.x * w), int(lm.y * h)
                
                        
                if thumb_tip and id == 4:
                    cv2.circle(image, (cx, cy), 25, (0, 255, 0), cv2.FILLED)
                    
                    
                    # Get the coordinates of another point (thumb IP right here) for angle calculation
                    sx, sy = int(handLms.landmark[id - 1].x * w), int(handLms.landmark[id-1].y * h)
                        
                    # Calculate the distance between fingertip and the target dot
                    x_target_distance = rx1 - cx
                    y_target_distance = ry1 - cy
                    distance = math.sqrt(x_target_distance**2 + y_target_distance**2)
                        
                    # Calculate the angle to the target point using atan2
                    angle_to_target_in_radians = math.atan2(y_target_distance, x_target_distance)
                    angle_to_target_in_degrees = degrees(angle_to_target_in_radians)
                        
                    # Calculate the angle of the thumb tip (relative to the hand) using atan2
                    thumb_angle_in_radians = math.atan2(cy - sy, cx - sx)
                    thumb_angle_in_degrees = degrees(thumb_angle_in_radians)
                        
                    # Compute the difference in angles
                    angle_difference = angle_to_target_in_degrees - thumb_angle_in_degrees
                        
                    # Normalize the angle difference to the range [-180, 180]
                    angle_difference = (angle_difference + 180) % 360 - 180
                
                # A line that connect the fingertip and the target dot
                    cv2.line(image, (cx, cy), (rx1, ry1), (0, 255, 0), 2)
                        
                        
                
                # Display distance and angle on the image
                    thumb_text_position = (cx, cy - 40)
                    cv2.putText(image, f"distance: {distance:.2f}", thumb_text_position, font, font_scale, color, thickness)
                    angle_text_position = (cx, cy - 70)
                    cv2.putText(image, f"angle difference: {angle_difference:.2f}", angle_text_position, font, font_scale, color, thickness)
                    
                
                        
                        
                if index_finger_tip and id == 8:
                    cv2.circle(image, (cx, cy), 25, (0, 255, 0), cv2.FILLED)
                    
                    
                    # Get the coordinates of another point (index IP right here) for angle calculation
                    sx, sy = int(handLms.landmark[id - 1].x * w), int(handLms.landmark[id-1].y * h)
                        
                    # Calculate the distance between fingertip and the target dot
                    x_target_distance = rx2 - cx
                    y_target_distance = ry2 - cy
                    distance = math.sqrt(x_target_distance**2 + y_target_distance**2)
                        
                    # Calculate the angle to the target point using atan2
                    angle_to_target_in_radians = math.atan2(y_target_distance, x_target_distance)
                    angle_to_target_in_degrees = degrees(angle_to_target_in_radians)
                        
                    # Calculate the angle of the index tip (relative to the hand) using atan2
                    index_angle_in_radians = math.atan2(cy - sy, cx - sx)
                    index_angle_in_degrees = degrees(index_angle_in_radians)
                        
                    # Compute the difference in angles
                    angle_difference = angle_to_target_in_degrees - index_angle_in_degrees
                        
                    # Normalize the angle difference to the range [-180, 180]
                    angle_difference = (angle_difference + 180) % 360 - 180
                
                
                # A line that connect the fingertip and the target dot
                    cv2.line(image, (cx, cy), (rx2, ry2), (0, 255, 0), 2)
                        
                        
                
                # Display distance and angle on the image
                    index_text_position = (cx, cy - 40)
                    cv2.putText(image, f"distance: {distance:.2f}", index_text_position, font, font_scale, color, thickness)
                    angle_text_position = (cx, cy - 70)
                    cv2.putText(image, f"angle difference: {angle_difference:.2f}", angle_text_position, font, font_scale, color, thickness)
                    
                    
                    
                if middle_finger_tip and id == 12:
                    cv2.circle(image, (cx, cy), 25, (0, 255, 0), cv2.FILLED)
                    
                    
                    # Get the coordinates of another point (middle IP right here) for angle calculation
                    sx, sy = int(handLms.landmark[id - 1].x * w), int(handLms.landmark[id-1].y * h)
                        
                    # Calculate the distance between fingertip and the target dot
                    x_target_distance = rx3 - cx
                    y_target_distance = ry3 - cy
                    distance = math.sqrt(x_target_distance**2 + y_target_distance**2)
                        
                    # Calculate the angle to the target point using atan2
                    angle_to_target_in_radians = math.atan2(y_target_distance, x_target_distance)
                    angle_to_target_in_degrees = degrees(angle_to_target_in_radians)
                        
                    # Calculate the angle of the middle tip (relative to the hand) using atan2
                    middle_angle_in_radians = math.atan2(cy - sy, cx - sx)
                    middle_angle_in_degrees = degrees(middle_angle_in_radians)
                        
                    # Compute the difference in angles
                    angle_difference = angle_to_target_in_degrees - middle_angle_in_degrees
                        
                    # Normalize the angle difference to the range [-180, 180]
                    angle_difference = (angle_difference + 180) % 360 - 180
                
                
                # A line that connect the fingertip and the target dot
                    cv2.line(image, (cx, cy), (rx3, ry3), (0, 255, 0), 2)
                        
                        
                
                # Display distance and angle on the image
                    middle_text_position = (cx, cy - 40)
                    cv2.putText(image, f"distance: {distance:.2f}", middle_text_position, font, font_scale, color, thickness)
                    angle_text_position = (cx, cy - 70)
                    cv2.putText(image, f"angle difference: {angle_difference:.2f}", angle_text_position, font, font_scale, color, thickness)
                
                
                if ring_finger_tip and id == 16:
                    cv2.circle(image, (cx, cy), 25, (0, 255, 0), cv2.FILLED)
                    
                    
                    # Get the coordinates of another point (ring IP right here) for angle calculation
                    sx, sy = int(handLms.landmark[id - 1].x * w), int(handLms.landmark[id-1].y * h)
                        
                    # Calculate the distance between fingertip and the target dot
                    x_target_distance = rx4 - cx
                    y_target_distance = ry4 - cy
                    distance = math.sqrt(x_target_distance**2 + y_target_distance**2)
                        
                    # Calculate the angle to the target point using atan2
                    angle_to_target_in_radians = math.atan2(y_target_distance, x_target_distance)
                    angle_to_target_in_degrees = degrees(angle_to_target_in_radians)
                        
                    # Calculate the angle of the thumb tip (relative to the hand) using atan2
                    ring_angle_in_radians = math.atan2(cy - sy, cx - sx)
                    ring_angle_in_degrees = degrees(ring_angle_in_radians)
                        
                    # Compute the difference in angles
                    angle_difference = angle_to_target_in_degrees - ring_angle_in_degrees
                        
                    # Normalize the angle difference to the range [-180, 180]
                    angle_difference = (angle_difference + 180) % 360 - 180
                
                
                # A line that connect the fingertip and the target dot
                    cv2.line(image, (cx, cy), (rx4, ry4), (0, 255, 0), 2)
                        
                        
                
                # Display distance and angle on the image
                    ring_text_position = (cx, cy - 40)
                    cv2.putText(image, f"distance: {distance:.2f}", ring_text_position, font, font_scale, color, thickness)
                    angle_text_position = (cx, cy - 70)
                    cv2.putText(image, f"angle difference: {angle_difference:.2f}", angle_text_position, font, font_scale, color, thickness)
                
                
                if pinky_finger_tip and id == 20:
                    cv2.circle(image, (cx, cy), 25, (0, 255, 0), cv2.FILLED)
                    
                    
                    # Get the coordinates of another point (pinky IP right here) for angle calculation
                    sx, sy = int(handLms.landmark[id - 1].x * w), int(handLms.landmark[id-1].y * h)
                        
                    # Calculate the distance between fingertip and the target dot
                    x_target_distance = rx5 - cx
                    y_target_distance = ry5 - cy
                    distance = math.sqrt(x_target_distance**2 + y_target_distance**2)
                        
                    # Calculate the angle to the target point using atan2
                    angle_to_target_in_radians = math.atan2(y_target_distance, x_target_distance)
                    angle_to_target_in_degrees = degrees(angle_to_target_in_radians)
                        
                    # Calculate the angle of the thumb tip (relative to the hand) using atan2
                    pinky_angle_in_radians = math.atan2(cy - sy, cx - sx)
                    pinky_angle_in_degrees = degrees(pinky_angle_in_radians)
                        
                    # Compute the difference in angles
                    angle_difference = angle_to_target_in_degrees - pinky_angle_in_degrees
                        
                    # Normalize the angle difference to the range [-180, 180]
                    angle_difference = (angle_difference + 180) % 360 - 180
                
                
                # A line that connect the fingertip and the target dot
                    cv2.line(image, (cx, cy), (rx5, ry5), (0, 255, 0), 2)
                        
                        
                
                # Display distance and angle on the image
                    pinky_text_position = (cx, cy - 40)
                    cv2.putText(image, f"distance: {distance:.2f}", pinky_text_position, font, font_scale, color, thickness)
                    angle_text_position = (cx, cy - 70)
                    cv2.putText(image, f"angle difference: {angle_difference:.2f}", angle_text_position, font, font_scale, color, thickness)
                
                
                   
                
                
                    
                        
                
            mpDraw.draw_landmarks(image, handLms, mpHands.HAND_CONNECTIONS)
            
    
    cv2.imshow("Output", image)
    
    
    # Check for key presses
    key = cv2.waitKey(1) & 0xFF 
    if key == ord('1'):
        thumb_tip = not thumb_tip
    elif key == ord('2'):
        index_finger_tip = not index_finger_tip
    elif key == ord('3'):
        middle_finger_tip = not middle_finger_tip
    elif key == ord('4'):
        ring_finger_tip = not ring_finger_tip
    elif key == ord('5'):
        pinky_finger_tip = not pinky_finger_tip
    elif key == ord('q'):
        break
        
     # Check for window close button
    if cv2.getWindowProperty("Output", cv2.WND_PROP_VISIBLE) < 1:
        break
    
        

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1737990558.645506  236064 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 76.3), renderer: Apple M1
W0000 00:00:1737990558.659599  238258 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737990558.670842  238258 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
