# Hand Landmark Detection with the MediaPipe Tasks Python API

## 1. Install Dependencies

In [1]:
!pip install -q mediapipe==0.10.0 opencv-python

## 2. Download model
Download an off-the-shelf hand landmarker model bundle. See the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/hand_landmarker#models) for more information about this model bundle.

In [2]:
# Fetch the float16 hand landmarker bundle; the detector options later load it
# by the relative path 'hand_landmarker.task'.
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

wget: /Users/gmx/.netrc:1: unknown token "-e"


## 3. Import the necessary modules

In [1]:
# Import the necessary modules
import time

import cv2
import mediapipe as mp
import numpy as np
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [8]:

# Short aliases for the MediaPipe Tasks classes used in this notebook.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Earlier configuration without running_mode / result_callback, kept for reference only:
# options = HandLandmarkerOptions(
#     base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
#     num_hands=2)


cap = cv2.VideoCapture(0) # Device index 0: the default camera (e.g. a laptop's built-in webcam)
# Most recent detection result; written by the live-stream result callback.
RESULT = None
# Timestamp passed to detect_async; advanced for every captured frame.
timestamp = 0

# Drawing parameters for the handedness label.
MARGIN = 10  # pixels
FONT_SIZE = 1  # font scale handed to cv2.putText
FONT_THICKNESS = 1  # stroke thickness handed to cv2.putText
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` annotated with every detected hand.

    For each hand, the landmark skeleton is drawn with MediaPipe's default
    hand styles and the handedness label is written above the top-left corner
    of the hand's bounding box. The label position is clamped so the text
    stays inside the image when the hand touches the top or left border.

    Args:
        rgb_image: Image array with shape (height, width, channels). It is
            copied, never modified in place.
        detection_result: Object exposing parallel ``hand_landmarks`` and
            ``handedness`` sequences, one entry per detected hand. Landmark
            coordinates are treated as fractions of the image width/height.

    Returns:
        The annotated copy of ``rgb_image``.
    """
    annotated_image = np.copy(rgb_image)
    height, width = annotated_image.shape[:2]

    detected_hands = zip(detection_result.hand_landmarks, detection_result.handedness)
    for hand_landmarks, handedness in detected_hands:
        # drawing_utils works on protobuf landmark lists, so convert first.
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())

        # Top-left corner of the hand's bounding box, in pixels.
        left = int(min(landmark.x for landmark in hand_landmarks) * width)
        top = int(min(landmark.y for landmark in hand_landmarks) * height)

        label = f"{handedness[0].category_name}"
        (_, label_height), _ = cv2.getTextSize(
            label, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)

        # cv2.putText anchors the text at its bottom-left corner. Keep that
        # anchor at least one text height below the top edge and not left of
        # the image, otherwise the label is clipped or drawn off-screen.
        text_x = max(left, 0)
        text_y = max(top - MARGIN, label_height)

        # Draw handedness (left or right hand) on the image.
        cv2.putText(annotated_image, label,
                    (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                    FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

    return annotated_image


# Result callback for live-stream mode: keeps the latest detection for the display loop.
def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int) -> None:
    """Store ``result`` in the module-level ``RESULT``.

    Despite its name this function does not print anything (the print call is
    commented out). ``output_image`` and ``timestamp_ms`` are accepted but
    not used.
    """
    global RESULT
    RESULT = result
    # Debug aid, disabled:
    # print('hand landmarker result: {}'.format(RESULT))

# Landmarker configuration: load the model bundle from the working directory,
# run in live-stream mode with results delivered to print_result, and
# request up to two hands (num_hands=2).
options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result,
    num_hands=2)

In [9]:
# Drop any detection left over from a previous run of this cell so that stale
# landmarks are not drawn on the first frames.
RESULT = None

# This cell releases the camera when it finishes, so re-open it on a re-run.
if not cap.isOpened():
    cap.open(0)
if not cap.isOpened():
    print("Could not open camera 0")

try:
    # The context manager closes the landmarker even if the loop raises.
    with HandLandmarker.create_from_options(options) as detector:
        while cap.isOpened():
            ret, frame = cap.read()  # ret is False when no frame could be read
            if not ret:
                print("Ignoring empty frame")
                break

            # detect_async expects strictly increasing timestamps in
            # milliseconds; the max() guards against two frames landing in
            # the same millisecond.
            timestamp = max(timestamp + 1, int(time.monotonic() * 1000))

            # Mirror the frame horizontally so the preview behaves like a mirror.
            frame = cv2.flip(frame, 1)

            # OpenCV delivers BGR, while ImageFormat.SRGB means RGB channel order.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Asynchronous detection: the result arrives via the result callback.
            detector.detect_async(mp_image, timestamp)

            # Read the shared result once; the callback may replace it at any time.
            latest_result = RESULT
            if latest_result is not None:
                annotated_image = draw_landmarks_on_image(rgb_frame, latest_result)
                # Convert back to BGR, the channel order cv2.imshow expects.
                cv2.imshow('Hand Tracking', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
            else:
                cv2.imshow('Hand Tracking', frame)

            # Poll the keyboard for 5 ms; Esc (key code 27) stops the stream.
            # The mask strips modifier/high bits some platforms add.
            if cv2.waitKey(5) & 0xFF == 27:
                print("Closing Camera Stream")
                break
finally:
    # Always give the camera back and close the preview window.
    cap.release()
    cv2.destroyAllWindows()

showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing detected image
showing det