# Pose Landmarks Detection with MediaPipe Tasks
- mediapipe.solutions already provides a complete solution for pose detection
- with mediapipe.tasks specific settings can be made and the output can be customized

Documentation: https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/python


## 1. Preparation
- Install mediapipe 
- Import necessary libraries
- Store the paths to the Pose Landmarker models in variables (download the models from https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/index#models)

In [1]:
# Run command in terminal: python -m pip install mediapipe
import numpy as np
import cv2
import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Paths to the downloaded Pose Landmarker model bundles, relative to the
# working directory. Forward slashes are used as the separator because they
# are accepted on Windows as well as on Linux/macOS, whereas a backslash is
# only a path separator on Windows.
model_asset_path_heavy = 'mediapipe_models/pose_landmarker_heavy.task'
model_asset_path_full = 'mediapipe_models/pose_landmarker_full.task'
model_asset_path_lite = 'mediapipe_models/pose_landmarker_lite.task'

## 2. Function to Draw Pose Landmarks on an Image

In [2]:
def draw_landmarks_on_image(rgb_image, detection_result: vision.PoseLandmarkerResult):
    """Return a copy of ``rgb_image`` with every detected pose drawn on it.

    Args:
        rgb_image: Image array to annotate. It is copied first, so the
            caller's array is never modified.
        detection_result: Pose landmarker output; its ``pose_landmarks``
            attribute holds one sequence of landmarks per detected pose.

    Returns:
        The annotated copy. When ``pose_landmarks`` is empty the copy is
        returned without any drawing.
    """
    annotated_image = np.copy(rgb_image)

    # Draw each detected pose onto the same copy.
    for pose_landmarks in detection_result.pose_landmarks:
        # draw_landmarks is handed a NormalizedLandmarkList proto, so the
        # x/y/z of every landmark is copied into one.
        pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        pose_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            pose_landmarks_proto,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style())
    return annotated_image



### 2.1 Function to process landmark results asynchronously

In [5]:

def print_result(result: vision.PoseLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    """Print a pose landmarker result; intended as a live-stream result callback.

    Args:
        result: Detection result to print.
        output_image: Image supplied alongside the result (currently unused).
        timestamp_ms: Timestamp supplied alongside the result (currently unused).
    """
    print(f'pose landmarker result: {result}')

    # TODO: visualize the result instead of only printing it. The drawing
    # could reuse draw_landmarks_on_image(output_image.numpy_view(), result);
    # an earlier draft passed the mp.Image itself to np.copy.
    # NOTE(review): calling cv2.imshow from inside this callback may not work
    # if MediaPipe invokes it off the main thread -- confirm before enabling.


## 3. Running inference and visualizing the results
- load base options
  - specify model (model_asset_path_heavy, model_asset_path_full or model_asset_path_lite)
- open webcam
- detect pose landmarks
- visualize pose landmarks

In [3]:
# Create a PoseLandmarker object.
# Choose model_asset_path_heavy, model_asset_path_full or model_asset_path_lite.
base_options = python.BaseOptions(model_asset_path=model_asset_path_heavy)

options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    # NOTE: segmentation masks are requested here but never read below.
    output_segmentation_masks=True)

# The context manager closes the landmarker when the loop ends or an error
# is raised.
with vision.PoseLandmarker.create_from_options(options) as detector:
    # Open webcam
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            # SystemExit stops the cell/script without relying on the
            # interactive exit() helper.
            raise SystemExit("Camera could not be opened.")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break

            # Display the raw webcam feed (OpenCV works with BGR frames).
            cv2.imshow('Webcam Feed', frame)

            # OpenCV delivers BGR frames, while ImageFormat.SRGB describes
            # RGB data, so the channels are reordered before detection.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            detection_result = detector.detect(img)

            # Process the detection result. In this case, visualize it.
            img_annotated = draw_landmarks_on_image(img.numpy_view(), detection_result)

            # The annotated image is RGB; convert back to BGR for cv2.imshow.
            cv2.imshow('Annotated Webcam Feed',
                       cv2.cvtColor(img_annotated, cv2.COLOR_RGB2BGR))

            # Quit on any key press or when the 'Webcam Feed' window is closed.
            key = cv2.waitKey(1)
            if key != -1 or cv2.getWindowProperty('Webcam Feed', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Clean up even if detection or drawing raised.
        cap.release()
        cv2.destroyAllWindows()

### 3.1 Async Running inference and visualizing the results with LiveStream
In section 3, each webcam frame is processed synchronously. In this section, the live stream of the webcam feed is processed asynchronously. The results are printed by the print_result callback, but not yet visualized.
- load base options
  - specify model (model_asset_path_heavy, model_asset_path_full or model_asset_path_lite) 
- open webcam
- detect pose landmarks
- visualize pose landmarks (not yet implemented in this section)


In [6]:
import time

# Create a PoseLandmarker object in live-stream mode; results are delivered
# to the print_result callback instead of being returned.
# Choose model_asset_path_heavy, model_asset_path_full or model_asset_path_lite.
base_options = python.BaseOptions(model_asset_path=model_asset_path_heavy)

options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    running_mode=vision.RunningMode.LIVE_STREAM,
    result_callback=print_result,
    # NOTE: segmentation masks are requested here but never read below.
    output_segmentation_masks=True)

with vision.PoseLandmarker.create_from_options(options) as landmarker:
    # Open webcam
    cap = cv2.VideoCapture(0)
    try:
        if not cap.isOpened():
            # SystemExit stops the cell/script without relying on the
            # interactive exit() helper.
            raise SystemExit("Camera could not be opened.")

        stream_start = time.monotonic()
        frame_timestamp_ms = -1

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Can't receive frame (stream end?). Exiting ...")
                break

            # Timestamp for the pose landmarker task. CAP_PROP_POS_MSEC is
            # not a dependable clock for a live camera, so elapsed monotonic
            # time is used and forced to increase by at least 1 ms per frame.
            elapsed_ms = int((time.monotonic() - stream_start) * 1000)
            frame_timestamp_ms = max(frame_timestamp_ms + 1, elapsed_ms)

            # Display webcam feed (OpenCV works with BGR frames).
            cv2.imshow('Webcam Feed', frame)

            # OpenCV delivers BGR frames, while ImageFormat.SRGB describes
            # RGB data, so the channels are reordered before detection.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
            landmarker.detect_async(mp_image, frame_timestamp_ms)

            # Results are only printed by print_result; no annotated image is
            # displayed in this cell yet.

            # Quit on any key press or when the 'Webcam Feed' window is closed.
            key = cv2.waitKey(1)
            if key != -1 or cv2.getWindowProperty('Webcam Feed', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Clean up even if detection raised.
        cap.release()
        cv2.destroyAllWindows()


pose landmarker result: PoseLandmarkerResult(pose_landmarks=[], pose_world_landmarks=[], segmentation_masks=None)
pose landmarker result: PoseLandmarkerResult(pose_landmarks=[], pose_world_landmarks=[], segmentation_masks=None)
pose landmarker result: PoseLandmarkerResult(pose_landmarks=[[NormalizedLandmark(x=0.601465106010437, y=0.7012052536010742, z=-0.8109211921691895, visibility=0.9996911287307739, presence=0.9998039603233337), NormalizedLandmark(x=0.6059674620628357, y=0.6403745412826538, z=-0.7060367465019226, visibility=0.9993002414703369, presence=0.9997801184654236), NormalizedLandmark(x=0.6152136921882629, y=0.6397086381912231, z=-0.7067310810089111, visibility=0.9991752505302429, presence=0.9997718930244446), NormalizedLandmark(x=0.6226288676261902, y=0.6392333507537842, z=-0.7066037058830261, visibility=0.9992615580558777, presence=0.9996657371520996), NormalizedLandmark(x=0.5642940402030945, y=0.6412454843521118, z=-0.7699021697044373, visibility=0.999665379524231, presenc