La détection de cette silhouette constitue une première étape pour manipuler un avatar, en fournissant les points essentiels pour l'animation dans Unity ou Blender.

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import ipywidgets as widgets
from IPython.display import display

# Shorthand handles on the two MediaPipe solution modules used below.
mp_pose = mp.solutions.pose
mp_hands = mp.solutions.hands

# Body-pose detector; static_image_mode=False asks MediaPipe to treat the
# input as a video stream rather than as unrelated still images.
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Hand detector, same thresholds as the pose detector, at most two hands.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Input video, opened once here and consumed by the processing loop.
video_path = 'video.mov'
cap = cv2.VideoCapture(video_path)

# Notebook widget that receives each rendered frame as JPEG bytes.
image_widget = widgets.Image(format='jpeg')
display(image_widget)

def draw_pose_and_hands_landmarks(pose_landmarks, hand_landmarks_list, image_shape):
    """Render pose and hand landmarks as a stick figure on a white canvas.

    Args:
        pose_landmarks: Pose detection result whose ``landmark`` sequence is
            indexed by ``mp_pose.PoseLandmark``; each entry provides ``x``,
            ``y`` and ``visibility``. May be ``None`` when no pose was
            detected (for example when only hands were found in the frame).
        hand_landmarks_list: Iterable of per-hand results, each with a
            ``landmark`` sequence indexed by ``mp_hands.HandLandmark``.
            May be ``None`` or empty when no hand was detected.
        image_shape: Shape of the source frame; only the first two entries
            (height, width) are used.

    Returns:
        A ``uint8`` array of shape ``(height, width, 3)``: white background,
        pose drawn with colour ``(0, 0, 255)`` and hands with ``(0, 255, 0)``
        (red and green respectively in OpenCV's BGR channel order).
    """
    image_height, image_width = image_shape[:2]
    canvas = np.full((image_height, image_width, 3), 255, dtype=np.uint8)  # white background

    pose_color = (0, 0, 255)
    hand_color = (0, 255, 0)

    def to_pixel(landmark):
        """Scale a landmark's x/y by the frame size and truncate to integer pixels."""
        return int(landmark.x * image_width), int(landmark.y * image_height)

    # The caller invokes this function as soon as *either* detector returns a
    # result, so the pose result can legitimately be missing.
    if pose_landmarks is not None:
        # Upper-body segments only; every visible pose landmark still gets a dot.
        connections = [
            (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.LEFT_ELBOW),
            (mp_pose.PoseLandmark.LEFT_ELBOW, mp_pose.PoseLandmark.LEFT_WRIST),
            (mp_pose.PoseLandmark.RIGHT_SHOULDER, mp_pose.PoseLandmark.RIGHT_ELBOW),
            (mp_pose.PoseLandmark.RIGHT_ELBOW, mp_pose.PoseLandmark.RIGHT_WRIST),
            (mp_pose.PoseLandmark.LEFT_SHOULDER, mp_pose.PoseLandmark.RIGHT_SHOULDER),  # collarbone
            (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.LEFT_SHOULDER),
            (mp_pose.PoseLandmark.NOSE, mp_pose.PoseLandmark.RIGHT_SHOULDER),
        ]

        # Pixel position of every landmark that is confidently visible.
        points = {}
        for landmark_id in mp_pose.PoseLandmark:
            landmark = pose_landmarks.landmark[landmark_id]
            if landmark.visibility > 0.5:
                points[landmark_id] = to_pixel(landmark)
                cv2.circle(canvas, points[landmark_id], 5, pose_color, -1)

        # A segment is drawn only when both of its endpoints were kept above.
        for start, end in connections:
            if start in points and end in points:
                cv2.line(canvas, points[start], points[end], pose_color, 2)

    if hand_landmarks_list:
        # Each finger is a single straight segment from its base joint to its tip.
        fingers = [
            (mp_hands.HandLandmark.THUMB_CMC, mp_hands.HandLandmark.THUMB_TIP),
            (mp_hands.HandLandmark.INDEX_FINGER_MCP, mp_hands.HandLandmark.INDEX_FINGER_TIP),
            (mp_hands.HandLandmark.MIDDLE_FINGER_MCP, mp_hands.HandLandmark.MIDDLE_FINGER_TIP),
            (mp_hands.HandLandmark.RING_FINGER_MCP, mp_hands.HandLandmark.RING_FINGER_TIP),
            (mp_hands.HandLandmark.PINKY_MCP, mp_hands.HandLandmark.PINKY_TIP),
        ]

        for hand_landmarks in hand_landmarks_list:
            for landmark in hand_landmarks.landmark:
                cv2.circle(canvas, to_pixel(landmark), 5, hand_color, -1)

            for base, tip in fingers:
                cv2.line(
                    canvas,
                    to_pixel(hand_landmarks.landmark[base]),
                    to_pixel(hand_landmarks.landmark[tip]),
                    hand_color,
                    2,
                )

    return canvas

# Read the video frame by frame, run both detectors on each frame and push
# the rendered skeleton to the notebook widget. The try/finally guarantees
# the capture is released even if a frame fails to process or the kernel is
# interrupted part-way through the video.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream, or the frame could not be read.
            break

        # OpenCV delivers BGR frames; the detectors are fed the RGB conversion.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results_pose = pose.process(frame_rgb)
        results_hands = hands.process(frame_rgb)

        # Only refresh the widget when at least one detector found something;
        # otherwise the previously displayed frame stays on screen.
        if results_pose.pose_landmarks or results_hands.multi_hand_landmarks:
            canvas = draw_pose_and_hands_landmarks(
                results_pose.pose_landmarks,
                results_hands.multi_hand_landmarks,
                frame.shape,
            )

            encoded, jpeg = cv2.imencode('.jpeg', canvas)
            if encoded:
                # Skip the update rather than display a failed encode.
                image_widget.value = jpeg.tobytes()

        # NOTE(review): no OpenCV window is created in this cell, so this 'q'
        # check presumably never fires in a notebook; kept from the original.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()

I0000 00:00:1726755637.523907 2804004 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
I0000 00:00:1726755637.536853 2804004 gl_context.cc:357] GL version: 2.1 (2.1 ATI-5.5.17), renderer: AMD Radeon Pro 5500M OpenGL Engine
W0000 00:00:1726755637.555036 2813387 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Image(value=b'', format='jpeg')

W0000 00:00:1726755637.577607 2813398 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726755637.667528 2813372 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726755637.691440 2813372 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
