In [1]:
from ultralytics import YOLO
import cv2
import numpy as np

In [2]:
# Load the pose-estimation model from its weights file.
POSE_WEIGHTS = "yolov8m-pose.pt"
model = YOLO(POSE_WEIGHTS)

In [3]:
# Open the camera at device index 0 and fail fast when it is unavailable,
# instead of letting the later cells silently read no frames and exit.
CAMERA_INDEX = 0
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    raise RuntimeError(
        f"Could not open camera at index {CAMERA_INDEX}; "
        "check that it is connected and not in use by another program."
    )

In [4]:
# Mode flags: both start off, so the main loop begins in plain live view.
recording, playing = False, False

# One entry per recorded frame: a keypoint array, or None when no person was seen.
recorded_keypoints = list()

# Position of the next frame to show while replaying a recording.
current_playback_index = 0

# Fallback canvas size, used until a real camera frame reports its dimensions.
frame_width = 640
frame_height = 480

In [6]:
# Probe a single frame so the playback canvas matches the camera's actual
# resolution; the preset width/height stay in effect when the read fails.
ret, frame = cap.read()
if ret:
    frame_height, frame_width, *_ = frame.shape

In [7]:
# Skeleton edges as [start, end] pairs of keypoint indices, grouped by body
# region and concatenated in drawing order.
_HEAD_EDGES = [[0, 1], [0, 2], [1, 3], [2, 4]]
_ARM_EDGES = [[5, 6], [5, 7], [7, 9], [6, 8], [8, 10]]
_TORSO_EDGES = [[11, 12], [5, 11], [6, 12]]  # shoulders to hips
_LEG_EDGES = [[11, 13], [13, 15], [12, 14], [14, 16]]

connections = _HEAD_EDGES + _ARM_EDGES + _TORSO_EDGES + _LEG_EDGES

In [None]:
# Interactive capture / playback loop.
#   r - start or stop recording the first detected person's keypoints
#   p - replay the recording as a skeleton drawn on a blank canvas
#   q - leave the loop
WINDOW_NAME = "YOLOv8 Pose Skeleton Mimic"
DEFAULT_PLAYBACK_DELAY_MS = 33  # used when no frame interval could be measured

playback_delay_ms = DEFAULT_PLAYBACK_DELAY_MS
record_start_tick = cv2.getTickCount()


def _first_person_keypoints(result):
    """Return the first person's normalized keypoints, or None if there is none.

    Args:
        result: One element of the list returned by calling the model on a frame.

    Returns:
        A NumPy array of normalized keypoint coordinates, or None when the
        result carries no keypoints or no person.
    """
    keypoints = result.keypoints
    if keypoints is None or len(keypoints.xy) == 0:
        return None
    kpts = keypoints.xyn[0].cpu().numpy()
    # NOTE(review): some Ultralytics versions appear to return an empty
    # per-person array when nothing is detected; treat that as "no person" so
    # playback never indexes into an empty array.
    return kpts if kpts.shape[0] > 0 else None


def _frame_delay_ms(start_tick, frame_count):
    """Average time per recorded frame in milliseconds, never below 1.

    The lower bound matters because cv2.waitKey(0) would block until a key
    is pressed.

    Args:
        start_tick: cv2.getTickCount() value taken when recording started.
        frame_count: Number of frames captured since then.

    Returns:
        Integer delay to pass to cv2.waitKey during playback.
    """
    if frame_count <= 0:
        return DEFAULT_PLAYBACK_DELAY_MS
    elapsed_ms = (cv2.getTickCount() - start_tick) * 1000.0 / cv2.getTickFrequency()
    return max(1, int(round(elapsed_ms / frame_count)))


def _render_playback_frame(kpts_norm, index, total):
    """Draw one recorded pose as a skeleton on a blank canvas.

    Args:
        kpts_norm: Array of normalized (x, y) keypoints, or None when no
            person was recorded for this frame.
        index: Zero-based position of the frame within the recording.
        total: Number of recorded frames, shown in the status text.

    Returns:
        A (frame_height, frame_width, 3) uint8 image.
    """
    canvas = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)

    if kpts_norm is not None:
        # Scale normalized coordinates to pixels. Plain Python ints avoid the
        # argument-parsing errors NumPy scalars can trigger on some OpenCV builds.
        points = (kpts_norm * (frame_width, frame_height)).astype(int).tolist()
        num_points = len(points)

        def _on_canvas(pt):
            # Strict bounds also reject points at the origin, which appears to
            # be how missing keypoints are reported - TODO confirm.
            return 0 < pt[0] < frame_width and 0 < pt[1] < frame_height

        # Skeleton edges: drawn only when both endpoints exist and are visible.
        for start_idx, end_idx in connections:
            if 0 <= start_idx < num_points and 0 <= end_idx < num_points:
                start, end = points[start_idx], points[end_idx]
                if _on_canvas(start) and _on_canvas(end):
                    cv2.line(canvas, tuple(start), tuple(end), (0, 255, 0), 2)

        # Keypoints on top of the edges.
        for pt in points:
            if _on_canvas(pt):
                cv2.circle(canvas, tuple(pt), 5, (0, 0, 255), -1)

    cv2.putText(canvas, f"PLAYBACK: {index+1}/{total}",
                (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
    return canvas


while True:
    delay_ms = 1

    if not playing:
        # Live mode - capture a frame; stop when the camera yields nothing.
        ret, frame = cap.read()
        if not ret:
            break

        # Run pose estimation. device=0 requests the first CUDA device;
        # change it if this machine has no GPU.
        results = model(frame, device=0, verbose=False)
        annotated_frame = results[0].plot()

        if recording:
            # Always append exactly one entry per frame (None = no person) so
            # the recording keeps its original timeline.
            recorded_keypoints.append(_first_person_keypoints(results[0]))
            cv2.putText(annotated_frame, "RECORDING", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow(WINDOW_NAME, annotated_frame)
    elif current_playback_index < len(recorded_keypoints):
        # Playback mode - show the next recorded pose at the recorded pace.
        canvas = _render_playback_frame(
            recorded_keypoints[current_playback_index],
            current_playback_index,
            len(recorded_keypoints),
        )
        cv2.imshow(WINDOW_NAME, canvas)
        current_playback_index += 1
        delay_ms = playback_delay_ms
    else:
        # End of playback - fall back to live mode on the next iteration.
        playing = False
        current_playback_index = 0

    # Key controls
    key = cv2.waitKey(delay_ms) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('r'):
        # Toggling recording always leaves playback mode.
        playing = False
        current_playback_index = 0
        recording = not recording
        if recording:
            recorded_keypoints = []  # Clear previous recording
            record_start_tick = cv2.getTickCount()
            print("Recording started...")
        else:
            playback_delay_ms = _frame_delay_ms(record_start_tick, len(recorded_keypoints))
            print(f"Recording finished. {len(recorded_keypoints)} frames captured.")
    elif key == ord('p'):
        if recording:
            # Finish the recording first; otherwise it would silently resume
            # appending frames once playback ends.
            recording = False
            playback_delay_ms = _frame_delay_ms(record_start_tick, len(recorded_keypoints))
            print(f"Recording finished. {len(recorded_keypoints)} frames captured.")
        if recorded_keypoints:
            playing = True
            current_playback_index = 0
            print("Starting playback...")
        else:
            print("Record something first!")

In [None]:
# Release the camera and close every OpenCV window.
cap.release()
cv2.destroyAllWindows()
# HighGUI handles window events only inside waitKey, so pump the event loop
# once more; without this the window can stay on screen in a notebook kernel.
# The trailing semicolon keeps the return value out of the cell output.
cv2.waitKey(1);