# Pose Estimation with YOLOv8

In [1]:
# Libraries
from ultralytics import YOLO 
import cv2
import torch

In [5]:
class PoseEstimator:
    """Extract pose keypoints from single images and from video files.

    The wrapped ``model`` must expose ``predict(image, save=False)`` returning
    a sequence whose first element has a ``keypoints.data`` tensor (as the
    Ultralytics YOLO pose models do).
    """

    # Number of leading keypoints dropped from every detection. With the
    # 17-keypoint output of the model used in this notebook, 13 remain.
    # NOTE(review): presumably this removes head/face landmarks -- confirm
    # that 4 (rather than 5) is the intended cut-off.
    _SKIPPED_KEYPOINTS = 4

    def __init__(self, model):
        """Store the pose model used for every prediction."""
        self.model = model

    def estimate_pose(self, image):
        """Return the keypoints of the first detection in ``image``.

        Args:
            image: Input forwarded unchanged to ``self.model.predict``.

        Returns:
            A tensor holding the first detection's keypoints with the first
            ``_SKIPPED_KEYPOINTS`` rows removed, or ``None`` when the model
            produced no keypoints for this image. Each row has three values,
            presumably (x, y, confidence) -- verify against the model docs.
        """
        # Predict pose on the image; only the first result is used.
        results = self.model.predict(image, save=False)[0]
        keypoints = results.keypoints
        # No detection: depending on the library version the tensor is
        # missing or empty. Indexing it would raise or produce a tensor whose
        # shape cannot be stacked with the other frames.
        if keypoints is None or keypoints.data.numel() == 0:
            return None
        return keypoints.data[0][self._SKIPPED_KEYPOINTS:]

    def process_video(self, video_path):
        """Run pose estimation on every frame of a video.

        Args:
            video_path: Path (or any source accepted by ``cv2.VideoCapture``)
                of the video to read.

        Returns:
            A list with one keypoint tensor per frame in which a pose was
            found, in frame order. Frames without a detection are skipped.
            Returns ``None`` if the video could not be opened.
        """
        # Open the video file
        video_capture = cv2.VideoCapture(video_path)
        # Check if video is opened successfully
        if not video_capture.isOpened():
            print("Error: Could not open video.")
            return None

        tensors = []
        frame_number = 0
        skipped_frames = 0
        try:
            # Loop until the capture stops delivering frames.
            while True:
                success, frame = video_capture.read()
                if not success:
                    break
                keypoints = self.estimate_pose(frame)
                frame_number += 1
                if keypoints is None:
                    skipped_frames += 1
                    continue
                tensors.append(keypoints)
                print(keypoints)
        finally:
            # Release the capture even if inference fails part-way through.
            video_capture.release()

        print("Number of frames processed:", frame_number)
        if skipped_frames:
            print("Frames skipped (no pose detected):", skipped_frames)
        return tensors

In [6]:
if __name__ == "__main__":
    # Path to the input video file
    video_path = "caminando.mp4"
    # Load the pretrained pose model
    model = YOLO('yolov8n-pose.pt')
    # Wrap the model so it can be applied frame by frame
    pose_estimator = PoseEstimator(model)
    # Collect one keypoint tensor per processed frame
    pose_data = pose_estimator.process_video(video_path)
    # process_video returns None when the video cannot be opened, and
    # torch.stack rejects an empty list; fail with a clear message instead.
    if not pose_data:
        raise SystemExit(f"No pose data extracted from {video_path!r}.")
    # Stack the per-frame tensors along a new leading (frame) dimension
    pose_tensor = torch.stack(pose_data)
    print(pose_tensor)



0: 640x384 1 person, 145.3ms
Speed: 3.5ms preprocess, 145.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 384)
torch.Size([17, 3])
tensor([[0.0000e+00, 0.0000e+00, 8.6713e-02],
        [6.1708e+02, 4.1625e+02, 9.9786e-01],
        [5.8589e+02, 4.0864e+02, 8.3944e-01],
        [6.2094e+02, 5.2067e+02, 9.9414e-01],
        [0.0000e+00, 0.0000e+00, 2.9199e-01],
        [5.8919e+02, 6.0856e+02, 9.7703e-01],
        [0.0000e+00, 0.0000e+00, 3.3860e-01],
        [6.0175e+02, 6.0494e+02, 9.9744e-01],
        [5.8409e+02, 5.9840e+02, 9.7698e-01],
        [5.9885e+02, 7.4088e+02, 9.9572e-01],
        [5.9296e+02, 7.2824e+02, 9.6637e-01],
        [6.0818e+02, 8.7967e+02, 9.7812e-01],
        [6.0763e+02, 8.5737e+02, 9.2516e-01]])

0: 640x384 1 person, 132.1ms
Speed: 4.8ms preprocess, 132.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 384)
torch.Size([17, 3])
tensor([[0.0000e+00, 0.0000e+00, 8.4197e-02],
        [6.1704e+02, 4.1630e+02, 9.9778e-01],
        [5.86

In [7]:
pose_tensor.shape

torch.Size([307, 13, 3])