In [1]:
import torch
import torchvision
import cv2
import numpy as np
from pathlib import Path
from boxmot import BoTSORT

# Load a Keypoint R-CNN model with COCO-pretrained weights from torchvision.
# The explicit `weights=` enum replaces the deprecated `pretrained=True` flag
# and selects the same COCO_V1 checkpoint.
device = torch.device('cpu')  # Change to 'cuda' if you have a GPU available
pose_model = torchvision.models.detection.keypointrcnn_resnet50_fpn(
    weights=torchvision.models.detection.KeypointRCNN_ResNet50_FPN_Weights.COCO_V1
)
pose_model.eval().to(device)  # inference mode: detections are returned instead of losses

# BoT-SORT tracker; the ReID weights file is used for appearance matching.
tracker = BoTSORT(
    reid_weights=Path('osnet_x0_25_msmt17.pt'),  # ReID model to use
    device=device,
    half=False,
)

# Open the default camera (device index 0). Pass a file path instead of 0 to
# read frames from a video file.
vid = cv2.VideoCapture(0)

def get_color(track_id):
    """Return a deterministic colour triple for a track ID.

    The ID seeds a private ``RandomState`` so the same ID always maps to the
    same colour, without reseeding NumPy's global random generator (which
    would silently affect any other code relying on ``np.random``).

    Args:
        track_id: Track identifier; anything ``int()`` accepts.

    Returns:
        A tuple of three Python ints, each in ``[0, 255)``.
    """
    # Legacy seeds must lie in [0, 2**32); wrap so out-of-range IDs still work.
    seed = int(track_id) % (2 ** 32)
    rng = np.random.RandomState(seed)
    return tuple(rng.randint(0, 255, 3).tolist())

# Main loop: grab a frame, run pose detection, feed the detections to the
# tracker, then draw keypoints and track boxes until the stream ends or the
# user presses q / space.

# Detections scoring below this are dropped before tracking.
confidence_threshold = 0.5

try:
    while True:
        ret, im = vid.read()
        if not ret:
            break

        # OpenCV delivers BGR frames while torchvision detection models expect
        # RGB input, so swap the channel order before inference. Drawing and
        # tracking keep using the original BGR frame `im`.
        rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        frame_tensor = torchvision.transforms.functional.to_tensor(rgb).unsqueeze(0).to(device)

        # Run the Keypoint R-CNN model to detect keypoints and bounding boxes
        with torch.no_grad():
            results = pose_model(frame_tensor)[0]

        # Keep only confident detections, converting each field once per frame
        keep = results['scores'] >= confidence_threshold
        boxes = results['boxes'][keep].cpu().numpy()
        scores = results['scores'][keep].cpu().numpy()
        labels = results['labels'][keep].cpu().numpy()
        keypoints = results['keypoints'][keep].cpu().numpy()

        # N x (x1, y1, x2, y2, conf, cls). column_stack keeps the array 2-D
        # (0 x 6) when nothing was detected, whereas np.array([]) would be 1-D.
        dets = np.column_stack((boxes, scores, labels)).astype(np.float64)

        # Update tracker with detections and image
        tracks = tracker.update(dets, im)  # M x (x, y, x, y, id, conf, cls, ind)

        for track in tracks:
            x1, y1, x2, y2 = (int(v) for v in track[:4])
            track_id = int(track[4])
            conf = float(track[5])  # keep as float; an int cast would truncate it to 0
            cls = int(track[6])
            det_index = int(track[7])  # row of `dets` this track was matched to

            # Draw the keypoints belonging to the detection behind this track.
            # NOTE(review): torchvision documents keypoints as [x, y, v] where v
            # is a visibility flag, not a confidence; the per-keypoint scores
            # live in results['keypoints_scores'] - confirm which one should be
            # thresholded here.
            if 0 <= det_index < len(keypoints):
                for kp_x, kp_y, visibility in keypoints[det_index]:
                    if visibility > 0.5:
                        cv2.circle(im, (int(kp_x), int(kp_y)), 3, (0, 0, 255), -1)  # red

            color = get_color(track_id)

            # Draw bounding box with unique color
            cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)

            # Add text with ID, confidence, and class
            cv2.putText(im, f'ID: {track_id}, Conf: {conf:.2f}, Class: {cls}',
                        (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Display the image
        cv2.imshow('Pose Tracking', im)

        # Break on pressing q or space
        key = cv2.waitKey(1) & 0xFF
        if key == ord(' ') or key == ord('q'):
            break
finally:
    # Always free the camera and close the window, even when the loop is
    # aborted by an exception or a kernel interrupt.
    vid.release()
    cv2.destroyAllWindows()


[32m2024-09-30 22:20:10.435[0m | [1mINFO    [0m | [36mboxmot.utils.torch_utils[0m:[36mselect_device[0m:[36m52[0m - [1mYolo Tracking v11.0.0 🚀 Python-3.11.5 torch-2.2.2CPU[0m
[32m2024-09-30 22:20:10.455[0m | [32m[1mSUCCESS [0m | [36mboxmot.appearance.reid_model_factory[0m:[36mload_pretrained_weights[0m:[36m183[0m - [32m[1mLoaded pretrained weights from osnet_x0_25_msmt17.pt[0m


KeyboardInterrupt: 

: 