In [4]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the Ultralytics YOLO11 pose-estimation weights ("yolo11n-pose.pt").
# This single model instance is reused for the frames of both cameras below.
model = YOLO("yolo11n-pose.pt")

# Open both webcams (capture device indices 0 and 1).
cap1 = cv2.VideoCapture(0)
cap2 = cv2.VideoCapture(1)

# Both streams are required for triangulation, so bail out unless both opened.
if not (cap1.isOpened() and cap2.isOpened()):
    print("Error: Cannot open one or both cameras.")
    # One device may have opened successfully; release both handles so the
    # camera that did open is not left locked after the script stops.
    cap1.release()
    cap2.release()
    # SystemExit is always available (the bare `exit()` helper is injected by
    # the `site` module) and a non-zero status signals the failure to callers.
    raise SystemExit(1)

# Assumed pinhole intrinsics, shared by both cameras: focal length of 800 px
# on both axes and principal point at (320, 240). These are placeholder values,
# not the result of a calibration.
K = np.array(
    [
        [800, 0, 320],
        [0, 800, 240],
        [0, 0, 1],
    ]
)

# Camera 1 defines the world frame, so its extrinsics are [I | 0]
# (np.eye(3, 4) is exactly that 3x4 matrix).
P1 = K @ np.eye(3, 4)

# Camera 2 has the same orientation and sits `baseline` metres along +x.
# Its extrinsics are [R | -T]: the translation column is the negated camera
# position because R is the identity.
baseline = 0.2  # meters
R = np.eye(3)
T = np.array([[baseline], [0], [0]])
P2 = K @ np.concatenate((R, -T), axis=1)

# Main loop: grab one frame per camera, run pose estimation on each,
# triangulate the matching 2D keypoints into 3D and display the annotated
# frames until 'q' is pressed or a frame cannot be read.
# The try/finally guarantees that the cameras and windows are released even
# when an exception (or Ctrl-C) interrupts the loop.
try:
    while True:
        ret1, frame1 = cap1.read()
        ret2, frame2 = cap2.read()
        if not (ret1 and ret2):
            print("Error reading frames.")
            break

        # Detect keypoints
        results1 = model(frame1)
        results2 = model(frame2)

        # Extract keypoints as (num_persons, N, 2) pixel coordinates.
        # NOTE(review): only the first detection of each view is used and it is
        # assumed to be the same person in both cameras — confirm for
        # multi-person scenes.
        keypoints1 = results1[0].keypoints.xy.cpu().numpy() if results1[0].keypoints else []
        keypoints2 = results2[0].keypoints.xy.cpu().numpy() if results2[0].keypoints else []

        if len(keypoints1) > 0 and len(keypoints2) > 0:
            xy1 = keypoints1[0]  # shape: (N, 2)
            xy2 = keypoints2[0]  # shape: (N, 2)

            if xy1.shape == xy2.shape:
                # Ultralytics zeroes the coordinates of keypoints it considers
                # not visible; triangulating those (0, 0) placeholders would
                # produce meaningless 3D points, so keep only keypoints that
                # were located in both views.
                visible = xy1.any(axis=1) & xy2.any(axis=1)
                indices = np.flatnonzero(visible)

                if indices.size > 0:
                    pts1 = xy1[visible].T  # shape: (2, M)
                    pts2 = xy2[visible].T  # shape: (2, M)

                    pts_4d = cv2.triangulatePoints(P1, P2, pts1, pts2)

                    # A zero homogeneous coordinate means a point at infinity
                    # (e.g. zero disparity); silence the division warning and
                    # drop the resulting non-finite points below.
                    with np.errstate(divide="ignore", invalid="ignore"):
                        pts_3d = (pts_4d[:3] / pts_4d[3]).T  # shape: (M, 3)

                    # Print or process 3D points, keeping the original
                    # keypoint index so skipped joints do not shift the labels.
                    for i, point in zip(indices, pts_3d):
                        if np.isfinite(point).all():
                            print(f"3D Keypoint {i}: {point}")

        # Show annotated frames
        annotated1 = results1[0].plot()
        annotated2 = results2[0].plot()
        cv2.imshow("Cam1", annotated1)
        cv2.imshow("Cam2", annotated2)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Cleanup
    cap1.release()
    cap2.release()
    cv2.destroyAllWindows()


: 