# Pose Estimation with camera in real time

In [1]:
import cv2
from ultralytics import YOLO

# Load the pose model from the yolov8n-pose.pt weights and open the camera.
model = YOLO("yolov8n-pose.pt")
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    # fall back to camera 0 if 1 is not available
    cap.release()
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        raise RuntimeError("No camera available")

# Show the annotated frame for every captured image until ESC is pressed or
# the camera stops delivering frames. The try/finally guarantees that the
# camera and the preview window are released even when the loop raises or
# the kernel is interrupted.
try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        r = model(frame, verbose=False)[0]   # first (only) result for this frame
        cv2.imshow("pose", r.plot())
        # mask to the low byte: some backends set higher bits in the key code
        if cv2.waitKey(1) & 0xFF == 27:  # ESC
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


# Showing only Keypoints

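Hinweis: Ausgewählte Keypoint-Gruppen werden vor dem Zeichnen ausgeblendet, indem ihre Konfidenz auf 0 gesetzt wird – z. B. die Kopf-Keypoints (COCO-Indices 0–4: Nase, linkes/rechtes Auge, linkes/rechtes Ohr) mit HIDE_GROUPS = ["head"]. In der Voreinstellung der folgenden Zelle (HIDE_GROUPS = [""]) wird nichts ausgeblendet.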

Gruppen-Definitionen (COCO 17 KP):
- head: [0–4]
- shoulders: [5,6]
- arms: [5–10]
- wrists: [9,10]
- hips: [11,12]
- legs: [11–16]
- knees: [13,14]
- ankles: [15,16]
- torso: [5,6,11,12]
- all: [0–16]

Steuerung: Passe in der folgenden Zelle die Variable HIDE_GROUPS an, z. B. ["head"], ["head","arms"], ["legs"], ["all"].

In [5]:
import cv2, numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator

# Optional dependency: pyvirtualcam provides the virtual webcam output.
# HAVE_PYVIRTUALCAM records whether both imports succeeded; any exception
# raised while importing simply disables the virtual camera.
try:
    import pyvirtualcam
    from pyvirtualcam import PixelFormat
except Exception:
    HAVE_PYVIRTUALCAM = False
else:
    HAVE_PYVIRTUALCAM = True

model = YOLO("yolov8n-pose.pt")

# Try camera index 1 first, then index 0; give up if neither can be opened.
for camera_index in (1, 0):
    cap = cv2.VideoCapture(camera_index)
    if cap.isOpened():
        break
else:
    raise RuntimeError("No camera available")

# Read one frame up front: its dimensions are used to size the virtual camera.
ok, frame = cap.read()
if not ok:
    cap.release()
    raise RuntimeError("Can't read from camera")

height = frame.shape[0]
width = frame.shape[1]

# Open the virtual camera only if pyvirtualcam could be imported.
# Any failure is reported and leaves cam as None.
cam = None
if HAVE_PYVIRTUALCAM:
    try:
        # RGB pixel format (pyvirtualcam expects RGB frames)
        cam = pyvirtualcam.Camera(
            width=width,
            height=height,
            fps=20,
            fmt=PixelFormat.RGB,
        )
        print('pyvirtualcam opened:', cam.device)
    except Exception as err:
        print('Failed to open virtual camera:', err)
        cam = None

# COCO 17 keypoints: names, listed in index order (index 0 = "nose")
KEYPOINT_NAMES = [
    # head and facial
    "nose",
    "left_eye", "right_eye",
    "left_ear", "right_ear",
    # upper body
    "left_shoulder", "right_shoulder",
    "left_elbow", "right_elbow",
    "left_wrist", "right_wrist",
    # lower body
    "left_hip", "right_hip",
    "left_knee", "right_knee",
    "left_ankle", "right_ankle",
]

# Named keypoint groups -> indices into KEYPOINT_NAMES
GROUP_IDXS = dict(
    # head and facial
    head=list(range(0, 5)),
    # upper body
    shoulders=[5, 6],
    arms=list(range(5, 11)),
    wrists=[9, 10],
    # lower body
    hips=[11, 12],
    legs=list(range(11, 17)),
    knees=[13, 14],
    ankles=[15, 16],
    # torso rectangle (shoulders + hips)
    torso=[5, 6, 11, 12],
    # convenience: every keypoint
    all=list(range(len(KEYPOINT_NAMES))),
)

# Groups whose keypoints get zero confidence before drawing (i.e. are hidden).
# Examples: ["head"], ["head", "arms"], ["legs"], ["all"]
# The default entry "" is not a key of GROUP_IDXS, so it selects no keypoints.
HIDE_GROUPS = [""]

def merged_idxs(groups, group_map=None):
    """
    Return the sorted, de-duplicated keypoint indices covered by 'groups'.

    groups:    iterable of group names; a single string is treated as one group name.
    group_map: optional mapping of group name -> list of indices. When omitted,
               the module-level GROUP_IDXS is used.

    Empty names are skipped silently. Any other name missing from the mapping
    contributes no indices and triggers a warning, so a misspelled group does not
    go unnoticed.
    """
    import warnings

    if group_map is None:
        group_map = GROUP_IDXS
    if isinstance(groups, str):
        # a bare string would otherwise be iterated character by character
        groups = [groups]

    merged = set()
    for name in groups:
        if not name:
            continue  # empty placeholder entry: nothing to hide
        if name not in group_map:
            warnings.warn(
                f"Unknown keypoint group {name!r}; known groups: {sorted(group_map)}",
                stacklevel=2,
            )
            continue
        merged.update(group_map[name])
    return sorted(merged)

# Sorted keypoint indices of all groups named in HIDE_GROUPS, computed once here.
# Names that are not keys of GROUP_IDXS contribute no indices.
HIDE_IDXS = merged_idxs(HIDE_GROUPS)

# ---------- cube drawing helpers ----------
MIN_CONF = 0.30
# Ensure points are within image bounds for drawing
def _clip_point(pt, w, h):
    x, y = int(round(pt[0])), int(round(pt[1]))
    return (max(0, min(w - 1, x)), max(0, min(h - 1, y)))

def draw_cube(img, center, u, wv, side, depth_scale=0.6, color=(0, 255, 0), thickness=2):
    """
    Draws a faux-3D cube at 'center' using oriented basis vectors u (arm direction) and
    wv (its image-plane perpendicular). 'side' is the base square side length in pixels.
    depth_scale controls the offset between the two squares.

    The wireframe (two squares plus the four edges connecting them) is drawn onto
    'img' in place with cv2.line; nothing is returned. 'u' and 'wv' are normalised
    internally without modifying the arrays passed by the caller. A zero-length
    vector is left as-is instead of being divided by zero.
    """
    # normalize basis just in case; divide into NEW arrays, because np.asarray may
    # hand back the caller's own array and an in-place division would alter it
    u = np.asarray(u, dtype=float)
    wv = np.asarray(wv, dtype=float)
    u = u / (np.linalg.norm(u) or 1.0)
    wv = wv / (np.linalg.norm(wv) or 1.0)

    a = side / 2.0
    c = np.asarray(center, dtype=float)
    # base square corners, walking around the square
    c0 = c + (-a) * u + (-a) * wv
    c1 = c + ( a) * u + (-a) * wv
    c2 = c + ( a) * u + ( a) * wv
    c3 = c + (-a) * u + ( a) * wv
    base = [c0, c1, c2, c3]

    # top square offset (simple isometric-ish offset)
    off = (u + wv) * (a * depth_scale)
    top = [p + off for p in base]

    # integer pixel coordinates, clamped to the image bounds
    h, w = img.shape[:2]
    base_i = [_clip_point(p, w, h) for p in base]
    top_i  = [_clip_point(p, w, h) for p in top]

    # draw squares
    for pts in (base_i, top_i):
        for i in range(4):
            p1, p2 = pts[i], pts[(i + 1) % 4]
            cv2.line(img, p1, p2, color, thickness)
    # connect corresponding corners
    for i in range(4):
        cv2.line(img, base_i[i], top_i[i], color, thickness)

# Main loop: run the pose model on every camera frame, draw the keypoints (minus the
# hidden groups) and a cube on the left forearm onto a black canvas, then show the
# result locally and, when available, send it to the virtual camera.
# The try/finally guarantees that the camera, the virtual camera and the preview
# window are released even when the loop raises or the kernel is interrupted.

# smallest keypoint count that covers every index used below (7, 9 and HIDE_IDXS)
min_kpts = max([9] + list(HIDE_IDXS)) + 1

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        res = model(frame, verbose=False)[0]
        canvas = np.zeros_like(frame)                     # black background
        ann = Annotator(canvas, line_width=2)

        # draw keypoints with selected groups hidden
        keypoints = getattr(res, 'keypoints', None)
        if keypoints is not None:
            # iterate persons; each kp holds rows of [x, y, conf] so confidences can be zeroed
            for kp in keypoints.data:
                # defensive: skip tensors too small for the indices used below
                if kp.ndim != 2 or kp.shape[0] < min_kpts or kp.shape[1] != 3:
                    continue

                kpf = kp.clone()
                if HIDE_IDXS:
                    kpf[HIDE_IDXS, 2] = 0.0              # hide selected groups by zeroing confidence
                ann.kpts(kpf, radius=3)                  # draw remaining kpts + bones

                # ---- draw cube on left arm (elbow->wrist) ----
                # indices: left_elbow=7, left_wrist=9 (COCO)
                # reads the unmodified kp, so the cube is drawn even if the arm is hidden
                ex, ey, ec = kp[7].tolist()
                wx, wy, wc = kp[9].tolist()
                if ec >= MIN_CONF and wc >= MIN_CONF:
                    # arm direction and perpendicular in image plane
                    v = np.array([wx - ex, wy - ey], dtype=float)
                    L = np.linalg.norm(v)
                    if L > 1.0:
                        u = v / L
                        wv = np.array([-u[1], u[0]], dtype=float)  # 90° rotate in image plane
                        center = ((ex + wx) / 2.0, (ey + wy) / 2.0)
                        # scale cube size with arm length (clamped)
                        side = float(np.clip(0.6 * L, 20.0, 80.0))
                        draw_cube(canvas, center, u, wv, side, depth_scale=0.7, color=(0, 255, 255), thickness=2)

        out = ann.result()

        # send to virtual camera (pyvirtualcam expects RGB order)
        if HAVE_PYVIRTUALCAM and cam is not None:
            try:
                rgb = cv2.cvtColor(out, cv2.COLOR_BGR2RGB)
                cam.send(rgb)
                cam.sleep_until_next_frame()
            except Exception as e:
                # stop sending after the first failure; the local preview keeps running
                print('pyvirtualcam send failed:', e)
                HAVE_PYVIRTUALCAM = False

        # local preview
        cv2.imshow("pose", out)
        # mask to the low byte: some backends set higher bits in the key code
        if cv2.waitKey(1) & 0xFF == 27:  # ESC
            break
finally:
    cap.release()
    if cam is not None:
        cam.close()
    cv2.destroyAllWindows()


pyvirtualcam opened: OBS Virtual Camera
