In [1]:
from ultralytics import YOLO
import cv2

### Add a visibility flag (v=2) to every keypoint in the label files

In [2]:
import os

labels_dir = "dataset/labels/val"  # change to your labels folder


def add_visibility_flags(line):
    """Append a visibility flag of "2" after every keypoint (x, y) pair of one label row.

    The first five tokens (class id + bounding box) are copied through
    unchanged; every following pair of tokens becomes ``x y 2``.

    Args:
        line: One whitespace-separated label row.

    Returns:
        The rewritten row as a single space-joined string (no newline).

    Raises:
        ValueError: If the tokens after the first five do not form
            complete (x, y) pairs.
    """
    parts = line.split()
    head, coords = parts[:5], parts[5:]  # first 5 = class + bbox
    if len(coords) % 2:
        raise ValueError(
            f"expected an even number of keypoint values, got {len(coords)}"
        )

    new_coords = []
    for x, y in zip(coords[0::2], coords[1::2]):  # x,y pairs
        new_coords += [x, y, "2"]  # add visibility flag
    return " ".join(head + new_coords)


def convert_label_text(text):
    """Rewrite every non-blank row of a label file, keeping one object per line.

    Rows are converted independently so a file describing several objects
    is not merged into one row.
    """
    return "".join(
        add_visibility_flags(line) + "\n"
        for line in text.splitlines()
        if line.strip()
    )


# NOTE: this rewrites the label files in place and is NOT idempotent. Running
# it a second time would treat the flags added by the first run as coordinates.
if os.path.isdir(labels_dir):
    # Pass 1: convert everything in memory, so a malformed file aborts the
    # run before any file on disk has been modified.
    converted = {}
    for fname in os.listdir(labels_dir):
        if not fname.endswith(".txt"):
            continue
        path = os.path.join(labels_dir, fname)
        with open(path) as f:
            try:
                converted[path] = convert_label_text(f.read())
            except ValueError as err:
                raise ValueError(f"{path}: {err}") from err

    # Pass 2: write the converted labels back.
    for path, new_text in converted.items():
        with open(path, "w") as f:
            f.write(new_text)
else:
    print(f"Labels directory not found, nothing converted: {labels_dir}")


### Code to test new models

In [6]:
# Load the custom pose weights saved under runs/pose/train5 (not an official checkpoint).
model = YOLO("runs/pose/train5/weights/best.pt")

# Open the video file to run the model on.
video_path = "../Videos/rallies_01.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # VideoCapture does not raise on a bad path; without this check the frame
    # loop would simply read nothing and finish without any error.
    raise RuntimeError(f"Could not open video: {video_path}")

In [7]:
# Run the model on every frame of `cap` and show the annotated frames live.
# NOTE: `cap` is released at the end of this cell, so re-run the cell that
# opens it before running this cell again.
try:
    while True:
        ret, frame = cap.read()
        if not ret:  # no more frames (or the read failed)
            break

        # Run YOLO inference once per frame. (A second, stream=True call used
        # to precede this one; its result was overwritten without being used.)
        results = model(frame, device="cuda")

        for r in results:
            annotated_frame = r.plot()
            cv2.imshow("YOLO Pose - Full", annotated_frame)

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture and close the window, even if inference raises
    # or the kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()


0: 384x640 1 table, 24.6ms
Speed: 3.7ms preprocess, 24.6ms inference, 4.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 table, 22.1ms
Speed: 2.9ms preprocess, 22.1ms inference, 7.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.1ms
Speed: 2.4ms preprocess, 21.1ms inference, 2.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.0ms
Speed: 2.4ms preprocess, 21.0ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.1ms
Speed: 2.4ms preprocess, 21.1ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.5ms
Speed: 2.9ms preprocess, 21.5ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 21.4ms
Speed: 3.2ms preprocess, 21.4ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 17.2ms
Speed: 2.7ms preprocess, 17.2ms inference, 1.3ms 

### Synthetic table image generator

In [None]:
import cv2
import numpy as np
import random
import os

# Size (in pixels) of the flat, un-warped table canvas
W = 800
H = 400
# Canvas corners in the order top-left, top-right, bottom-right, bottom-left
base_corners = np.array(
    [[0, 0], [W, 0], [W, H], [0, H]],
    dtype=np.float32,
)

def create_table_image():
    """Return a new H x W, 3-channel uint8 image: dark-green fill with a white border."""
    dark_green = (0, 100, 0)
    white = (255, 255, 255)

    canvas = np.full((H, W, 3), dark_green, dtype=np.uint8)

    # Outline the full canvas extent with a thickness-10 rectangle (drawn in place).
    top_left = (0, 0)
    bottom_right = (W - 1, H - 1)
    cv2.rectangle(canvas, top_left, bottom_right, white, 10)
    return canvas

def random_perspective(img, margin=100):
    """Warp ``img`` with a random perspective transform.

    Each corner of the image is moved inward by an independent random
    offset of 0..``margin`` pixels along x and along y, and the image is
    warped so its corners land on those points. The output keeps the
    input's width and height.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is read.
        margin: Largest inward offset, in pixels, applied to a corner on
            each axis. Should stay well below half of the smaller image
            side so the target quad cannot collapse.

    Returns:
        ``(warped, pts2)`` where ``warped`` is the transformed image and
        ``pts2`` is a float32 ``(4, 2)`` array of the destination corners
        in the order top-left, top-right, bottom-right, bottom-left.
    """
    h, w = img.shape[:2]

    # Source quad is taken from the image itself rather than a module-level
    # constant, so the mapping stays correct for any input size.
    src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])

    # target quad (apply perspective distortion)
    pts2 = np.float32([
        [random.randint(0, margin), random.randint(0, margin)],          # top-left
        [w - random.randint(0, margin), random.randint(0, margin)],      # top-right
        [w - random.randint(0, margin), h - random.randint(0, margin)],  # bottom-right
        [random.randint(0, margin), h - random.randint(0, margin)],      # bottom-left
    ])

    M = cv2.getPerspectiveTransform(src, pts2)
    warped = cv2.warpPerspective(img, M, (w, h))
    return warped, pts2

# Generate dataset
NUM_SAMPLES = 50  # number of synthetic samples to generate
random.seed(0)    # fixed seed so every run regenerates the same samples

os.makedirs("synthetic/images", exist_ok=True)
os.makedirs("synthetic/labels", exist_ok=True)

for i in range(NUM_SAMPLES):
    table = create_table_image()
    warped, corners = random_perspective(table)

    # save image
    fname = f"synthetic_{i}.jpg"
    image_path = f"synthetic/images/{fname}"
    if not cv2.imwrite(image_path, warped):
        # imwrite signals failure through its return value, not an exception
        raise IOError(f"Failed to write image: {image_path}")

    # Normalise the corner keypoints to the 0..1 range
    h, w = warped.shape[:2]
    xs = corners[:, 0] / w
    ys = corners[:, 1] / h

    # A YOLO pose label row is `class cx cy w h` followed by `x y v` per
    # keypoint. The box here is the tight bounding rectangle of the corners.
    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()
    fields = [
        "0",  # class id
        f"{(x_min + x_max) / 2:.6f}",
        f"{(y_min + y_max) / 2:.6f}",
        f"{x_max - x_min:.6f}",
        f"{y_max - y_min:.6f}",
    ]
    for nx, ny in zip(xs, ys):
        fields += [f"{nx:.6f}", f"{ny:.6f}", "2"]  # 2 = visibility flag

    label_path = f"synthetic/labels/{os.path.splitext(fname)[0]}.txt"
    with open(label_path, "w") as f:
        f.write(" ".join(fields) + "\n")
