### Code to find Table coordinates based on yolo

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
import torch

In [9]:
def yolo_on_video(model, video, start_frame, end_frame):
    cap = cv2.VideoCapture(video)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    frame_num = start_frame
    all_results = []
    while True:
        ret, frame = cap.read()
        if not ret or frame_num > end_frame:
            break

        # Run YOLO inference
        if torch.cuda.is_available():
            results = model(frame, stream=True, device="cuda")
        else:
            results = model(frame, stream=True, device="cpu")

        for r in results:
            all_results.append(r)
            annotated_frame = r.plot()
            cv2.imshow("YOLO Pose - Full", annotated_frame)

            keypoints = r.keypoints.cpu().numpy()  # (num_instances, num_keypoints, 3)
            if len(keypoints) > 0:
                table_corners = keypoints[0][:, :2]  # first instance, all keypoints, x,y only
                print(f"Frame {frame_num}: Table corners (normalized): {table_corners}")

        frame_num += 1

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()
    return all_results


In [3]:
def average_results(results):
    if len(results) == 0:
        print("No results to average")
        return None

    sum_corners = np.zeros((4, 2), dtype=np.float32)
    count = 0

    for r in results:
        keypoints = r.keypoints.cpu().numpy()
        table_corners = None

        # Case 1: use keypoints if available
        if len(keypoints) > 0:
            candidate = keypoints[0][:, :2]   # take first detection (x, y only)
            if candidate.shape == (4, 2):
                table_corners = candidate

        # Case 2: fallback to bounding box
        if table_corners is None:
            if hasattr(r, "boxes") and len(r.boxes) > 0:
                box = r.boxes[0].xyxy.cpu().numpy()[0]  # (x1, y1, x2, y2)
                x1, y1, x2, y2 = box
                table_corners = np.array([
                    [x1, y1],  # top-left
                    [x2, y1],  # top-right
                    [x2, y2],  # bottom-right
                    [x1, y2],  # bottom-left
                ], dtype=np.float32)

        # Add if we have valid corners
        if table_corners is not None and table_corners.shape == (4, 2):
            sum_corners += table_corners
            count += 1

    if count == 0:
        return None

    avg_corners = sum_corners / count
    return avg_corners


In [4]:
def annotate_frame_with_table(frame, table_corners):
    if table_corners is None:
        return frame  # nothing to draw
    
    # Convert to integer pixel coordinates
    pts = np.int32(table_corners).reshape((-1, 1, 2))

    annotated = frame.copy()
    cv2.polylines(annotated, [pts], isClosed=True, color=(0, 255, 0), thickness=3)

    return annotated


In [10]:
model_path = "TableDetection.pt"
video_path = "../Videos/game_5.mp4"

model = YOLO(model_path)  # load an official model
start_frame = 0
end_frame = 50
results = yolo_on_video(model, video_path, start_frame, end_frame)
avg_corners = average_results(results)
print(f"Average Table corners (normalized): {avg_corners}")
if avg_corners is not None:
    print("Corners being drawn:", )
    for corner in avg_corners:
        print(f"({corner[0]:.2f}, {corner[1]:.2f})")
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    ret, frame = cap.read()
    if ret:
        annotated = annotate_frame_with_table(frame, avg_corners)
        cv2.imshow("Average Table Position", annotated)
        cv2.waitKey(0)
    cap.release()
    cv2.destroyAllWindows()
else:
    print("No valid table corners detected in any frame.")


0: 384x640 3 tables, 23.4ms
Frame 0: Table corners (normalized): ultralytics.engine.results.Keypoints object with attributes:

conf: array([[    0.54758,     0.45201]], dtype=float32)
data: array([[[     1119.2,      603.22,     0.54758],
        [     838.37,      838.99,     0.45201]]], dtype=float32)
has_visible: True
orig_shape: (1080, 1920)
shape: (1, 2, 3)
xy: array([[[     1119.2,      603.22],
        [     838.37,      838.99]]], dtype=float32)
xyn: array([[[    0.58291,     0.55854],
        [    0.43665,     0.77684]]], dtype=float32)
Speed: 4.7ms preprocess, 23.4ms inference, 7.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 tables, 20.3ms
Frame 1: Table corners (normalized): ultralytics.engine.results.Keypoints object with attributes:

conf: array([[    0.53165,     0.44068]], dtype=float32)
data: array([[[     1019.6,      589.94,     0.53165],
        [      770.6,      818.21,     0.44068]]], dtype=float32)
has_visible: True
orig_shape: (1080, 1920)
s

In [None]:
model_corners = [[353.24, 494.26], [353.24, 679.19], [1431.2, 679.19], [1431.2, 494.26]]
model_corners1 = [[495, 501], [263, 657], [1457, 692], [1305, 524]]
actual_corners = [[509, 525], [303, 644], [1437, 664], [1293, 539]]

model_corners_np = np.array(model_corners1)
actual_corners_np = np.array(actual_corners)
sse = np.sum((model_corners_np - actual_corners_np) ** 2)
print("Sum of squared errors:", sse)

Sum of squared errors: 4094
