### Code to find Table coordinates based on yolo

In [6]:
import cv2
import numpy as np
from ultralytics import YOLO
import torch

In [7]:
def yolo_on_video(model, video, start_frame, end_frame):
    cap = cv2.VideoCapture(video)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    frame_num = start_frame
    all_results = []
    while True:
        ret, frame = cap.read()
        if not ret or frame_num > end_frame:
            break

        # Run YOLO inference
        if torch.cuda.is_available():
            results = model(frame, stream=True, device="cuda")
        else:
            results = model(frame, stream=True, device="cpu")

        for r in results:
            all_results.append(r)
            annotated_frame = r.plot()
            cv2.imshow("YOLO Pose - Full", annotated_frame)

            keypoints = r.keypoints.cpu().numpy()  # (num_instances, num_keypoints, 3)
            if len(keypoints) > 0:
                table_corners = keypoints[0][:, :2]  # first instance, all keypoints, x,y only
                print(f"Frame {frame_num}: Table corners (normalized): {table_corners}")

        frame_num += 1

        # Exit on 'q'
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()
    return all_results


In [8]:
def average_results(results):
    if len(results) == 0:
        print("No results to average")
        return None

    sum_corners = np.zeros((4, 2), dtype=np.float32)
    count = 0

    for r in results:
        keypoints = r.keypoints.cpu().numpy()
        table_corners = None

        # Case 1: use keypoints if available
        if len(keypoints) > 0:
            candidate = keypoints[0][:, :2]   # take first detection (x, y only)
            if candidate.shape == (4, 2):
                table_corners = candidate

        # Case 2: fallback to bounding box
        if table_corners is None:
            if hasattr(r, "boxes") and len(r.boxes) > 0:
                box = r.boxes[0].xyxy.cpu().numpy()[0]  # (x1, y1, x2, y2)
                x1, y1, x2, y2 = box
                table_corners = np.array([
                    [x1, y1],  # top-left
                    [x2, y1],  # top-right
                    [x2, y2],  # bottom-right
                    [x1, y2],  # bottom-left
                ], dtype=np.float32)

        # Add if we have valid corners
        if table_corners is not None and table_corners.shape == (4, 2):
            sum_corners += table_corners
            count += 1

    if count == 0:
        return None

    avg_corners = sum_corners / count
    return avg_corners


In [9]:
def annotate_frame_with_table(frame, table_corners):
    if table_corners is None:
        return frame  # nothing to draw
    
    # Convert to integer pixel coordinates
    pts = np.int32(table_corners).reshape((-1, 1, 2))

    annotated = frame.copy()
    cv2.polylines(annotated, [pts], isClosed=True, color=(0, 255, 0), thickness=3)

    return annotated


In [15]:
model_path = "TableDetection.pt"
video_path = "../Videos/test_2.mp4"

model = YOLO(model_path)  # load an official model
start_frame = 0
end_frame = 50
results = yolo_on_video(model, video_path, start_frame, end_frame)
avg_corners = average_results(results)
print(f"Average Table corners (normalized): {avg_corners}")
if avg_corners is not None:
    print("Corners being drawn:")
    for corner in avg_corners:
        print(corner[0],",",corner[1])
    cap = cv2.VideoCapture(video_path)
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    ret, frame = cap.read()
    if ret:
        annotated = annotate_frame_with_table(frame, avg_corners)
        cv2.imshow("Average Table Position", annotated)
        cv2.waitKey(0)
    cap.release()
    cv2.destroyAllWindows()
else:
    print("No valid table corners detected in any frame.")


0: 384x640 3 tables, 23.7ms
Frame 0: Table corners (normalized): ultralytics.engine.results.Keypoints object with attributes:

conf: array([[    0.58151,      0.5257]], dtype=float32)
data: array([[[     1121.1,      536.19,     0.58151],
        [     707.89,      831.28,      0.5257]]], dtype=float32)
has_visible: True
orig_shape: (1080, 1920)
shape: (1, 2, 3)
xy: array([[[     1121.1,      536.19],
        [     707.89,      831.28]]], dtype=float32)
xyn: array([[[     0.5839,     0.49647],
        [    0.36869,      0.7697]]], dtype=float32)
Speed: 3.0ms preprocess, 23.7ms inference, 5.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 tables, 20.9ms
Frame 1: Table corners (normalized): ultralytics.engine.results.Keypoints object with attributes:

conf: array([[    0.57998,     0.52794]], dtype=float32)
data: array([[[     1121.8,      537.68,     0.57998],
        [     712.03,      829.04,     0.52794]]], dtype=float32)
has_visible: True
orig_shape: (1080, 1920)
s

In [13]:
import numpy as np

# -----------------------------
# 1. Define corner coordinates
# -----------------------------
model_corners = np.array([[383.31644 , 489.0898],[1488.9855 , 489.0898],[1488.9855 , 665.79016],[ 383.31644 , 665.79016]])

actual_corners = np.array([[557,518],[354,631],[1486,659],[1339,533]])

# ------------------------------------------------
# 2. Compute table size in pixels from actual data
# ------------------------------------------------
# Pixel width (vertical direction in image)
actual_px_width = np.abs(actual_corners[1, 1] - actual_corners[0, 1])

# Pixel length (horizontal direction in image)
actual_px_length = np.abs(actual_corners[2, 0] - actual_corners[1, 0])

print(f"Actual pixel width: {actual_px_width:.2f}")
print(f"Actual pixel length: {actual_px_length:.2f}")

# ------------------------------------------------
# 3. Compute pixel differences (errors) per axis
# ------------------------------------------------
# Differences in x (horizontal) and y (vertical) directions
x_diffs = model_corners[:, 0] - actual_corners[:, 0]
y_diffs = model_corners[:, 1] - actual_corners[:, 1]

# ------------------------------------------------
# 4. Aggregate and normalize errors
# ------------------------------------------------
# Width error (horizontal)
error_width = np.abs(x_diffs[1] + x_diffs[2]) / actual_px_width

# Length error (vertical)
error_length = np.abs(y_diffs[0] + y_diffs[1]) / actual_px_length

print(f"Normalized width error: {error_width:.4f}")
print(f"Normalized length error: {error_length:.4f}")

# ------------------------------------------------
# 5. Compute overall shape error (Euclidean)
# ------------------------------------------------
SSE = np.sqrt(error_width**2 + error_length**2)
print(f"Overall shape error (SSE): {SSE:.4f}")


Actual pixel width: 113.00
Actual pixel length: 1132.00
Normalized width error: 10.0705
Normalized length error: 0.1509
Overall shape error (SSE): 10.0717


In [2]:
def bbox_iou(box1, box2):
    """
    box1 and box2 are [x1, y1, x2, y2]
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # Compute intersection area
    inter_area = max(0, x2 - x1) * max(0, y2 - y1)

    # Compute areas of each box
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Compute IoU
    iou = inter_area / float(area1 + area2 - inter_area + 1e-6)
    return iou


In [None]:
#game_5

box1 = [453.8655,572.60516,1487.3258, 785.4048]
box2 = [564, 593,1494,765]
print(bbox_iou(box1,box2))

0.7183856511393449


In [None]:
#game_1
box1 = [353.24, 494.26,1431.2, 679.19]
box2 = [509, 525, 1437, 664]
print(bbox_iou(box1,box2))

0.6404379672411821


In [None]:
#test_2
model_corners = [[383.31644 , 489.0898],[1488.9855 , 489.0898],[1488.9855 , 665.79016],[ 383.31644 , 665.79016]]
box1 = [383.31644 , 489.0898,1488.9855 , 665.79016]
actual_corners = [[557,518],[354,631],[1486,659],[1339,533]]
box2 = [557,518,1486,659]
print(bbox_iou(box1,box2))


0.6704590161952458


In [None]:
results = """
{
    game_1:{
        bbox:0.6404
        custom:0.3735
    },
    game_5:{
        bbox:0.718
        custom:0.0663
    },
    test_2:{
        bbox:0.67
        custom:10.0717
    }
}
"""