# Test YOLO Bounding Box Detection on Synthetic Images

This notebook tests whether YOLO can properly detect staff bounding boxes on synthetic images and compares them to the original bounding boxes from the dataset.

In [None]:
import json
import ast
from PIL import Image, ImageDraw
from datasets import load_dataset
from ultralytics import YOLO
import matplotlib.pyplot as plt

%matplotlib inline

## Configuration

In [None]:
# Notebook parameters - edit these before running the cells below.

# Path to the YOLO weights file (relative path, resolved against the
# notebook's working directory).
# NOTE(review): the directory is spelled "yolo_weigths" - confirm it matches
# the folder name on disk before "correcting" it.
YOLO_MODEL_PATH = "../yolo_weigths/yolov11s_20241108.pt"

# Index of the dataset sample to inspect.
IMAGE_IDX = 0

## Load Synthetic Dataset

In [None]:
# Load the training split of the synthetic dataset.
print("Loading synthetic dataset...")
dataset = load_dataset("PRAIG/JAZZMUS_Synthetic", split="train")

# Fetch the selected row once: indexing the dataset a second time would read
# (and decode) the same sample again just to reach its other field.
sample = dataset[IMAGE_IDX]
image = sample["image"]
annotation_data = sample["annotation"]

print(f"Image {IMAGE_IDX} loaded. Size: {image.size}")

## Parse Original Bounding Boxes from Dataset

In [None]:
# Decode the annotation. It may arrive as a serialized string or as an
# already-decoded object.
if isinstance(annotation_data, str):
    try:
        regions = json.loads(annotation_data)
    except json.JSONDecodeError:
        # Not valid JSON: fall back to parsing it as a Python literal.
        regions = ast.literal_eval(annotation_data)
else:
    regions = annotation_data

# Collect the ground-truth boxes as (x1, y1, x2, y2) tuples, skipping systems
# that carry no 'bounding_box' entry.
original_boxes = []
for system in regions['systems']:
    if 'bounding_box' in system:
        bbox = system['bounding_box']
        original_boxes.append((bbox['fromX'], bbox['fromY'], bbox['toX'], bbox['toY']))

# Order top to bottom (by y1). The YOLO boxes are sorted the same way and the
# later cells pair the two lists by index, so both must share one ordering.
# The sort is stable, so an already-ordered list is left unchanged.
original_boxes.sort(key=lambda b: b[1])

print(f"Original bounding boxes from dataset: {len(original_boxes)}")
for i, box in enumerate(original_boxes):
    x1, y1, x2, y2 = box
    print(f"  System {i}: ({x1}, {y1}) -> ({x2}, {y2}) | size: {x2-x1}x{y2-y1}")

## Display Original Image

In [None]:
# Show the unmodified synthetic image for reference.
fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(image)
ax.set_title(f"Original Synthetic Image {IMAGE_IDX}")
ax.axis('off')
plt.show()

## Load YOLO Model

In [None]:
# Instantiate the detector from the configured weights file.
print(f"Loading YOLO model from: {YOLO_MODEL_PATH}")
try:
    model = YOLO(YOLO_MODEL_PATH)
    print("✓ YOLO model loaded successfully")
    # Class-id -> class-name mapping exposed by the loaded model.
    print(f"Model classes: {model.names}")
except Exception as load_error:
    # Report the failure in the cell output, then re-raise so the notebook
    # stops here instead of continuing without a model.
    print(f"✗ Error loading YOLO model: {load_error}")
    raise

## Run YOLO Inference

In [None]:
# Run inference directly on the in-memory PIL image.
# Going through a temporary JPEG file would (a) feed the model lossy,
# re-compressed pixels rather than the image the boxes are later drawn on and
# cropped from, (b) leave a stray file behind, and (c) fail outright for image
# modes JPEG cannot store (e.g. RGBA).
print("Running YOLO inference...")
results = model(image)
result = results[0]  # one input image -> one result

boxes = result.boxes
names = result.names

# Per-class tally of the detections.
print(f"Total detections: {len(boxes)}")
print("\nDetections by class:")
for cls in boxes.cls.unique():
    class_name = names[int(cls)]
    count = (boxes.cls == cls).sum().item()
    print(f"  {class_name}: {count}")

## Extract Staff Bounding Boxes from YOLO

In [None]:
# Keep only detections whose class name is "staff" (case-insensitive), turn
# each box into an (x1, y1, x2, y2) tuple of ints, and order them top to
# bottom by y1.
yolo_boxes = sorted(
    (
        tuple(map(int, xyxy))
        for xyxy, cls_id in zip(boxes.xyxy, boxes.cls)
        if names[int(cls_id)].lower() == "staff"
    ),
    key=lambda b: b[1],
)

print(f"YOLO detected staff boxes: {len(yolo_boxes)}")
for i, (x1, y1, x2, y2) in enumerate(yolo_boxes):
    print(f"  Staff {i}: ({x1}, {y1}) -> ({x2}, {y2}) | size: {x2-x1}x{y2-y1}")

## Compare Original vs YOLO Bounding Boxes

In [None]:
banner = "=" * 60
print(banner)
print("COMPARISON:")
print(banner)
print(f"Original boxes: {len(original_boxes)}")
print(f"YOLO boxes:     {len(yolo_boxes)}")

if len(original_boxes) != len(yolo_boxes):
    # Without matching counts there is no index-wise pairing to report.
    print("\n✗ Different number of systems detected!")
else:
    print("\n✓ Same number of systems detected!")
    print("\nDifferences per system:")
    # NOTE(review): boxes are paired purely by list position - this assumes
    # both lists are in the same order (e.g. top to bottom); confirm.
    # NOTE(review): the `d` format spec below requires integer deltas -
    # assumes the dataset coordinates are ints; TODO confirm.
    for i, (orig, yolo) in enumerate(zip(original_boxes, yolo_boxes)):
        ox1, oy1, ox2, oy2 = orig
        yx1, yy1, yx2, yy2 = yolo

        # Signed offsets (YOLO minus original) for each corner and for the size.
        d_left, d_top = yx1 - ox1, yy1 - oy1
        d_right, d_bottom = yx2 - ox2, yy2 - oy2
        d_width = (yx2 - yx1) - (ox2 - ox1)
        d_height = (yy2 - yy1) - (oy2 - oy1)

        print(f"\nSystem {i}:")
        print(f"  Original: ({ox1}, {oy1}) -> ({ox2}, {oy2})")
        print(f"  YOLO:     ({yx1}, {yy1}) -> ({yx2}, {yy2})")
        print(f"  Δ top-left:     ({d_left:+4d}, {d_top:+4d})")
        print(f"  Δ bottom-right: ({d_right:+4d}, {d_bottom:+4d})")
        print(f"  Δ size:         ({d_width:+4d}, {d_height:+4d})")

## Visualize Original Bounding Boxes

In [None]:
# Render the dataset's boxes on a copy so `image` itself stays unmodified.
img_with_orig = image.copy()
draw = ImageDraw.Draw(img_with_orig)

for idx, (left, top, right, bottom) in enumerate(original_boxes):
    draw.rectangle([(left, top), (right, bottom)], outline="red", width=4)
    # Label sits 30 px above the box's top-left corner.
    draw.text((left, top - 30), f"Orig {idx}", fill="red")

fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(img_with_orig)
ax.set_title("Original Bounding Boxes (from dataset) - RED")
ax.axis('off')
plt.show()

## Visualize YOLO Bounding Boxes

In [None]:
# Render the YOLO staff boxes on a copy so `image` itself stays unmodified.
img_with_yolo = image.copy()
draw = ImageDraw.Draw(img_with_yolo)

for idx, (left, top, right, bottom) in enumerate(yolo_boxes):
    draw.rectangle([(left, top), (right, bottom)], outline="green", width=4)
    # Label sits 30 px above the box's top-left corner.
    draw.text((left, top - 30), f"YOLO {idx}", fill="green")

fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(img_with_yolo)
ax.set_title("YOLO Detected Bounding Boxes - GREEN")
ax.axis('off')
plt.show()

## Overlay Both Bounding Boxes for Direct Comparison

In [None]:
# Overlay both sets of boxes on one copy of the image.
img_comparison = image.copy()
draw = ImageDraw.Draw(img_comparison)

# One entry per layer: (boxes, colour, label prefix, label offset above the box).
# Dataset boxes are drawn first, YOLO boxes second (on top).
# The two label offsets differ - presumably so the labels do not overlap when
# the boxes nearly coincide.
layers = (
    (original_boxes, "red", "Orig", 50),
    (yolo_boxes, "green", "YOLO", 30),
)
for layer_boxes, color, label, label_offset in layers:
    for i, (x1, y1, x2, y2) in enumerate(layer_boxes):
        draw.rectangle([(x1, y1), (x2, y2)], outline=color, width=3)
        draw.text((x1, y1 - label_offset), f"{label} {i}", fill=color)

fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(img_comparison)
ax.set_title("Comparison: RED = Original Dataset | GREEN = YOLO Detected")
ax.axis('off')
plt.show()

# Keep a copy of the overlay on disk.
overlay_path = "test_bbox_comparison_overlay.jpg"
img_comparison.save(overlay_path)
print(f"Saved: {overlay_path}")

## Compare Cropped Images - First System

In [None]:
if not original_boxes or not yolo_boxes:
    # At least one of the two lists is empty, so there is no pair to crop.
    print("No boxes to compare!")
else:
    # Cut the first system out of the image twice: once with the dataset's
    # box and once with YOLO's box.
    cropped_orig = image.crop(original_boxes[0])
    cropped_yolo = image.crop(yolo_boxes[0])

    # Stack the two crops vertically (2 rows x 1 column).
    fig, axes = plt.subplots(2, 1, figsize=(15, 8))
    panels = (
        (cropped_orig, "Original Dataset Bbox"),
        (cropped_yolo, "YOLO Detected Bbox"),
    )
    for ax, (crop, label) in zip(axes, panels):
        ax.imshow(crop)
        ax.set_title(f"{label} - Size: {crop.size}")
        ax.axis('off')

    plt.tight_layout()
    plt.show()

    # Write both crops to disk for closer inspection.
    cropped_orig.save("test_crop_original_bbox.jpg")
    cropped_yolo.save("test_crop_yolo_bbox.jpg")
    print("Saved:")
    print("  - test_crop_original_bbox.jpg")
    print("  - test_crop_yolo_bbox.jpg")

## View YOLO's Full Detection Output

In [None]:
# Have the result object write its own rendering to disk, then read that
# file back in for display.
annotated_path = "test_yolo_all_detections.jpg"
result.save(filename=annotated_path)
yolo_result_img = Image.open(annotated_path)

fig, ax = plt.subplots(figsize=(15, 10))
ax.imshow(yolo_result_img)
ax.set_title("YOLO Annotated Image (All Detections)")
ax.axis('off')
plt.show()

## Summary & Next Steps

In [None]:
# Final recap: sample info, box counts, and suggested follow-ups.
separator = "=" * 60
print(separator)
print("SUMMARY")
print(separator)
print(f"\nImage Index: {IMAGE_IDX}")
print(f"Image Size: {image.size}")
print(f"\nOriginal bounding boxes: {len(original_boxes)}")
print(f"YOLO detected boxes:     {len(yolo_boxes)}")

# Choose the verdict line and the follow-up list based on whether the two
# box counts agree.
if len(original_boxes) == len(yolo_boxes):
    verdict = "\n✓ YOLO detected the same number of systems!"
    next_steps = (
        "1. Check if YOLO boxes are more accurate by looking at the cropped images",
        "2. If YOLO is better, we can regenerate all bounding boxes",
        "3. Update the dataset or create corrected annotations",
    )
else:
    verdict = "\n✗ Different number of systems detected"
    next_steps = (
        "1. Investigate why the counts differ",
        "2. Check YOLO's confidence thresholds",
        "3. Verify the original annotations are correct",
    )

print(verdict)
print("\nNext steps:")
for step in next_steps:
    print(step)

print("\n" + separator)