In [2]:
import os
import json
from PIL import Image

# Paths
# All three locations are derived from one split name so that the images,
# the label files and the output JSON always belong to the same dataset
# split. Reading images from one split and labels from another pairs each
# image with a missing or unrelated label file.
dataset_split = "val"
image_dir = f"dataset/{dataset_split}/images"
label_dir = f"dataset/{dataset_split}/labels"
output_json = f"dataset/{dataset_split}/annotations.json"

# Helper function to convert YOLO format to COCO bbox
def yolo_to_coco_bbox(yolo_bbox, img_width, img_height):
    """Convert a YOLO-style box into a COCO-style ``[x, y, w, h]`` list.

    Args:
        yolo_bbox: Iterable of exactly four numbers
            ``(x_center, y_center, width, height)``. The horizontal values
            are scaled by ``img_width`` and the vertical ones by
            ``img_height``.
        img_width: Factor applied to ``x_center`` and ``width``.
        img_height: Factor applied to ``y_center`` and ``height``.

    Returns:
        ``[x_min, y_min, width, height]`` where ``x_min``/``y_min`` are the
        scaled center minus half of the scaled width/height.

    Raises:
        ValueError: If ``yolo_bbox`` does not unpack into four values.
    """
    cx, cy, rel_w, rel_h = yolo_bbox

    # Scale the box size first; the corner is derived from the scaled size.
    box_w = rel_w * img_width
    box_h = rel_h * img_height

    # Shift from the box center to its top-left corner.
    left = cx * img_width - (box_w / 2)
    top = cy * img_height - (box_h / 2)

    return [left, top, box_w, box_h]

# Empty COCO document: image records and annotations are appended later,
# while the category list is fixed up front to the single "pothole" class.
coco_format = dict(
    images=[],
    annotations=[],
    categories=[dict(id=1, name="pothole")],
)

# Running identifiers for annotations and images; both start at 1.
annotation_id, image_id = 1, 1

# Iterate through images and labels.
# os.listdir() returns entries in arbitrary order; sorting makes the assigned
# image and annotation IDs reproducible from run to run.
image_filenames = sorted(
    name for name in os.listdir(image_dir) if name.endswith(".jpg")
)

for image_filename in image_filenames:
    # Image file details. Only the size is needed, so the image is opened in
    # a context manager to release the underlying file handle right away.
    image_path = os.path.join(image_dir, image_filename)
    with Image.open(image_path) as img:
        width, height = img.size

    # Add image info to COCO format
    coco_format["images"].append({
        "id": image_id,
        "file_name": image_filename,
        "width": width,
        "height": height
    })

    # Label file corresponding to the image (same stem, ".txt" extension)
    label_filename = os.path.splitext(image_filename)[0] + ".txt"
    label_path = os.path.join(label_dir, label_filename)

    # An image without a label file is still listed, just with no annotations.
    if os.path.exists(label_path):
        with open(label_path, "r") as label_file:
            for line_number, line in enumerate(label_file, start=1):
                # YOLO format: class_id x_center y_center width height
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no box.
                    continue
                if len(parts) != 5:
                    raise ValueError(
                        f"{label_path}:{line_number}: expected 5 fields "
                        f"(class_id x_center y_center width height), "
                        f"got {len(parts)}"
                    )

                class_id = int(parts[0])
                yolo_bbox = [float(value) for value in parts[1:]]
                coco_bbox = yolo_to_coco_bbox(yolo_bbox, width, height)

                # Add annotation info to COCO format.
                # NOTE(review): only category id 1 is declared in
                # coco_format["categories"]; a label with class_id > 0 would
                # reference an undeclared category. Confirm the dataset is
                # single-class.
                coco_format["annotations"].append({
                    "id": annotation_id,
                    "image_id": image_id,
                    "category_id": class_id + 1,  # Class IDs start from 1 in COCO
                    "bbox": coco_bbox,
                    "area": coco_bbox[2] * coco_bbox[3],
                    "iscrowd": 0
                })
                annotation_id += 1

    image_id += 1

# Write the collected COCO structure to disk as JSON indented by 4 spaces,
# then report where it went.
with open(output_json, "w") as out_file:
    out_file.write(json.dumps(coco_format, indent=4))

print(f"COCO annotations saved to {output_json}")

COCO annotations saved to dataset/val/annotations.json
