In [11]:
import os
import json
from tqdm import tqdm

In [None]:
def coco_to_yolo(coco_json_path, output_dir, bbox_format="xyxy"):
    """Convert a COCO-style annotation file into per-image YOLO label files.

    For every image that has at least one valid annotation, a text file named
    after the image (extension replaced by ``.txt``) is written under
    ``output_dir``. Each line has the form
    ``<category_id> <x_center> <y_center> <width> <height>`` where the four box
    values are divided by the image width/height and formatted to six decimals.

    Label files are rewritten from scratch on every call, so running the
    conversion again does not duplicate lines in existing files.

    Args:
        coco_json_path: Path to a JSON file containing an ``images`` list
            (entries with ``id``, ``file_name``, ``width``, ``height``) and an
            ``annotations`` list (entries with ``image_id``, ``bbox``,
            ``category_id``).
        output_dir: Directory that receives the label files; created if missing.
        bbox_format: Layout of each annotation's ``bbox``.
            ``"xyxy"`` (default) means ``[x_min, y_min, x_max, y_max]``, which is
            what this function has always assumed.
            ``"xywh"`` means ``[x_min, y_min, width, height]``, the layout used
            by the standard COCO format.

    Raises:
        ValueError: If ``bbox_format`` is neither ``"xyxy"`` nor ``"xywh"``.

    Notes:
        Boxes are clamped to the image bounds; boxes that end up with a
        non-positive width or height are reported and skipped, as are
        annotations whose ``image_id`` is not present in ``images``.
        ``category_id`` is written unchanged.
        NOTE(review): verify that the ids in the JSON already match the class
        indices the downstream trainer expects.
    """
    if bbox_format not in ("xyxy", "xywh"):
        raise ValueError(
            f"Unsupported bbox_format {bbox_format!r}; expected 'xyxy' or 'xywh'"
        )

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    os.makedirs(output_dir, exist_ok=True)

    image_id_to_info = {img['id']: img for img in coco_data['images']}

    # Collect lines per label file first so each file is opened exactly once
    # and overwritten, instead of being appended to once per annotation.
    labels_by_path = {}

    for ann in tqdm(coco_data['annotations'], desc="Converting COCO to YOLO"):
        image_id = ann['image_id']
        image_info = image_id_to_info.get(image_id)

        if not image_info:
            print(f"Warning: No image found for image_id {image_id}")
            continue

        filename = image_info['file_name']
        img_width = image_info['width']
        img_height = image_info['height']

        yolo_label_path = os.path.join(output_dir, os.path.splitext(filename)[0] + ".txt")

        x_min, y_min, x_max, y_max = ann["bbox"]
        if bbox_format == "xywh":
            # The last two values are a size, not a corner: turn them into one.
            x_max += x_min
            y_max += y_min

        # Clamp the box to the image so normalised values stay within [0, 1].
        x_min = max(0, min(x_min, img_width))
        y_min = max(0, min(y_min, img_height))
        x_max = max(0, min(x_max, img_width))
        y_max = max(0, min(y_max, img_height))

        width = x_max - x_min
        height = y_max - y_min

        # Also guards the divisions below: a zero-sized image clamps every box
        # to zero width/height and is skipped here.
        if width <= 0 or height <= 0:
            print(f"Skipping annotation with invalid dimensions: {ann}")
            continue

        x_center = (x_min + width / 2) / img_width
        y_center = (y_min + height / 2) / img_height
        width /= img_width
        height /= img_height

        if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and 0 <= width <= 1 and 0 <= height <= 1):
            print(f"Skipping annotation with out-of-bounds coordinates: {ann}")
            continue

        category_id = ann["category_id"]

        labels_by_path.setdefault(yolo_label_path, []).append(
            f"{category_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
        )

    for yolo_label_path, label_lines in labels_by_path.items():
        # file_name may contain sub-directories; make sure they exist.
        parent_dir = os.path.dirname(yolo_label_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(yolo_label_path, "w") as f:
            f.writelines(label_lines)

    print(f"YOLO annotations saved in {output_dir}")

In [13]:
# Training split: read the COCO-style JSON and write YOLO label files.
train_json = "/data/ephemeral/home/aihub/train.json"
train_dir = "/data/ephemeral/home/aihub/out/train"
coco_to_yolo(coco_json_path=train_json, output_dir=train_dir)

Converting COCO to YOLO: 100%|██████████| 20606/20606 [00:00<00:00, 28927.38it/s]

YOLO annotations saved in /data/ephemeral/home/aihub/out/train





In [14]:
# Validation split: read the COCO-style JSON and write YOLO label files.
val_json = "/data/ephemeral/home/aihub/val.json"
val_dir = "/data/ephemeral/home/aihub/out/val"
coco_to_yolo(coco_json_path=val_json, output_dir=val_dir)

Converting COCO to YOLO:   0%|          | 0/6614 [00:00<?, ?it/s]

Converting COCO to YOLO: 100%|██████████| 6614/6614 [00:00<00:00, 30142.78it/s]

YOLO annotations saved in /data/ephemeral/home/aihub/out/val





In [15]:
# Test split: read the COCO-style JSON and write YOLO label files.
test_json = "/data/ephemeral/home/aihub/test.json"
test_dir = "/data/ephemeral/home/aihub/out/test"
coco_to_yolo(coco_json_path=test_json, output_dir=test_dir)

Converting COCO to YOLO:  44%|████▍     | 2893/6533 [00:00<00:00, 28926.51it/s]

Converting COCO to YOLO: 100%|██████████| 6533/6533 [00:00<00:00, 29325.84it/s]

YOLO annotations saved in /data/ephemeral/home/aihub/out/test



