In [5]:
import os
import shutil
from pathlib import Path

def copy_yolo_to_coco_structure(yolo_root, coco_root,
                                splits=("train", "val", "test"),
                                image_extensions=(".jpg", ".jpeg")):
    """Copy the images of a YOLO dataset into a COCO-style folder layout.

    For every split, images are looked up in ``<yolo_root>/<split>/images``
    and in ``<yolo_root>/<split>`` itself, and copied (flat) into
    ``<coco_root>/<split>/images``. An empty ``<coco_root>/annotations``
    folder is created at the end.

    Args:
        yolo_root: Root folder of the YOLO dataset (``str`` or ``Path``).
        coco_root: Root folder of the COCO layout to create (``str`` or ``Path``).
        splits: Names of the split sub-folders to process.
        image_extensions: File suffixes treated as images. Matching is
            case-insensitive; both ``".jpg"`` and glob-style ``"*.jpg"``
            spellings are accepted.

    Returns:
        None. Progress is reported on stdout.
    """
    yolo_root = Path(yolo_root)
    coco_root = Path(coco_root)

    # Normalise every spelling (".jpg", "*.JPG", "jpg") to a lower-case suffix so
    # a single case-insensitive comparison replaces one glob per case variant.
    # Separate "*.jpg" / "*.JPG" globs match the same file twice on
    # case-insensitive file systems and miss mixed-case suffixes elsewhere.
    wanted_suffixes = {"." + ext.lower().lstrip("*.") for ext in image_extensions}

    for split in splits:
        dst_images = coco_root / split / "images"
        dst_images.mkdir(parents=True, exist_ok=True)

        # Distinct destination file names, so the reported count equals the
        # number of files that actually end up in dst_images.
        copied_names = set()
        for src in (yolo_root / split / "images", yolo_root / split):
            if not src.is_dir():
                continue
            for img_file in sorted(src.iterdir()):
                # is_file() skips sub-folders (e.g. "images" itself, or a
                # directory that merely has an image-like name).
                if img_file.is_file() and img_file.suffix.lower() in wanted_suffixes:
                    shutil.copy(img_file, dst_images / img_file.name)
                    copied_names.add(img_file.name)

        print(f"📁 Copied {len(copied_names)} images for split: {split}")

    # parents=True keeps this working even when no split created coco_root.
    (coco_root / "annotations").mkdir(parents=True, exist_ok=True)
    print(f"✅ COCO folder structure created at {coco_root}")

if __name__ == "__main__":
    # Source YOLO dataset and the destination of the COCO-style copy.
    # NOTE(review): the destination is relative to the current working
    # directory — confirm it resolves to the intended dataset_coco folder.
    YOLO_DATASET_ROOT = "/home/rshah133/bcd/dataset"
    COCO_DATASET_ROOT = "dataset_coco"

    copy_yolo_to_coco_structure(
        yolo_root=YOLO_DATASET_ROOT,
        coco_root=COCO_DATASET_ROOT,
    )
    print("🎯 Folder structure set up for DINO!")

📁 Copied 337 images for split: train
📁 Copied 38 images for split: val
📁 Copied 706 images for split: test
✅ COCO folder structure created at dataset_coco
🎯 Folder structure set up for DINO!


In [2]:
import json
from pathlib import Path
from PIL import Image

def convert_yolo_to_coco(images_dir, labels_dir, class_names,
                         image_extensions=(".jpg", ".jpeg")):
    """Build a COCO-style annotation dict from YOLO images and label files.

    Each image in ``images_dir`` gets an entry in ``"images"`` with 1-based
    ids assigned in sorted path order. Its label file is
    ``<labels_dir>/<image stem>.txt``; an image without one still gets an
    image entry but no annotations. Every label line
    ``class x_center y_center width height`` (normalised to the image size)
    becomes one annotation whose ``bbox`` is
    ``[x_min, y_min, width, height]`` in pixels.

    Args:
        images_dir: Folder with the images (``str`` or ``Path``). A missing
            folder yields an empty image list.
        labels_dir: Folder with the YOLO ``.txt`` label files.
        class_names: Class names; the list index is used as the category id
            and must match the class index used in the label files.
        image_extensions: File suffixes treated as images. Matching is
            case-insensitive; ``".jpg"`` and ``"*.jpg"`` spellings both work.

    Returns:
        dict with the keys ``"images"``, ``"annotations"`` and ``"categories"``.

    Raises:
        ValueError: If a non-blank label line does not consist of exactly
            five numeric fields. The message names the file and line number.
    """
    images_dir = Path(images_dir)
    labels_dir = Path(labels_dir)
    wanted_suffixes = {"." + ext.lower().lstrip("*.") for ext in image_extensions}

    # Use the same case-insensitive JPEG matching as the image copy step, so
    # ".jpeg" / ".JPG" files are not silently left out of the annotations.
    if images_dir.is_dir():
        image_paths = sorted(
            p for p in images_dir.iterdir()
            if p.is_file() and p.suffix.lower() in wanted_suffixes
        )
    else:
        image_paths = []

    images = []
    annotations = []
    ann_id = 1
    for img_id, img_path in enumerate(image_paths, start=1):
        with Image.open(img_path) as im:
            width, height = im.size
        images.append({
            "file_name": img_path.name,
            "height": height,
            "width": width,
            "id": img_id
        })

        label_path = labels_dir / img_path.with_suffix(".txt").name
        if not label_path.exists():
            continue

        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    # Blank (often trailing) lines carry no annotation.
                    continue
                try:
                    if len(fields) != 5:
                        raise ValueError(f"expected 5 fields, got {len(fields)}")
                    cls, x, y, w, h = map(float, fields)
                except ValueError as exc:
                    raise ValueError(
                        f"{label_path}:{line_no}: invalid YOLO label line "
                        f"{line.strip()!r} ({exc})"
                    ) from exc

                # YOLO stores the normalised box centre; COCO wants the
                # top-left corner in pixels.
                x_min = (x - w / 2) * width
                y_min = (y - h / 2) * height
                annotations.append({
                    "id": ann_id,
                    "image_id": img_id,
                    "category_id": int(cls),
                    "bbox": [x_min, y_min, w * width, h * height],
                    "area": w * width * h * height,
                    "iscrowd": 0
                })
                ann_id += 1

    categories = [
        {"id": i, "name": name, "supercategory": "none"}
        for i, name in enumerate(class_names)
    ]
    return {
        "images": images,
        "annotations": annotations,
        "categories": categories
    }

def convert_splits_to_coco_annotations(dataset_root, output_dir, class_names,
                                       splits=("train", "val", "test")):
    """Write one COCO annotation file per dataset split.

    For each split, ``<dataset_root>/<split>/images`` and
    ``<dataset_root>/<split>/labels`` are converted with
    ``convert_yolo_to_coco`` and the result is saved as
    ``<output_dir>/instances_<split>.json``.

    Args:
        dataset_root: Root folder of the YOLO dataset (``str`` or ``Path``).
        output_dir: Folder receiving the JSON files; created if missing.
        class_names: Class names, forwarded to ``convert_yolo_to_coco``.
        splits: Names of the split sub-folders to convert.

    Returns:
        None. Each written path is reported on stdout.
    """
    dataset_root = Path(dataset_root)
    output_dir = Path(output_dir)
    # Without this, a fresh run fails with FileNotFoundError on the first
    # open() when the annotations folder has not been created yet.
    output_dir.mkdir(parents=True, exist_ok=True)

    for split in splits:
        images_dir = dataset_root / split / "images"
        labels_dir = dataset_root / split / "labels"
        if not images_dir.is_dir():
            # The file is still written (with no images) so downstream paths
            # keep working, but a wrong dataset_root should not go unnoticed.
            print(f"Warning: {images_dir} does not exist; "
                  f"annotations for split '{split}' will be empty")

        coco_data = convert_yolo_to_coco(images_dir, labels_dir, class_names)

        output_path = output_dir / f"instances_{split}.json"
        with open(output_path, "w") as f:
            json.dump(coco_data, f, indent=4)
        print(f"Saved {output_path}")

# Example usage: convert every split of the YOLO dataset to COCO JSON files.
dataset_root = "/home/rshah133/bcd/dataset_yolo"
output_dir = "/home/rshah133/bcd/dataset_coco/annotations"

# Order matters: the position in this list becomes the COCO category id.
class_names = [
    "Mass",
    "Spiculation",
    "Suspicious Calcification",
    "Architectural Distortion",
    "Asymmetry",
    "Focal Asymmetry",
    "Skin Thickening",
    "Global Asymmetry",
    "Suspicious Lymph Node",
    "Skin Retraction",
    "Nipple Retraction",
]

convert_splits_to_coco_annotations(
    dataset_root=dataset_root,
    output_dir=output_dir,
    class_names=class_names,
)


Saved /home/rshah133/bcd/dataset_coco/annotations/instances_train.json
Saved /home/rshah133/bcd/dataset_coco/annotations/instances_val.json
Saved /home/rshah133/bcd/dataset_coco/annotations/instances_test.json
