In [3]:
pip install pascal-voc-writer pycocotools tqdm

Collecting pascal-voc-writer
  Downloading pascal_voc_writer-0.1.4-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading pascal_voc_writer-0.1.4-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: pascal-voc-writer
Successfully installed pascal-voc-writer-0.1.4
Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import json
import xml.etree.ElementTree as ET
from tqdm import tqdm

def convert_voc_to_coco(voc_folder, output_json_path, label_list):
    """Convert a folder of Pascal VOC XML annotations into one COCO JSON file.

    Each ``*.xml`` file directly inside ``voc_folder`` becomes one entry in
    ``images``. Each ``<object>`` whose ``<name>`` appears in ``label_list``
    becomes one entry in ``annotations``; objects with any other label are
    skipped. Files are processed in sorted filename order so that the
    generated ``image_id`` / annotation ``id`` values are the same on every
    run and on every machine (``os.listdir`` alone returns entries in
    arbitrary order).

    Args:
        voc_folder: Directory holding the VOC ``.xml`` files. It is not
            searched recursively; entries not ending in ``.xml`` are ignored.
        output_json_path: Path of the JSON file to write (opened in "w" mode).
        label_list: Class names. The name at position ``i`` is assigned
            ``category_id`` ``i + 1``.

    Returns:
        None. The COCO dictionary is written to ``output_json_path`` and a
        confirmation line is printed.

    Raises:
        AttributeError: If an XML file lacks one of the elements read here
            (``filename``, ``size/width``, ``size/height``, or for a known
            label ``bndbox/xmin|ymin|xmax|ymax``).
    """
    # Category ids start at 1, following the position in label_list.
    label_map = {name: idx + 1 for idx, name in enumerate(label_list)}

    coco = {
        "info": {
            "description": "Dataset converted from Pascal VOC to COCO format",
            "version": "1.0"
        },
        "images": [],
        "annotations": [],
        "categories": [
            {"id": idx, "name": label, "supercategory": "object"}
            for label, idx in label_map.items()
        ]
    }

    # Filter first so the progress bar counts only annotation files, and sort
    # so that id assignment does not depend on the filesystem's listing order.
    xml_files = sorted(
        name for name in os.listdir(voc_folder) if name.endswith(".xml")
    )

    ann_id = 0
    for image_id, fname in enumerate(tqdm(xml_files)):
        root = ET.parse(os.path.join(voc_folder, fname)).getroot()

        size = root.find("size")
        coco["images"].append({
            "id": image_id,
            "file_name": root.find("filename").text,
            "width": int(size.find("width").text),
            "height": int(size.find("height").text)
        })

        for obj in root.findall("object"):
            label = obj.find("name").text
            if label not in label_map:
                continue  # Skip unknown labels

            bndbox = obj.find("bndbox")
            xmin = float(bndbox.find("xmin").text)
            ymin = float(bndbox.find("ymin").text)
            xmax = float(bndbox.find("xmax").text)
            ymax = float(bndbox.find("ymax").text)
            # VOC stores two corners; the bbox written here is [x, y, width, height].
            w = xmax - xmin
            h = ymax - ymin

            coco["annotations"].append({
                "id": ann_id,
                "image_id": image_id,
                "category_id": label_map[label],
                "bbox": [xmin, ymin, w, h],
                "area": w * h,
                "iscrowd": 0
            })
            ann_id += 1

    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(coco, f, indent=4)
    print(f"COCO annotation saved to: {output_json_path}")

In [7]:
import os
import xml.etree.ElementTree as ET

def extract_labels(directory):
    """Return the sorted, de-duplicated class names found in a VOC folder.

    Reads every entry of ``directory`` whose name ends in ``.xml``, parses it,
    and collects the text of the ``<name>`` child of each top-level
    ``<object>`` element.

    Args:
        directory: Folder containing Pascal VOC ``.xml`` files (not searched
            recursively).

    Returns:
        A sorted list of the distinct label strings.
    """
    xml_paths = [
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".xml")
    ]
    found = {
        node.find("name").text
        for path in xml_paths
        for node in ET.parse(path).findall("object")
    }
    return sorted(found)


In [8]:
train_dir = "Object-detection-dataset/train"
valid_dir = "Object-detection-dataset/valid"

# Step 1: build one shared, sorted class list from both splits so that
# category ids are identical in the train and validation JSON files.
label_list = sorted(
    set(extract_labels(train_dir)) | set(extract_labels(valid_dir))
)

# Step 2: write one COCO JSON file per split.
convert_voc_to_coco(
    voc_folder=train_dir,
    output_json_path="train_coco.json",
    label_list=label_list,
)
convert_voc_to_coco(
    voc_folder=valid_dir,
    output_json_path="valid_coco.json",
    label_list=label_list,
)


100%|██████████████████████████████████████████████████████████████████████████████| 820/820 [00:00<00:00, 1661.09it/s]


COCO annotation saved to: train_coco.json


100%|██████████████████████████████████████████████████████████████████████████████| 201/201 [00:00<00:00, 1615.37it/s]


COCO annotation saved to: valid_coco.json
