In [2]:
import os
import xml.etree.ElementTree as ET

def convert_voc_to_yolo(
    images_dir: str,
    annotations_dir: str,
    output_dir: str,
    class_name: str = 'licence',
    class_id: int = 0,
) -> None:
    """Convert Pascal VOC XML annotations into YOLO label files.

    For every ``*.png`` file in ``images_dir`` the XML file with the same
    base name is read from ``annotations_dir`` and a ``.txt`` file with the
    same base name is written to ``output_dir``. Each ``<object>`` whose
    ``<name>`` equals ``class_name`` yields one line of the form
    ``<class_id> <x_center> <y_center> <width> <height>``, with all four box
    values divided by the image size taken from the XML ``<size>`` element.
    An image with no matching object still gets an (empty) label file.

    Args:
        images_dir: Directory that is scanned for ``.png`` images.
        annotations_dir: Directory holding one ``.xml`` annotation per image.
        output_dir: Directory for the label files; created if missing.
        class_name: Only objects carrying this VOC name are exported.
        class_id: Integer class index written at the start of every line.

    Raises:
        FileNotFoundError: If an image has no matching annotation file.
        xml.etree.ElementTree.ParseError: If an annotation is not valid XML.
        ZeroDivisionError: If an annotation declares a width or height of 0.
    """
    image_ext = '.png'
    os.makedirs(output_dir, exist_ok=True)
    image_files = sorted(f for f in os.listdir(images_dir) if f.endswith(image_ext))

    for image_name in image_files:
        # Strip only the trailing extension. str.replace() would also rewrite
        # any earlier ".png" inside the name (e.g. "a.png_1.png").
        stem = image_name[:-len(image_ext)]
        annotation_path = os.path.join(annotations_dir, stem + '.xml')
        txt_path = os.path.join(output_dir, stem + '.txt')

        root = ET.parse(annotation_path).getroot()

        img_width = int(root.find("size/width").text)
        img_height = int(root.find("size/height").text)

        # Collect all lines first so a malformed object cannot leave a
        # half-written label file behind.
        label_lines = []
        for obj in root.iter('object'):
            if obj.find("name").text != class_name:
                continue

            bndbox = obj.find("bndbox")
            # Some annotation tools store coordinates as floats ("50.0"),
            # which int() would reject.
            xmin = float(bndbox.find("xmin").text)
            ymin = float(bndbox.find("ymin").text)
            xmax = float(bndbox.find("xmax").text)
            ymax = float(bndbox.find("ymax").text)

            # YOLO format expects box centre and size normalised by the image size:
            # https://docs.ultralytics.com/datasets/detect/#ultralytics-yolo-format
            x_center = (xmin + xmax) / 2.0 / img_width
            y_center = (ymin + ymax) / 2.0 / img_height
            width = (xmax - xmin) / img_width
            height = (ymax - ymin) / img_height

            label_lines.append(f"{class_id} {x_center} {y_center} {width} {height}\n")

        with open(txt_path, "w") as f:
            f.writelines(label_lines)

In [None]:
# Generate YOLO label files for the small dataset from its VOC annotations.
convert_voc_to_yolo(
    images_dir="../../data/dataset_small/images",
    annotations_dir="../../data/dataset_small/annotations",
    output_dir="../../data/dataset_small_yolo",
)