In [None]:
import os
import cv2
import glob
import shutil
import xml.etree.ElementTree as ET
from xml.dom import minidom

# Input locations -- edit these two paths to point at your own data.
source_images_dir = r"C:\Users\cmara\Dropbox (Personal)\Bark_Beetle_Images\Object_detection_reviewed_images\images"  # Root folder whose subdirectories hold the .png files.
source_ann_dir = r"C:\Users\cmara\Dropbox (Personal)\Bark_Beetle_Images\Object_detection_reviewed_images\annotations"  # Root folder whose subdirectories hold the .txt files.

# Output dataset layout: <output_root>/images and <output_root>/annotations.
output_root = "/path/to/your/formatted_dataset"  # e.g., a new folder named "formatted_dataset"
output_images_dir = os.path.join(output_root, "images")
output_ann_dir = os.path.join(output_root, "annotations")

# Create both output folders up front; folders that already exist are reused.
for _out_dir in (output_images_dir, output_ann_dir):
    os.makedirs(_out_dir, exist_ok=True)

# Collect every .png below the image root (at any depth) in sorted order.
_png_pattern = os.path.join(source_images_dir, '**/*.png')
image_files = glob.glob(_png_pattern, recursive=True)
image_files.sort()

# Build the CVAT XML skeleton: <annotations> holding <version> and <meta>/<task>.
root = ET.Element("annotations")
version = ET.SubElement(root, "version")
version.text = "1.1"

meta = ET.SubElement(root, "meta")
task = ET.SubElement(meta, "task")

# Task header fields, in document order. A text of None leaves the element
# empty (used for <bugtracker>). <size> is the number of discovered images.
_task_fields = (
    ("id", "1"),
    ("name", "Converted YOLO Annotations"),
    ("size", str(len(image_files))),
    ("mode", "annotation"),
    ("overlap", "0"),
    ("bugtracker", None),
    ("created", "2025-03-04"),
    ("updated", "2025-03-04"),
)
for _tag, _text in _task_fields:
    ET.SubElement(task, _tag).text = _text

# Single label definition: the dataset exposes one label named "object".
labels = ET.SubElement(task, "labels")
label_elem = ET.SubElement(labels, "label")
_label_name = ET.SubElement(label_elem, "name")
_label_name.text = "object"

def _clamp(value, upper):
    """Return *value* limited to the closed interval [0, upper]."""
    return min(max(value, 0.0), float(upper))


def _yolo_line_to_box(line, img_width, img_height):
    """Convert one YOLO annotation line to absolute pixel corners.

    Args:
        line: Text of the form "class x_center y_center width height", where
            the four geometry values are normalized by the image size.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` in pixels, clamped to the image
        bounds, or ``None`` when the line does not have exactly five fields.

    Raises:
        ValueError: If one of the geometry fields is not a valid number.
    """
    parts = line.split()
    if len(parts) != 5:
        return None

    # parts[0] is the class id; it is not needed because every box is
    # exported under the single "object" label.
    x_center_norm, y_center_norm, w_norm, h_norm = (float(v) for v in parts[1:])

    # Convert normalized coordinates to absolute pixel values.
    x_center = x_center_norm * img_width
    y_center = y_center_norm * img_height
    half_w = w_norm * img_width / 2
    half_h = h_norm * img_height / 2

    # Clamp so boxes touching the border cannot extend outside the image.
    return (
        _clamp(x_center - half_w, img_width),
        _clamp(y_center - half_h, img_height),
        _clamp(x_center + half_w, img_width),
        _clamp(y_center + half_h, img_height),
    )


# Process each image: copy it into the dataset and emit its <image> element.
# Ids are assigned only to images that are actually exported, so they stay
# contiguous even when unreadable files are skipped.
next_image_id = 0
for img_path in image_files:
    # Relative path from the source images root, e.g. "subfolder1/img.png".
    rel_path = os.path.relpath(img_path, source_images_dir)

    # Read the image first: an unreadable file must not end up in the output
    # dataset without a matching entry in the XML.
    image = cv2.imread(img_path)
    if image is None:
        print(f"Warning: Could not read {img_path}. Skipping.")
        continue
    height, width = image.shape[:2]

    # Copy the image, recreating its subdirectory below the output folder.
    dest_img_path = os.path.join(output_images_dir, rel_path)
    os.makedirs(os.path.dirname(dest_img_path), exist_ok=True)
    shutil.copy2(img_path, dest_img_path)

    # The image "name" is relative to the dataset root, so the unzipped
    # structure is:
    # formatted_dataset/
    #   images/...
    #   annotations/annotations_cvat.xml
    # Forward slashes are used regardless of the host OS so the recorded
    # paths stay portable (os.path.join would emit backslashes on Windows).
    image_name = "images/" + rel_path.replace(os.sep, "/")
    image_elem = ET.SubElement(root, "image", id=str(next_image_id), name=image_name,
                               width=str(width), height=str(height))
    next_image_id += 1

    # The annotation file mirrors the image's relative path with a .txt suffix.
    ann_rel_path = os.path.splitext(rel_path)[0] + ".txt"
    source_ann_path = os.path.join(source_ann_dir, ann_rel_path)

    # An image without an annotation file is still listed, just without boxes.
    if not os.path.exists(source_ann_path):
        print(f"Warning: No annotation file for {rel_path}.")
        continue

    # Read and convert annotation(s) from YOLO format.
    with open(source_ann_path, "r") as f:
        for line_no, line in enumerate(f, start=1):
            try:
                box = _yolo_line_to_box(line, width, height)
            except ValueError:
                # One bad line should not abort the whole conversion.
                print(f"Warning: Malformed values on line {line_no} of {source_ann_path}. Skipping line.")
                continue
            if box is None:
                continue
            x_min, y_min, x_max, y_max = box

            # occluded and z_order are part of the documented CVAT <box>
            # attributes; emit neutral defaults for them.
            ET.SubElement(image_elem, "box",
                          label="object",
                          xtl=str(x_min),
                          ytl=str(y_min),
                          xbr=str(x_max),
                          ybr=str(y_max),
                          occluded="0",
                          z_order="0")

# Keep the <size> header in step with the number of images actually exported.
_size_elem = task.find("size")
if _size_elem is not None:
    _size_elem.text = str(next_image_id)

# Pretty-print the XML: serialize the tree, then re-parse it with minidom,
# which is used here only to produce indented output.
xml_str = ET.tostring(root, encoding="utf-8")
parsed_xml = minidom.parseString(xml_str)
pretty_xml = parsed_xml.toprettyxml(indent="  ")

# Save the annotation file in the output annotations folder.
output_xml_path = os.path.join(output_ann_dir, "annotations_cvat.xml")
# toprettyxml() without an encoding argument emits an XML declaration that
# names no encoding, which XML readers treat as UTF-8. Write UTF-8 explicitly
# so non-ASCII image paths do not depend on the platform default encoding
# (e.g. cp1252 on Windows).
with open(output_xml_path, "w", encoding="utf-8") as f:
    f.write(pretty_xml)

print("Conversion complete.")
print(f"Images are copied to: {output_images_dir}")
print(f"Annotations XML is saved to: {output_xml_path}")
