# CVATtoYOLODataConverter #
This file contains an implementation of a converter that translates labels from the CVAT format to the format supported by the YOLO neural network.
This conversion is necessary in order to use the tool available at:
https://app.cvat.ai
The most accurate and useful output (especially for later training of a U-Net network) is provided in the CVAT format.


In [1]:
import os
import xml.etree.ElementTree as ET

In [2]:
# Maps each CVAT label name to the integer class id written to the YOLO label
# files; annotations whose label is not listed here are skipped by the converter.
label_map = {"gland": 0}

def parse_polygon_points(points_str):
    """Parse a CVAT ``points`` attribute into a list of ``(x, y)`` float tuples.

    The input is a semicolon-separated list of comma-separated coordinate
    pairs, e.g. ``"10.5,20.0;30.0,40.25"``. Blank entries (such as the one
    produced by a trailing ``;``) are ignored.

    Args:
        points_str: Raw attribute value. ``None`` and empty strings are
            treated as "no points" so that a missing XML attribute does not
            crash the caller.

    Returns:
        A list of ``(x, y)`` tuples of floats in input order; empty when the
        input holds no points.

    Raises:
        ValueError: If a non-blank entry is not exactly two comma-separated
            numbers.
    """
    if not points_str:
        return []

    points = []
    for pair in points_str.split(';'):
        pair = pair.strip()
        if not pair:
            continue
        x_str, y_str = pair.split(',')
        points.append((float(x_str), float(y_str)))
    return points

def polygon_to_bbox(points):
    """Return the axis-aligned bounding box of a sequence of points.

    Args:
        points: Iterable of indexable points where index 0 is x and index 1
            is y. Any iterable is accepted, including one-shot iterators.

    Returns:
        A tuple ``(x_min, y_min, x_max, y_max)``.

    Raises:
        ValueError: If ``points`` is empty.
    """
    # Materialize once: both axes are scanned, and a generator would be
    # exhausted after the first scan.
    points = list(points)
    if not points:
        raise ValueError("polygon_to_bbox() requires at least one point")

    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return min(xs), min(ys), max(xs), max(ys)

def _bbox_to_yolo_line(class_id, xtl, ytl, xbr, ybr, image_width, image_height):
    """Format one pixel-space box as a YOLO label line.

    The line has the form ``"<class> <x_center> <y_center> <width> <height>"``
    with the four geometry values divided by the image size and printed with
    six decimals.
    """
    x_center = (xtl + xbr) / 2.0
    y_center = (ytl + ybr) / 2.0
    box_width = xbr - xtl
    box_height = ybr - ytl

    x_center_norm = x_center / image_width
    y_center_norm = y_center / image_height
    width_norm = box_width / image_width
    height_norm = box_height / image_height

    return f"{class_id} {x_center_norm:.6f} {y_center_norm:.6f} {width_norm:.6f} {height_norm:.6f}"


def convert_annotation(folderPath):
    """Convert ``annotations.xml`` in *folderPath* into per-image YOLO label files.

    For every ``<image>`` element of the XML, a text file named after the
    image (extension replaced by ``.txt``) is written to
    ``<folderPath>/labels``. Each ``<polygon>`` is reduced to its bounding box
    and each ``<box>`` is used as is; polygons are written first, then boxes.
    Objects whose label is not in ``label_map`` are skipped with a warning, as
    are polygons without any points. An image without usable objects still
    gets an (empty) label file.

    Args:
        folderPath: Directory containing ``annotations.xml``. The ``labels``
            output directory is created inside it when missing.

    Raises:
        FileNotFoundError: If ``annotations.xml`` does not exist.
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        KeyError: If an image or box lacks a required attribute.
        ValueError: If a coordinate or image size is not a valid number.
    """
    xml_file = os.path.join(folderPath, 'annotations.xml')
    labels_dir = os.path.join(folderPath, 'labels')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(labels_dir, exist_ok=True)

    root = ET.parse(xml_file).getroot()

    for image in root.findall('image'):
        image_name = image.attrib['name']
        image_width = float(image.attrib['width'])
        image_height = float(image.attrib['height'])

        txt_file_name = os.path.splitext(image_name)[0] + ".txt"
        txt_file_path = os.path.join(labels_dir, txt_file_name)

        yolo_lines = []

        for poly in image.findall('polygon'):
            label = poly.attrib.get('label')
            if label not in label_map:
                print(f"Warning: label '{label}' is not mapped. Skipping this object.")
                continue

            # Guard before parsing: a missing/empty attribute would otherwise
            # abort the whole conversion on a single bad object.
            points_str = poly.attrib.get('points')
            points = parse_polygon_points(points_str) if points_str else []
            if not points:
                print(f"Warning: polygon '{label}' in '{image_name}' has no points. Skipping this object.")
                continue

            xtl, ytl, xbr, ybr = polygon_to_bbox(points)
            yolo_lines.append(
                _bbox_to_yolo_line(
                    label_map[label], xtl, ytl, xbr, ybr, image_width, image_height
                )
            )

        for box in image.findall('box'):
            label = box.attrib.get('label')
            if label not in label_map:
                print(f"Warning: label '{label}' is not mapped. Skipping this object.")
                continue

            yolo_lines.append(
                _bbox_to_yolo_line(
                    label_map[label],
                    float(box.attrib['xtl']),
                    float(box.attrib['ytl']),
                    float(box.attrib['xbr']),
                    float(box.attrib['ybr']),
                    image_width,
                    image_height,
                )
            )

        # Image names may carry a sub-folder prefix; make sure the matching
        # sub-folder exists under labels/ before writing.
        os.makedirs(os.path.dirname(txt_file_path), exist_ok=True)

        with open(txt_file_path, "w", encoding="utf-8") as f:
            f.write("\n".join(yolo_lines))

        print(f"Processed: {image_name} -> {txt_file_name}")



In [3]:
# Folder expected to contain annotations.xml; convert_annotation writes the
# generated YOLO label files into a "labels" sub-folder of this directory.
file_path = "../preprocessedData/LearnSet/anotations" 
# Run the conversion; one "Processed: ..." line is printed per image.
convert_annotation(file_path)

Processed: tile_12544_53760.tif -> tile_12544_53760.txt
Processed: tile_16128_50176.tif -> tile_16128_50176.txt
Processed: tile_16128_53760.tif -> tile_16128_53760.txt
Processed: tile_17920_53760.tif -> tile_17920_53760.txt
Processed: tile_19712_57344.tif -> tile_19712_57344.txt
Processed: tile_23296_16128.tif -> tile_23296_16128.txt
Processed: tile_23296_51968.tif -> tile_23296_51968.txt
Processed: tile_25088_14336.tif -> tile_25088_14336.txt
Processed: tile_25088_25088.tif -> tile_25088_25088.txt
Processed: tile_26880_21504.tif -> tile_26880_21504.txt
Processed: tile_26880_23296.tif -> tile_26880_23296.txt
Processed: tile_28672_19712.tif -> tile_28672_19712.txt
Processed: tile_28672_21504.tif -> tile_28672_21504.txt
Processed: tile_30464_17920.tif -> tile_30464_17920.txt
