# Convert Data from RDD2022 to YOLO Format

In [23]:
import os, sys
import xml.etree.ElementTree as ET
from tqdm import tqdm

In [24]:
path = '../datasets/RDD2022/images'

In [25]:
# Remove contents inside ../datasets/RDD2022/
os.system('rm -r ../datasets/RDD2022/*')

# Copy ../data/RDD2022/ folder to ../datasets/RDD2022/
os.system('cp -r ../data/RDD2022/ ../datasets/RDD2022/')

0

In [32]:
subdatasets = ['China_Drone', 'China_MotorBike', 'Czech', 'India', 'Japan', 'United_States']

class_name_to_id_dict = {
    'D00': 0,
    'D10': 1,
    'D20': 2,
    'D40': 3,
}

problematic_files = []

# For each subdataset open the train/anotations/xmls folder and add to train/anotations/ a file_name.txt with the anotations in YOLO format
# For example, for China_Drone_000000.xml we will create a file China_Drone_000000.txt with the anotations in YOLO format at train/anotations/
# YOLO format: class x_center y_center width height with values normalized between 0 and 1
print('Creating anotations in YOLO format...')
for subdataset in tqdm(subdatasets):
    train_path = os.path.join(path, subdataset, 'train')
    anotations_path = os.path.join(train_path, 'annotations')
    xmls_path = os.path.join(anotations_path, 'xmls')
    file_names = os.listdir(xmls_path)
    
    for file_name in file_names:
        try:
            file_name = file_name.split('.')[0]
            with open(os.path.join(anotations_path, file_name + '.txt'), 'w') as file:
                with open(os.path.join(xmls_path, file_name + '.xml'), 'r') as xml:
                    tree = ET.parse(xml)
                    root = tree.getroot()
                    # Get the width and height of the image to normalize the anotations
                    size = root.find('size')
                    img_width = int(size.find('width').text)
                    img_height = int(size.find('height').text)

                    # Write the anotations in YOLO format
                    for obj in root.findall('object'):
                        # Class id:
                        class_name = obj.find('name').text
                        class_id = class_name_to_id_dict[class_name]
                        # Bounding box:
                        bbox = obj.find('bndbox')
                        x_center = (int(bbox.find('xmin').text) + int(bbox.find('xmax').text)) / 2
                        y_center = (int(bbox.find('ymin').text) + int(bbox.find('ymax').text)) / 2
                        width = int(bbox.find('xmax').text) - int(bbox.find('xmin').text) 
                        height = int(bbox.find('ymax').text) - int(bbox.find('ymin').text)
                        # Normalize the values
                        x_center /= img_width
                        y_center /= img_height
                        width /= img_width
                        height /= img_height
                        # Write the anotation
                        file.write(f'{class_id} {x_center} {y_center} {width} {height}\n')
                       
        except Exception as e:
            # Most of the errors are due to label not in ['D00', 'D10', 'D20', 'D40']
            # print(f'Error with {file_name}: {e}')
            problematic_files.append(file_name)
            continue

    # Create dataset_{subdataset}.yaml in yaml/ folder with YOLO dataset configuration
    with open(f'yaml/dataset_{subdataset}.yaml', 'w') as file:
        file.write(f'path: ../tfg-informatica/datasets/RDD2022/images\n')
        file.write(f'train: {subdataset}/train/images\n')
        file.write(f'val: {subdataset}/train/images\n') # We don't have validation data. For now...
        # Check if there is a {subdataset}/test folder exist
        if os.path.exists(os.path.join(path, subdataset, 'test')):
            file.write(f'test: {subdataset}/test/images\n')
        # Class names
        file.write(f'\nnames:\n  0: D00\n  1: D10\n  2: D20\n  3: D40\n')

print(f'Num. Problematic files: {len(problematic_files)}')

Creating anotations in YOLO format...


100%|██████████| 6/6 [00:05<00:00,  1.14it/s]

Num. Problematic files: 7439



