In [168]:
import xml.etree.ElementTree as et
import os
import shutil
import math

In [169]:
def extract_from_xml(xml_file):
    root = et.parse(xml_file).getroot()

    # Initialize info dict
    annotation_dict = {}
    annotation_dict['bnboxes'] = []

    # Parse XML Tree
    for elem in root:
        # Get file name
        if elem.tag == 'filename':
            annotation_dict['filename'] = elem.text

        # Get image size
        elif elem.tag == 'size':
            image_size = {}
            for subelem in elem:
                image_size[subelem.tag] = int(subelem.text)
            annotation_dict['size'] = image_size

        # Add bounding box(es)
        elif elem.tag == 'object':
            bnbox = {}
            for subelem in elem:
                if subelem.tag == 'name':
                    bnbox['class'] = subelem.text

                elif subelem.tag == 'bndbox':
                    for subsubelem in subelem:
                        bnbox[subsubelem.tag] = int(subsubelem.text)
            annotation_dict['bnboxes'].append(bnbox)

    return annotation_dict

In [170]:
# Find object class names and map to ids
def class_names_to_id_map():
    class_name_list = []
    annotations_directory = 'openlogo/annotations'

    # Iterate through set directory
    for filename in os.listdir(annotations_directory):
        xml_file = os.path.join(annotations_directory, filename)
        root = et.parse(xml_file).getroot()

        # Add to class to list if needed
        for elem in root:
            if elem.tag == 'object':
                for subelem in elem:
                    if subelem.tag == 'name' and subelem.text not in class_name_list:
                        class_name_list.append(subelem.text)

    # Sort classes and save .txt
    class_name_list.sort()
    print('\n'.join(class_name_list), file=open("classes.txt", 'w'))

    class_name_id_map = {}
    i = 0

    # Iterate to assign ids
    for class_name in class_name_list:
        class_name_id_map[class_name] = i
        i += 1

    return class_name_id_map

In [171]:
# Export extracted XML data in txt for yolo
def export_for_yolo(class_name_id_map, annotation_dict):
    print_objects = []
    
    for bnbox in annotation_dict['bnboxes']:
        image_width = annotation_dict['size']['width']
        image_height = annotation_dict['size']['height']

        class_id = class_name_id_map[bnbox['class']]

        # Normalize bnboxes
        bnbox_x_center = ((bnbox['xmin'] + bnbox['xmax']) / 2) / image_width
        bnbox_y_center = ((bnbox['ymin'] + bnbox['ymax']) / 2) / image_height
        bnbox_width = (bnbox['xmax'] - bnbox['xmin']) / image_width
        bnbox_height = (bnbox['ymax'] - bnbox['ymin']) / image_height

        # Format and save to list
        print_objects.append('{} {:.6f} {:.6f} {:.6f} {:.6f}'.format(class_id, bnbox_x_center, bnbox_y_center, bnbox_width, bnbox_height))

    # Save formatted annotation to .txt
    save_filename = os.path.join('dataset', os.path.splitext(annotation_dict['filename'])[0] + '.txt')
    print('\n'.join(print_objects), file=open(save_filename, 'w'))
    

In [172]:
# read through image sets to move files into correct folder
def split_to_folders():
     
     labels_directory = 'dataset'
     images_directory = 'openlogo/JPEGImages'
     class_sep_directory = 'openlogo/ImageSets/class_sep'

     labels_target_directory = 'dataset/labels'
     images_target_directory = 'dataset/images'
     

     for filename in os.listdir(class_sep_directory):
          split = os.path.join(class_sep_directory, filename)
          split_ids = open(split, 'r').read().split('\n')
          split_ids.pop()
          split_type = filename.split("_")[-1]

          if split_type == 'test.txt':
               for id in split_ids:
                    txt_file = os.path.join(labels_directory, id + '.txt')
                    jpg_file = os.path.join(images_directory, id + '.jpg')
                    try:
                         shutil.move(txt_file, os.path.join(labels_target_directory, 'test'))
                         shutil.move(jpg_file, os.path.join(images_target_directory, 'test'))
                    except:
                         print("Something Went Wrong - Test" )
          
          elif split_type == 'train.txt':
               i = 0
               for id in split_ids:
                    txt_file = os.path.join(labels_directory, id + '.txt')
                    jpg_file = os.path.join(images_directory, id + '.jpg')
                    
                    if i < math.ceil(len(split_ids) / 7):
                         try:
                              shutil.move(txt_file, os.path.join(labels_target_directory, 'val'))
                              shutil.move(jpg_file, os.path.join(images_target_directory, 'val'))
                         except:
                              print("Something Went Wrong - Val")
                              
                         i += 1
                    else:
                         try:
                              shutil.move(txt_file, os.path.join(labels_target_directory, 'train'))
                              shutil.move(jpg_file, os.path.join(images_target_directory, 'train'))
                         except:
                              print("Something Went Wrong - Train")

In [173]:
# Test
os.mkdir('dataset')

os.mkdir('dataset/images')
os.mkdir('dataset/labels')

os.mkdir('dataset/images/train')
os.mkdir('dataset/images/val')
os.mkdir('dataset/images/test')

os.mkdir('dataset/labels/train')
os.mkdir('dataset/labels/val')
os.mkdir('dataset/labels/test')

In [174]:
class_names_id_map = class_names_to_id_map()
for filename in os.listdir('openlogo/annotations'):
    data = extract_from_xml(os.path.join('openlogo/annotations', filename))
    export_for_yolo(class_names_id_map, data)
split_to_folders()


Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong - Test
Something Went Wrong