**Converting QMUL-OpenLogo Dataset for yolov7**
1. Download the QMUL-OpenLogo dataset from [here](https://hangsu0730.github.io/qmul-openlogo/)
2. Extract the `openlogo` folder and place it in `model`
3. Run this notebook

This notebook should
- Create `model/dataset` directory with all images and labels of QMUL-OpenLogo in yolo format
- Produce a dataset containing 27,083 images and 352 logo classes
- Create `details/class-list.txt` and `details/class-id-map.txt`
- Optionally delete the leftover `model/openlogo` directory (the final cell is commented out by default)

In [4]:
import xml.etree.ElementTree as et
import os
import shutil
import math

In [5]:
def extract_from_xml(xml_file):
    """Read one annotation XML file into a plain dictionary.

    Only the direct children of the root element are inspected:

    * ``filename`` -> stored under ``'filename'`` as text.
    * ``size``     -> stored under ``'size'`` as ``{child tag: int(child text)}``.
    * ``object``   -> one entry appended to ``'bnboxes'``; the entry holds the
      ``name`` text under ``'class'`` plus every child of ``bndbox`` converted
      to ``int`` and keyed by its tag.

    Returns:
        dict that always has a ``'bnboxes'`` list, and has ``'filename'`` /
        ``'size'`` when those elements are present in the file.
    """
    parsed = {'bnboxes': []}

    for node in et.parse(xml_file).getroot():
        tag = node.tag

        if tag == 'filename':
            parsed['filename'] = node.text

        elif tag == 'size':
            # One integer per child tag of <size>
            parsed['size'] = {dim.tag: int(dim.text) for dim in node}

        elif tag == 'object':
            box = {}
            for field in node:
                if field.tag == 'name':
                    box['class'] = field.text
                elif field.tag == 'bndbox':
                    # Copy each coordinate element into the box as an int
                    box.update((coord.tag, int(coord.text)) for coord in field)
            parsed['bnboxes'].append(box)

    return parsed

In [13]:
# Find object class names and map to ids
def class_names_to_id_map():
    """Collect every class name in the annotations and assign each an id.

    Parses each file in ``openlogo/annotations`` and gathers the text of every
    ``<name>`` that is a direct child of a root-level ``<object>``. The
    distinct names are sorted and numbered from 0 in that order.

    Side effects:
        Creates the ``details`` directory if it is missing, then writes the
        sorted list to ``details/class-list.txt`` and the mapping to
        ``details/class-id-map.txt`` (each as a Python repr plus a newline).

    Returns:
        dict mapping class name -> integer id.
    """
    annotations_dir = 'openlogo/annotations'

    # A set gives O(1) de-duplication instead of scanning a list per name
    class_names = set()
    for filename in os.listdir(annotations_dir):
        root = et.parse(os.path.join(annotations_dir, filename)).getroot()
        class_names.update(name.text for name in root.findall('object/name'))

    # Sorting fixes the id assignment regardless of directory listing order
    class_name_list = sorted(class_names)
    class_name_id_map = {name: i for i, name in enumerate(class_name_list)}

    # Nothing else in the notebook creates this directory
    os.makedirs('details', exist_ok=True)

    # Context managers make sure both files are flushed and closed
    with open('details/class-list.txt', 'w') as list_file:
        print(class_name_list, file=list_file)
    with open('details/class-id-map.txt', 'w') as map_file:
        print(class_name_id_map, file=map_file)

    return class_name_id_map

In [7]:
# Export extracted XML data in txt for yolo
def export_for_yolo(class_name_id_map, annotation_dict):
    """Write one annotation as a YOLO-format label file under ``dataset``.

    Each bounding box becomes one line:
    ``<class id> <x center> <y center> <width> <height>``, where the four
    box values are divided by the image width/height and printed with six
    decimals.

    Args:
        class_name_id_map: dict mapping class name -> integer id.
        annotation_dict: dict with ``'filename'``, ``'bnboxes'`` (each box has
            ``'class'``, ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``) and,
            when there is at least one box, ``'size'`` with ``'width'`` and
            ``'height'``.

    Side effects:
        Writes ``dataset/<filename without extension>.txt``. The file ends
        with a newline and contains only that newline when there are no boxes.
    """
    bnboxes = annotation_dict['bnboxes']
    print_objects = []

    if bnboxes:
        # The image size is the same for every box; read it once, and only
        # when it is actually needed.
        image_width = annotation_dict['size']['width']
        image_height = annotation_dict['size']['height']

        for bnbox in bnboxes:
            class_id = class_name_id_map[bnbox['class']]

            # Normalize bnboxes
            bnbox_x_center = ((bnbox['xmin'] + bnbox['xmax']) / 2) / image_width
            bnbox_y_center = ((bnbox['ymin'] + bnbox['ymax']) / 2) / image_height
            bnbox_width = (bnbox['xmax'] - bnbox['xmin']) / image_width
            bnbox_height = (bnbox['ymax'] - bnbox['ymin']) / image_height

            print_objects.append('{} {:.6f} {:.6f} {:.6f} {:.6f}'.format(
                class_id, bnbox_x_center, bnbox_y_center, bnbox_width, bnbox_height))

    # Save formatted annotation to .txt; the context manager closes the file
    save_filename = os.path.join('dataset', os.path.splitext(annotation_dict['filename'])[0] + '.txt')
    with open(save_filename, 'w') as label_file:
        print('\n'.join(print_objects), file=label_file)
    

In [8]:
# Move one label/image pair into the given split directories
def _move_pair(image_id, subset):
    """Move ``dataset/<id>.txt`` and ``openlogo/JPEGImages/<id>.jpg`` into ``subset``."""
    txt_file = os.path.join('dataset', image_id + '.txt')
    jpg_file = os.path.join('openlogo/JPEGImages', image_id + '.jpg')

    shutil.move(txt_file, os.path.join('dataset/labels', subset))
    shutil.move(jpg_file, os.path.join('dataset/images', subset))


# Read through class folder and move files
def split_to_folders():
    """Distribute labels and images into the train, val and test directories.

    Every file in ``openlogo/ImageSets/class_sep`` is read as a list of image
    ids, one per line. The part of the file name after the last underscore
    decides what happens:

    * ``test.txt``  -> every id goes to the ``test`` split.
    * ``train.txt`` -> the first ``ceil(n / 7)`` ids go to ``val`` and the
      rest to ``train``, where ``n`` is the number of ids in that file.
    * anything else is ignored.

    Side effects:
        Moves ``dataset/<id>.txt`` into ``dataset/labels/<split>`` and
        ``openlogo/JPEGImages/<id>.jpg`` into ``dataset/images/<split>``.
        ``shutil.move`` raises if a source file is missing.
    """
    class_sep_dir = 'openlogo/ImageSets/class_sep'

    # Iterate through every class file
    for filename in os.listdir(class_sep_dir):

        # Get IDs from file. Blank lines are skipped, so the last id is kept
        # whether or not the file ends with a newline.
        with open(os.path.join(class_sep_dir, filename), 'r') as id_file:
            split_ids = [line.strip() for line in id_file if line.strip()]
        split_type = filename.split('_')[-1]

        if split_type == 'test.txt':
            for image_id in split_ids:
                _move_pair(image_id, 'test')

        elif split_type == 'train.txt':
            # Hold out the first 1/7 (rounded up) of the train ids for validation
            val_count = math.ceil(len(split_ids) / 7)
            for index, image_id in enumerate(split_ids):
                _move_pair(image_id, 'val' if index < val_count else 'train')


In [9]:
# Create directories: dataset/{images,labels}/{train,val,test}
for kind in ('images', 'labels'):
    for subset in ('train', 'val', 'test'):
        # makedirs builds the parent directories too, and exist_ok lets this
        # cell be re-run without raising FileExistsError.
        os.makedirs(os.path.join('dataset', kind, subset), exist_ok=True)

In [14]:
# Create map of classes to ID
# Scans openlogo/annotations and writes details/class-list.txt and
# details/class-id-map.txt. The returned dict (class name -> integer id) is
# kept as a notebook global and passed to export_for_yolo() when labels are
# generated.
class_names_id_map = class_names_to_id_map()

In [None]:
# Generate yolo labels from each XML
# Each annotation file is parsed and exported as dataset/<image name>.txt;
# split_to_folders() later moves those label files into the split directories.
for filename in os.listdir('openlogo/annotations'):
    data = extract_from_xml(os.path.join('openlogo/annotations', filename))
    export_for_yolo(class_names_id_map, data)

In [None]:
# Move labels and images into split directories
# Run this after the labels have been generated: it moves dataset/<id>.txt and
# openlogo/JPEGImages/<id>.jpg, so the source files are gone afterwards.
split_to_folders()

In [3]:
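# DESTRUCTIVE! Uncomment only the line(s) you really intend to run.
# Remove leftover openlogo files (the original download)
# shutil.rmtree('openlogo')
# Remove the generated dataset (this deletes the converted images and labels)
# shutil.rmtree('dataset')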