In [None]:
import xml.etree.ElementTree as ET
import os
from os.path import join
import numpy as np
import sys
import src.tools

In [None]:
# parameters
# Name of the dataset sub-directory under data/; the remaining cells build
# every input and output path (label.names, train/val folders, voc.yaml)
# from this value.
dataset = "plate"

In [None]:
# File that lists the class names of the selected dataset.
label_file = 'data/%s/label.names' % dataset

# Project helper run on the whole data directory before conversion
# (judging by its name it removes .ipynb_checkpoints folders -- confirm
# against src.tools).
src.tools.Delete_ipynb_checkpoints('data/')

In [None]:
def convert(size, box):
    """Normalise a pixel-space box to centre/extent form relative to the image.

    Args:
        size: Sequence whose first two items are the image width and height.
        box: Sequence ``(xmin, xmax, ymin, ymax)`` of box edges in pixels.

    Returns:
        Tuple ``(x, y, w, h)``: the box centre (shifted down by one pixel on
        each axis) and the box width/height, each multiplied by the
        reciprocal of the matching image dimension.
    """
    # Reciprocals are computed first and multiplied in afterwards.
    inv_width = 1. / (size[0])
    inv_height = 1. / (size[1])

    centre_x = (box[0] + box[1]) / 2.0 - 1
    centre_y = (box[2] + box[3]) / 2.0 - 1
    box_width = box[1] - box[0]
    box_height = box[3] - box[2]

    return (
        centre_x * inv_width,
        centre_y * inv_height,
        box_width * inv_width,
        box_height * inv_height,
    )

In [None]:
def convert_annotation(image_id, image_folder, annotation_folder):
    """Convert one VOC-style XML annotation into a text label file.

    Reads ``<annotation_folder>/<image_id>.xml`` and writes
    ``<image_folder>/<image_id>.txt`` with one line per kept object:
    the class index followed by the four values returned by ``convert``,
    separated by single spaces.

    Objects are skipped when their ``<name>`` is not in the module-level
    ``classes`` list or when their ``<difficult>`` flag equals 1. A missing
    or empty ``<difficult>`` element is treated as "not difficult".

    Args:
        image_id: File stem shared by the XML annotation and the output file.
        image_folder: Directory that receives the ``.txt`` file (the callers
            in this notebook pass the labels folder here).
        annotation_folder: Directory that contains the XML annotation.
    """
    # Parsing by path lets ElementTree open and close the XML file itself,
    # so no handle is left dangling.
    tree = ET.parse('%s/%s.xml' % (annotation_folder, image_id))
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # The context manager guarantees the label file is flushed and closed,
    # even if an object entry turns out to be malformed.
    with open('%s/%s.txt' % (image_folder, image_id), 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            # Some annotation files omit <difficult>; do not crash on them.
            difficult = (obj.findtext('difficult') or '').strip()
            is_difficult = difficult != '' and int(difficult) == 1
            if cls not in classes or is_difficult:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # Edge order expected by convert(): xmin, xmax, ymin, ymax.
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

In [None]:
# Load the class names, one per line. The context manager closes the file
# even if reading fails. Blank lines and surrounding whitespace are dropped
# so that they cannot turn into bogus class names (which would inflate the
# class count and break name matching against the annotations).
with open(label_file, 'r') as f:
    classes = [line.strip() for line in f.read().splitlines() if line.strip()]
print('classes = ', classes)

In [None]:
# Optional sanity check: when the training annotations ship a class list,
# it must contain the same names as label.names (order is ignored).
annotation_classes_file = "data/%s/train/annotations/image_annotation_classes.txt" %(dataset)
if os.path.exists(annotation_classes_file):
    with open(annotation_classes_file, 'r') as f:
        annotation_classes = f.read().splitlines()
    if sorted(classes) != sorted(annotation_classes):
        print('label.names classes = ', classes)
        print('image_annotation_classes.txt classes = ', annotation_classes)
        print('The contents of image_annotation_classes.txt and label.names are not equal, please confirm whether the category name is correct')
        # Exit with a non-zero status so that non-interactive runs report
        # the mismatch as a failure instead of a clean exit.
        sys.exit(1)

In [None]:
# Input/output locations for the training and validation splits.
train_image_folder = 'data/%s/train/images/' %(dataset)
train_label_folder = 'data/%s/train/labels/' %(dataset)
train_annotation_folder = 'data/%s/train/annotations/' %(dataset)

valid_image_folder = 'data/%s/val/images/' %(dataset)
valid_label_folder = 'data/%s/val/labels/' %(dataset)
valid_annotation_folder = 'data/%s/val/annotations/' %(dataset)

# File extensions (compared in lower case) that count as images.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def _export_labels(image_folder, label_folder, annotation_folder, missing_message):
    """Write a label file for every image found under ``image_folder``.

    Args:
        image_folder: Directory that is walked recursively for image files.
        label_folder: Directory that receives the generated ``.txt`` files;
            created on demand.
        annotation_folder: Directory holding the XML annotations, looked up
            by image file stem.
        missing_message: Text printed when ``image_folder`` does not exist.
    """
    if not os.path.isdir(image_folder):
        print(missing_message)
        return

    # Create the label folder only once the split is known to exist; a bare
    # os.mkdir would raise when the split directory itself is missing and
    # the message above would never be shown.
    os.makedirs(label_folder, exist_ok=True)

    for _dirpath, _dirnames, filenames in os.walk(image_folder):
        for filename in filenames:
            image_id, extension = os.path.splitext(filename)
            if extension.lower() in IMAGE_EXTENSIONS:
                convert_annotation(image_id, label_folder, annotation_folder)


_export_labels(train_image_folder, train_label_folder, train_annotation_folder,
               'train_image_folder is not exist.')
_export_labels(valid_image_folder, valid_label_folder, valid_annotation_folder,
               'valid_image_folder is not exist.')

print('Exported to image txt.')

In [None]:
# Write the dataset description file: the train/val list locations, the
# number of classes and the class names.
with open("data/%s/voc.yaml"%(dataset), 'w') as f:
    yaml_lines = [
        "# train and val datasets (image directory or *.txt file with image paths)\n",
        "train: data/%s/train.txt\n" %(dataset),
        "val: data/%s/val.txt\n\n" %(dataset),
        "# number of classes\n",
        "nc: %d\n\n" %(len(classes)),
        "# class names\n",
        # The list is rendered through its Python repr, e.g. ['plate'].
        "names: %s\n"%(classes),
    ]
    f.writelines(yaml_lines)