In [1]:
import os
import xml.etree.ElementTree as ET 
import struct
import numpy as np
import cv2
import random

In [2]:
# Directory the image file names from the annotation XML are joined onto.
ROOT_DATA_DIR = "data_plate/plate"
# Annotation file handed to parse_xml.
XML_PATH = 'data_plate/plate.xml'
# Fraction of all records that goes into the test list; the remainder is the train list.
test_ratio = 0.1
# Output text files that receive the train and test records.
train_file_name = "plate_train.txt"
test_file_name = "plate_test.txt"

In [3]:


def parse_xml(data_root,xml_file_path):
    """Collect image paths and clipped bounding boxes from an annotation XML file.

    The XML root is searched for an ``<images>`` element whose ``<image>``
    children carry a ``file`` attribute and ``<box>`` children with integer
    ``top`` / ``left`` / ``width`` / ``height`` attributes.

    Args:
        data_root: Directory the image file names are joined onto.
        xml_file_path: Path of the annotation XML file.

    Returns:
        A tuple ``(image_paths, labels)`` of equal-length lists.
        ``labels[i]`` is a list of ``[xmin, ymin, xmax, ymax, 0]`` entries
        (plain ints, clipped to the image bounds) for ``image_paths[i]``.
        Both lists are empty when ``xml_file_path`` is not an existing file
        or the document has no ``<images>`` element.

    Images are skipped (with a printed message where noted) when they have no
    ``file`` attribute, cannot be read by ``cv2.imread``, contain no ``<box>``,
    or contain no box that still has a positive area after clipping.
    Malformed XML is not handled here; the parser error propagates.
    """
    if not os.path.isfile(xml_file_path):
        return [],[]
    root = ET.parse(xml_file_path).getroot()

    image_paths = []
    labels = []

    # find() returns None when there is no <images> element; treat that as
    # "no images" instead of crashing on None.findall.
    images_node = root.find("images")
    images = images_node.findall("image") if images_node is not None else []

    for image in images:
        file_attr = image.get("file")
        if file_attr is None:
            print("image entry without file attribute skipped")
            continue
        # Drop any backslash-separated directory prefix, keep the file name.
        image_name = file_attr.split('\\')[-1]
        image_path = os.path.join(data_root,image_name)
        # The image is loaded only to learn its size for clipping.
        cur_img = cv2.imread(image_path)
        if cur_img is None:
            print("file no size:" + image_path)
            continue
        img_height,img_width = cur_img.shape[:2]

        boxes = image.findall("box")
        if len(boxes) == 0:
            print("file no boxes:" + image_path)
            continue

        image_labels = []
        for box in boxes:
            try:
                top = int(box.get("top"))
                left = int(box.get("left"))
                width = int(box.get("width"))
                height = int(box.get("height"))
            except (TypeError, ValueError):
                # Missing or non-integer attribute: drop this box only.
                print("file bad box:" + image_path)
                continue

            # Clip to the image; builtin max/min keep the values plain ints.
            xmin = max(left,0)
            xmax = min(left + width,img_width)
            ymin = max(top,0)
            ymax = min(top + height,img_height)

            # A box outside the image (or with zero extent) clips to an empty
            # rectangle and must not become a label.
            if xmax <= xmin or ymax <= ymin:
                print("file empty box:" + image_path)
                continue

            image_labels.append([xmin,ymin,xmax,ymax,0])

        if len(image_labels) == 0:
            continue
        image_paths.append(image_path)
        labels.append(image_labels)
    print(len(image_paths))
    print(len(labels))
    return image_paths,labels

In [4]:
# Parse the annotation XML into two parallel lists: image paths and per-image box labels.
image_paths,labels = parse_xml(ROOT_DATA_DIR,XML_PATH)

file no boxes:data_plate/plate/38371fab-82af-46d0-bff3-0642a0bd2ebe.jpg
file no boxes:data_plate/plate/3a268d59-1765-4b16-a252-dbe12d1481ba.jpg
file no boxes:data_plate/plate/4225970d-5ef9-40f4-86e6-5202eec2bf02.jpg
file no boxes:data_plate/plate/54f3d3b4-cf33-4f95-b228-4376b2b667f8.jpg
file no boxes:data_plate/plate/57f63c97-7bc4-4f7c-a8d2-9f4e2821d830.jpg
file no boxes:data_plate/plate/66446799-16d7-44f7-b33b-2e6eda9a22a9.jpg
file no boxes:data_plate/plate/6aa6591f-cbc5-4e7e-94a9-e56bb61ae97e.jpg
file no boxes:data_plate/plate/77fd880e-65dd-41dd-b920-4b34d6b105f2.jpg
file no boxes:data_plate/plate/79ef1f67-9263-4f08-9cd3-c269f03d7d42.jpg
file no boxes:data_plate/plate/7e699be1-bd30-432a-9f31-76d6c1e28a16.jpg
file no boxes:data_plate/plate/8322de31-2f51-436e-828c-47cfcb48d5c6.jpg
file no boxes:data_plate/plate/881725ca-47a7-4ef8-b4b0-d4a3f4089e8e.jpg
file no boxes:data_plate/plate/8c2e7cfd-2742-4ae5-afb0-8348e2c1fe90.jpg
file no boxes:data_plate/plate/b3cd65f8-20a2-4e27-9bd2-616c6f1c5

In [5]:
def convert_to_string(image_path,labels):
    """Format one image and its boxes as a single newline-terminated text record.

    The record is the image path followed by every value of every label,
    all separated by single spaces, e.g. ``"img.jpg 1 2 3 4 0\\n"``.

    Args:
        image_path: Path string placed at the start of the record.
        labels: Iterable of per-box value sequences; each value is
            rendered with ``str``.

    Returns:
        The assembled record string, ending in ``'\\n'``.
    """
    fields = [image_path]
    for box_values in labels:
        fields.extend(str(value) for value in box_values)
    return ' '.join(fields) + '\n'

# One text record per image: the path followed by all of its box values.
# zip pairs each path with its labels directly, so no loop variable named
# `id` is left behind shadowing the builtin in the kernel namespace.
records = [
    convert_to_string(path, image_labels)
    for path, image_labels in zip(image_paths, labels)
]

# Shuffle with a fixed seed so the train/test split is identical on every
# Restart & Run All; a private Random instance leaves the global RNG untouched.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(records)
print(len(records))
print(records[0:5])

1554
['data_plate/plate/caa1899e-8ffc-4fbe-99c5-496f1ea4d28f.jpg 35 73 67 86 0\n', 'data_plate/plate/d2d33e20-008a-448a-8abc-18cef5cc62de.jpg 378 93 444 151 0\n', 'data_plate/plate/86995b54-27d8-45dc-9b22-aa2a7b2dbd71.jpg 40 80 84 96 0\n', 'data_plate/plate/23886977-5fd5-4f76-8303-1eef2ac70831.jpg 48 156 99 177 0\n', 'data_plate/plate/64594681-6f40-4917-97c7-5180fe37d93a.jpg 48 87 84 111 0\n']


In [6]:
# Split the shuffled records: the last `test_num` entries form the test set,
# everything before them is the training set.
total_num = len(records)
test_num = int(test_ratio * total_num)  # truncates toward zero
train_num = total_num - test_num
train_records, tests_records = records[:train_num], records[train_num:]

In [7]:
# Write the train and test records to their text files. Each record already
# ends with a newline, so writelines emits one record per line. The `with`
# blocks close the files even if a write raises.
with open(train_file_name,'w') as train_out_file:
    train_out_file.writelines(train_records)

with open(test_file_name,'w') as test_out_file:
    test_out_file.writelines(tests_records)