In [1]:
import os
import struct
import warnings
import xml.etree.ElementTree as ET

import numpy as np
from IPython.core.debugger import Tracer

In [2]:
# Object category names; a name's position in this list is its integer id.
classes_name = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# Category name -> integer id, derived from the list above so the two tables
# cannot drift apart.
classes_num = {label: index for index, label in enumerate(classes_name)}

In [3]:
# Dataset roots for the two VOC releases; both are relative paths, so they are
# resolved against the directory the notebook is run from.
_VOCDEVKIT_DIR = '../../Datasets/voc/VOCdevkit'
DATA_PATH_2007 = _VOCDEVKIT_DIR + '/VOC2007'
DATA_PATH_2012 = _VOCDEVKIT_DIR + '/VOC2012'

In [4]:
def parse_xml(data_path, xml_file, include_diff=True, class_to_index=None):
    """Parse one annotation XML file into an image path and box labels.

    Args:
        data_path: dataset root; the image path is built as
            ``<data_path>/JPEGImages/<filename>``.
        xml_file: the input xml file path (anything ``ET.parse`` accepts).
        include_diff: when False, objects whose ``<difficult>`` value is 1
            are left out of ``labels``.
        class_to_index: optional mapping from class name to integer id.
            Defaults to the module-level ``classes_num``.
    Returns:
        image_path: string ('' when the XML has no ``<filename>``)
        labels: list of [xmin, ymin, xmax, ymax, class]; the coordinates are
            the raw text found in the XML, class is the integer id.
    Raises:
        ValueError: an ``<object>`` has no ``<name>``, no ``<bndbox>`` or is
            missing one of the four coordinates.
        KeyError: an object's name is not in the class mapping.
    """
    if class_to_index is None:
        class_to_index = classes_num

    root = ET.parse(xml_file).getroot()

    filename = root.findtext('filename')
    image_path = os.path.join(data_path, 'JPEGImages', filename) if filename else ''

    labels = []
    for obj in root.findall('object'):
        # Every field is read from *this* object, so a missing tag can never
        # pick up a value left over from the previous object.
        # A missing or empty <difficult> is treated as "not difficult".
        difficult = int((obj.findtext('difficult') or '0').strip())
        if difficult == 1 and not include_diff:
            continue

        obj_name = obj.findtext('name')
        if not obj_name:
            raise ValueError('object without <name> in %s' % (xml_file,))

        bndbox = obj.find('bndbox')
        if bndbox is None:
            raise ValueError('object without <bndbox> in %s' % (xml_file,))

        coords = [bndbox.findtext(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        if not all(coords):
            raise ValueError('incomplete <bndbox> in %s' % (xml_file,))

        labels.append(coords + [class_to_index[obj_name]])
    return image_path, labels

In [5]:
def convert_to_string(image_path, labels):
    """Serialise image_path and labels as one line of text.

    The line starts with image_path, followed by every value of every label
    (each preceded by a single space), and ends with a newline.

    Returns:
        string
    """
    fields = [' ' + str(value) for box in labels for value in box]
    return ''.join([image_path] + fields) + '\n'

In [6]:
def generate_xml_list(data_path, data_type):
    """List the annotation XML paths named by one image-set file.

    Reads ``<data_path>/ImageSets/Main/<data_type>.txt``, takes one entry per
    line (surrounding whitespace stripped) and maps each entry to
    ``<data_path>/Annotations/<entry>.xml``.

    Args:
        data_path: dataset root directory.
        data_type: name of the image-set file without the ``.txt`` extension.
    Returns:
        list of strings, in file order. Blank lines are skipped, so a trailing
        empty line does not produce a bogus ``.../Annotations/.xml`` entry.
    """
    list_file = data_path + '/ImageSets/Main/' + data_type + '.txt'
    with open(list_file) as f:
        image_ids = [line.strip() for line in f]
    return [data_path + "/Annotations/" + image_id + ".xml"
            for image_id in image_ids if image_id]

In [7]:
# Annotation file lists for each split, built in the order train, val, test.
train_xml_list_2007, val_xml_list_2007, test_xml_list_2007 = [
    generate_xml_list(DATA_PATH_2007, split) for split in ('train', 'val', 'test')
]
# Only the train and val lists are built for the 2012 data.
train_xml_list_2012, val_xml_list_2012 = [
    generate_xml_list(DATA_PATH_2012, split) for split in ('train', 'val')
]

In [8]:
def process_xml_list(data_path, xml_list, out_path, include_diff=True):
    """Write one text record per annotated image to out_path.

    Each XML file is parsed with parse_xml; when it yields at least one
    label, the line produced by convert_to_string is written to out_path.

    Args:
        data_path: dataset root, forwarded to parse_xml.
        xml_list: iterable of annotation XML paths.
        out_path: text file to create (overwritten if it already exists).
        include_diff: forwarded to parse_xml.

    An annotation that fails to parse or convert is skipped with a warning
    naming the file and the error, so dropped images are visible instead of
    silently missing. Errors writing to out_path are not caught.
    """
    # The context manager closes the file even if a write fails part-way.
    with open(out_path, 'w') as out_file:
        for xml in xml_list:
            try:
                image_path, labels = parse_xml(data_path, xml, include_diff)
                if len(labels) > 0:
                    record = convert_to_string(image_path, labels)
                else:
                    record = None
            except Exception as exc:
                # Best effort: one bad annotation must not abort the whole run.
                warnings.warn('skipping %s: %s: %s' % (xml, type(exc).__name__, exc))
                continue
            if record is not None:
                out_file.write(record)

In [9]:
# One output list file per split; include_diff=True keeps difficult objects.
for _data_path, _xml_list, _out_path in (
    (DATA_PATH_2007, train_xml_list_2007, 'voc_2007_train.txt'),
    (DATA_PATH_2007, val_xml_list_2007, 'voc_2007_val.txt'),
    (DATA_PATH_2007, test_xml_list_2007, 'voc_2007_test.txt'),
    (DATA_PATH_2012, train_xml_list_2012, 'voc_2012_train.txt'),
    (DATA_PATH_2012, val_xml_list_2012, 'voc_2012_val.txt'),
):
    process_xml_list(_data_path, _xml_list, _out_path, include_diff=True)

In [10]:
# Same VOC2007 test split, written to a separate file with include_diff=False
# so that objects flagged as difficult are left out.
process_xml_list(
    data_path=DATA_PATH_2007,
    xml_list=test_xml_list_2007,
    out_path='voc_2007_test_without_diff.txt',
    include_diff=False,
)