In [20]:
import numpy as np
import os
import xml.etree.ElementTree as ET
import pandas as pd

import sys
sys.path.append('../')
import utils.json as json
import utils.image as im

In [21]:
def resolve_dir(env_var, default):
    """Return the directory prefix to use for reading or writing files.

    Parameters
    ----------
    env_var : str
        Name of an environment variable that may override the location.
    default : str
        Value returned unchanged when the variable is unset or empty.

    Returns
    -------
    str
        ``default`` as-is, or the override with a trailing path separator
        appended. The separator matters because the rest of the notebook
        builds file paths by plain string concatenation
        (e.g. ``input_dir + 'SegmentationObject'``).
    """
    override = os.environ.get(env_var)
    if not override:
        return default
    # Joining with an empty last component appends a separator only if missing.
    return os.path.join(override, '')


# Root of the VOC2012 devkit; set VOC_INPUT_DIR to run on another machine.
input_dir = resolve_dir('VOC_INPUT_DIR', r'C:\Users\Nicol\Documents\EPFL\BA7\Project\Code\voc\VOCdevkit\VOC2012\\')

# Destination of the generated JSON / CSV files; set VOC_OUTPUT_DIR to override.
output_dir = resolve_dir('VOC_OUTPUT_DIR', r'C:\Users\Nicol\Documents\EPFL\BA7\Project\Code\voc\json\\')

In [3]:
# Image identifiers are the file stems found in the SegmentationObject folder.
# os.listdir() returns entries in arbitrary order, so sort them to make the
# saved list (and everything derived from it) reproducible across runs.
# os.path.splitext() strips the extension whatever its length, instead of
# blindly chopping the last four characters.
img_names = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir(input_dir + 'SegmentationObject')
)
json.save_json(output_dir + 'voc-img-names', img_names)

In [4]:
# Order in which the bounding-box coordinates are stored in each annotation.
BBOX_FIELDS = ('xmin', 'ymin', 'xmax', 'ymax')


def cbbox(bbox):
    """Return a new list with every coordinate of ``bbox`` decremented by one."""
    return [coord - 1 for coord in bbox]


# Maps image name -> list of (class, difficult, [xmin, ymin, xmax, ymax]) tuples,
# one tuple per <object> element of the image's annotation XML.
object_annotations = {}

for name in img_names:
    tree = ET.parse(os.path.join(input_dir, 'Annotations', name + '.xml'))

    annotations = []
    # Read every <object> as a unit so that its class, difficulty flag and box
    # can never be paired with the fields of a different object (which is what
    # zipping independently collected lists does when one field is missing).
    for obj in tree.findall('.//object'):
        obj_class = obj.findtext('name')
        obj_difficult = obj.findtext('difficult')
        # Only the <bndbox> directly under the object is considered.
        coords = [obj.findtext('bndbox//' + field) for field in BBOX_FIELDS]

        if obj_class is None or obj_difficult is None or None in coords:
            raise ValueError(
                '{}.xml: an <object> is missing its name, difficult flag '
                'or bounding-box coordinates'.format(name)
            )

        obj_bbox = cbbox([int(coord) for coord in coords])
        annotations.append((obj_class, int(obj_difficult), obj_bbox))

    object_annotations[name] = annotations

json.save_json(output_dir + 'voc-object-annotations', object_annotations)

In [6]:
# Maps image name -> sorted list of the distinct object classes in that image.
class_annotations = {}

for name in img_names:
    # os.path.join keeps the path valid on any platform (no literal '\\').
    tree = ET.parse(os.path.join(input_dir, 'Annotations', name + '.xml'))
    obj_class = [c.text for c in tree.findall('.//object/name')]

    # set() drops duplicates and sorted() orders them in a single step; the
    # previous np.sort(np.unique(...)) sorted an already-sorted array.
    class_annotations[name] = sorted(set(obj_class))

json.save_json(output_dir + 'voc-class-annotations', class_annotations)

In [13]:
# Flatten the per-image annotation dict into five parallel columns holding
# one entry per annotated object.
names, classes, difficulties, bboxes, areas = [], [], [], [], []

for img_name, img_objects in object_annotations.items():
    for (obj_cls, obj_diff, obj_box) in img_objects:
        names.append(img_name)
        classes.append(obj_cls)
        difficulties.append(int(obj_diff))
        bboxes.append(obj_box)

# Helpers come from the project's utils.image module (not visible here);
# presumably they convert a corner-format box and return its area -- TODO confirm.
for obj_box in bboxes:
    areas.append(im.area_dbbox(im.corner_to_delta(obj_box)))

df = pd.DataFrame(
    list(zip(names, classes, difficulties, bboxes, areas)),
    columns=['Name', 'Class', 'Difficulty', 'cBbox', 'Area'],
)

df.to_csv(output_dir + 'voc-object-annotations.csv', index=False)

In [22]:
# `json` is the project's utils.json module imported at the top of the
# notebook, not the standard-library package. This cell does not create
# 'voc-object-annotations-clean'; the file must already exist in output_dir.
object_annotations_clean = json.open_json(output_dir + 'voc-object-annotations-clean')

# One (image, class, difficulty, box) record per annotated object.
records = [
    (img_name, obj_cls, int(obj_diff), obj_box)
    for img_name, img_objects in object_annotations_clean.items()
    for (obj_cls, obj_diff, obj_box) in img_objects
]

# Split the records back into the parallel columns used for the table.
names = [record[0] for record in records]
classes = [record[1] for record in records]
difficulties = [record[2] for record in records]
bboxes = [record[3] for record in records]
# utils.image helpers are not visible here; presumably corner box -> area -- TODO confirm.
areas = [im.area_dbbox(im.corner_to_delta(obj_box)) for obj_box in bboxes]

df = pd.DataFrame(
    list(zip(names, classes, difficulties, bboxes, areas)),
    columns=['Name', 'Class', 'Difficulty', 'cBbox', 'Area'],
)

df.to_csv(output_dir + 'voc-object-annotations-clean.csv', index=False)

In [26]:
# Reload the per-image class lists from disk (via the project's utils.json
# module) so this cell does not depend on the cell that computed them.
class_annotations = json.open_json(output_dir + 'voc-class-annotations')

# Emit one (image, class) pair per class present in each image.
names, classes = [], []
for img_name, img_classes in class_annotations.items():
    for class_label in img_classes:
        names.append(img_name)
        classes.append(class_label)

df = pd.DataFrame(list(zip(names, classes)), columns=['Name', 'Class'])

df.to_csv(output_dir + 'voc-class-annotations.csv', index=False)

In [19]:
# Sorted list of every distinct class label in the dataset.
# Built directly from `class_annotations` rather than from `df`: the name `df`
# is reassigned by several cells, so reading it here made the result depend on
# which of those cells happened to run last.
voc_classes = sorted({
    class_label
    for img_classes in class_annotations.values()
    for class_label in img_classes
})

json.save_json(output_dir + 'voc-classes', voc_classes)