# Convert Vatic Export Format to Tensorflow Format

This notebook converts a Vatic export into TensorFlow records (TFRecord files). It is based on the following script: https://github.com/tensorflow/models/blob/master/object_detection/create_pascal_tf_record.py


In [31]:
# Path of the label-map file that is written below and then read back.
LABEL_MAP_FILE ="labelmap.pbtxt"
# Label-map text: ten `item` entries with ids 1-10.
# NOTE(review): ids 7 and 8 both use the name 'green-grocery-bag'. The
# name -> id dict built at the end of this cell is keyed by name, so it keeps
# a single entry for that name (the cell output below shows
# 'green-grocery-bag': 8) and id 7 cannot be reached by name. One of the two
# names is presumably a typo -- confirm the intended label before changing it.
LABEL_MAP = """
item {
  id: 1
  name: 'plastic-crate'
}

item {
  id: 2
  name: 'egg-carton'
}

item {
  id: 3
  name: 'milk-carton'
}

item {
  id: 4
  name: 'human-head'
}

item {
  id: 5
  name: 'silver-cart'
}

item {
  id: 6
  name: 'table'
}

item {
  id: 7
  name: 'green-grocery-bag'
}

item {
  id: 8
  name: 'green-grocery-bag'
}

item {
  id: 9
  name: 'yellow-grocery-bag'
}

item {
  id: 10
  name: 'not-a-real-object'
}
"""

# Write the label map to disk because get_label_map_dict is given a file path.
with open(LABEL_MAP_FILE, "w") as f:
    f.write(LABEL_MAP)

# Mapping from label name to integer id, used when encoding examples.
# NOTE(review): label_map_util is imported in a later cell of this notebook
# (from object_detection.utils); that import must have been run before this
# cell for the call below to succeed.
label_map_dict = label_map_util.get_label_map_dict(LABEL_MAP_FILE)

In [33]:
label_map_dict

{'egg-carton': 2,
 'green-grocery-bag': 8,
 'human-head': 4,
 'milk-carton': 3,
 'not-a-real-object': 10,
 'plastic-crate': 1,
 'silver-cart': 5,
 'table': 6,
 'yellow-grocery-bag': 9}

In [55]:
import hashlib
import io
import logging
import os

from lxml import etree
import tensorflow as tf
import PIL.Image

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util

In [45]:
%%sh
# Concatenate every file in ImageSets/Main whose name ends in "trainval.txt"
# into a single list file, all.txt, in the same directory.
# NOTE(review): cat does not de-duplicate, so an image ID that is listed in
# more than one of the matched files appears more than once in all.txt --
# confirm that this is intended.
cat ~/model1/data/farmstead1/farmstead1-data.pascal/ImageSets/Main/*trainval.txt  \
    > ~/model1/data/farmstead1/farmstead1-data.pascal/ImageSets/Main/all.txt

In [63]:

def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
    """Convert an XML-derived annotation dict to a tf.Example proto.

    Bounding box coordinates from the annotation are normalized by the image
    width and height recorded in the annotation.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict).
      dataset_directory: Path to root directory holding the PASCAL dataset.
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: Whether to skip objects flagged as
        difficult (default: False).
      image_subdirectory: Subdirectory of `dataset_directory` that holds the
        image files.

    Returns:
      example: The converted tf.Example. An annotation without any objects
        produces an example whose per-object feature lists are all empty.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a JPEG.
      KeyError: if an object's name is not a key of `label_map_dict`.
    """
    full_path = os.path.join(dataset_directory, image_subdirectory, data['filename'])

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode only to check the container format; the raw bytes are what gets
    # stored in the example.
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    key = hashlib.sha256(encoded_jpg).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    xmin = []
    ymin = []
    xmax = []
    ymax = []
    classes = []
    classes_text = []
    truncated = []
    poses = []
    difficult_obj = []

    # dataset_util.recursive_parse_xml_to_dict only creates the 'object' key
    # when the annotation has at least one <object> element, so a frame with
    # no labelled objects has no such key. Treat that as "no objects" instead
    # of failing with a KeyError.
    for obj in data.get('object', []):
        difficult = bool(int(obj['difficult']))

        if ignore_difficult_instances and difficult:
            continue

        difficult_obj.append(int(difficult))

        bndbox = obj['bndbox']
        xmin.append(float(bndbox['xmin']) / width)
        ymin.append(float(bndbox['ymin']) / height)
        xmax.append(float(bndbox['xmax']) / width)
        ymax.append(float(bndbox['ymax']) / height)

        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])

        truncated.append(int(obj['truncated']))
        poses.append(obj['pose'].encode('utf8'))

    encoded_filename = data['filename'].encode('utf8')

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
        'image/object/truncated': dataset_util.int64_list_feature(truncated),
        'image/object/view': dataset_util.bytes_list_feature(poses),
    }))
    return example


def main(output_dir, data_dir, label_map_dict, eval_to_train_ratio=0.1):
    """Write eval.tfrecords and train.tfrecords for a PASCAL-style dataset.

    Assumes data dir looks like this

    dataDir/
        Annotations/
        JPEGImages/
        ImageSets/
            Main/

    The example names are read from ImageSets/Main/all.txt; for each name the
    annotation Annotations/<name>.xml is parsed and converted with
    dict_to_tf_example.

    Args:
      output_dir: Directory in which eval.tfrecords and train.tfrecords are
        created.
      data_dir: Root directory of the dataset (layout above).
      label_map_dict: A map from string label names to integer ids.
      eval_to_train_ratio: Desired number of eval examples per train example.
        The split is made by position in all.txt: with the default 0.1,
        every 11th example (starting with the first) goes to the eval file
        and the other ten go to the train file. 0 sends every example to the
        train file.

    Raises:
      ValueError: if eval_to_train_ratio is negative.
    """
    if eval_to_train_ratio < 0:
        raise ValueError('eval_to_train_ratio must be >= 0')

    annotations_dir = os.path.join(data_dir, "Annotations")
    examples_path = os.path.join(data_dir, 'ImageSets', 'Main', 'all.txt')
    examples_list = dataset_util.read_examples_list(examples_path)
    num_examples = len(examples_list)

    eval_path = os.path.join(output_dir, "eval.tfrecords")
    train_path = os.path.join(output_dir, "train.tfrecords")

    # The interval depends only on the ratio, not on the dataset size: one
    # eval example followed by round(1 / ratio) train examples. Multiplying
    # the ratio by the number of examples (as was done before) produced an
    # interval of ~10% of the dataset, i.e. only about ten eval examples in
    # total, and an interval of 0 (division by zero) for small datasets.
    if eval_to_train_ratio > 0:
        eval_interval = int(round(1.0 / eval_to_train_ratio)) + 1
    else:
        eval_interval = None  # no eval split requested

    with tf.python_io.TFRecordWriter(eval_path) as eval_writer, \
         tf.python_io.TFRecordWriter(train_path) as train_writer:

        for idx, example in enumerate(examples_list):
            if idx % 500 == 0:
                print('On image %d of %d' % (idx, num_examples))

            path = os.path.join(annotations_dir, example + '.xml')

            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()

            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                            ignore_difficult_instances=False)

            if eval_interval is not None and idx % eval_interval == 0:
                eval_writer.write(tf_example.SerializeToString())
            else:
                train_writer.write(tf_example.SerializeToString())

    print("Done")


# Run the conversion: records are written to the data directory, reading the
# farmstead1 PASCAL-layout export and the label map built earlier.
main(
    output_dir="/home/eli/model1/data/",
    data_dir="/home/eli/model1/data/farmstead1/farmstead1-data.pascal/",
    label_map_dict=label_map_dict,
)

On image 0 of 4525


  if not xml:


On image 500 of 4525
On image 1000 of 4525
On image 1500 of 4525
On image 2000 of 4525
On image 2500 of 4525
On image 3000 of 4525
On image 3500 of 4525
On image 4000 of 4525
On image 4500 of 4525
Done


In [64]:
ls /home/eli/model1/data

eval.tfrecords  farmstead1/  farmstead1-data.tgz  train.tfrecords  voc/
