# Convert the RAW dataset to TFRecord

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from pascal_voc_writer import Writer

In [2]:
# Command-line flags used by the conversion steps in this file. They are read
# through FLAGS once tf.app.run() has parsed the command line.
FLAGS = tf.app.flags.FLAGS

# Dataset name: stored in the generated xml files and validated against the
# list of supported datasets before the TFRecords are written.
tf.app.flags.DEFINE_string('database', 'Pedestrian',
                           'Name of the dataset being converted')
tf.app.flags.DEFINE_string('image_dir', 'images',
                           'Training data directory')
tf.app.flags.DEFINE_string('test_dir', 'test_images',
                           'Validation data directory')
tf.app.flags.DEFINE_string('csv_file', 'TownCentre-groundtruth.top',
                           'Path to the csv annotation file')
tf.app.flags.DEFINE_string('annotations_dir', 'Annotations',
                           '(Relative) path to annotations directory.')
# Output location of the csv -> xml conversion step.
tf.app.flags.DEFINE_string('xml_dir', 'Annotations/xmls',
                           'Directory where the generated xml annotation files are written')
tf.app.flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
tf.app.flags.DEFINE_string('label_map_path', 'Annotations/pedestrian_label_map.pbtxt',
                           'Path to label map proto')
tf.app.flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
                            'difficult instances')
tf.app.flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')

## Convert *.csv to *.xml(s)

The default csv annotation format is: `<filename>;<xmin>;<ymin>;<xmax>;<ymax>;<class_id>`

For example, this is how one line of such a csv file is processed and converted into an xml file:

```python
obj_attributes_string = '00000.ppm;774;411;815;446;11'

# The fields are ';'-separated: filename, xmin, ymin, xmax, ymax, class id.
filename, xmin, ymin, xmax, ymax, class_id = obj_attributes_string.split(';')

# 300 and 300 are the image width and height recorded in the xml file.
writer = Writer(filename, 300, 300, database='Unknown')
# The class id is used as the object name, followed by the box corners.
writer.addObject(class_id, xmin, ymin, xmax, ymax)
writer.save('out.xml')
```

However, in this dataset, the annotation format will be:

```
personNumber, frameNumber, headValid, bodyValid, headLeft, headTop, headRight, headBottom, bodyLeft, bodyTop, bodyRight, bodyBottom
```

## To make it easier to access the annotation fields, the following dictionary maps each field name to its column index:

In [None]:
# Maps each annotation field name to its column index in a ground-truth row.
# The tuple below lists the fields in the order they appear in the file.
anno_dict = {
    field_name: column_index
    for column_index, field_name in enumerate((
        'personNumber',
        'frameNumber',
        'headValid',
        'bodyValid',
        'headLeft',
        'headTop',
        'headRight',
        'headBottom',
        'bodyLeft',
        'bodyTop',
        'bodyRight',
        'bodyBottom',
    ))
}

In [4]:
def create_xml_annotation(database, csv_file_path, image_dir, xml_dir, factor=2,
                          num_frames=4501):
    """Write one PASCAL VOC xml file per frame from the csv ground truth.

    For every frame number in ``range(num_frames)`` the body bounding boxes of
    that frame are read from the csv file, scaled down by ``factor``, clamped
    to the image bounds and saved as ``<xml_dir>/<frame_number>.xml``. A frame
    without any annotation row still gets an xml file (with no objects).

    Args:
        database: Dataset name stored in each generated xml file.
        csv_file_path: Path to the header-less csv ground-truth file. Column 1
            holds the frame number and columns 8-11 hold bodyLeft, bodyTop,
            bodyRight and bodyBottom.
        image_dir: Directory containing the frames as ``<frame_number>.jpg``.
        xml_dir: Output directory for the xml files; created if missing.
        factor: Annotation coordinates are integer-divided by this value
            before being written (presumably because the images are smaller
            than the annotated video by this factor -- confirm against the
            frame-extraction step).
        num_frames: Number of frames to convert, starting at frame 0.
    """
    os.makedirs(xml_dir, exist_ok=True)

    data = pd.read_csv(csv_file_path, header=None)

    for frame_number in range(num_frames):
        frame_rows = data.loc[data[1] == frame_number]

        image_path = os.path.join(image_dir, f'{frame_number}.jpg')
        # Only the dimensions are needed, so release the file handle at once.
        with Image.open(image_path) as image:
            width, height = image.size
        writer = Writer(image_path, width, height, database=database)

        boxes = zip(frame_rows[8], frame_rows[9], frame_rows[10], frame_rows[11])
        for left, top, right, bottom in boxes:
            # The corners are not guaranteed to be ordered, so sort each axis.
            xmin, xmax = sorted((round(left) // factor, round(right) // factor))
            ymin, ymax = sorted((round(top) // factor, round(bottom) // factor))

            # Clamp to the image: boxes may extend past any of its four edges.
            xmin = min(max(xmin, 0), width)
            xmax = min(max(xmax, 0), width)
            ymin = min(max(ymin, 0), height)
            ymax = min(max(ymax, 0), height)

            writer.addObject('pedestrian', xmin, ymin, xmax, ymax)

        writer.save(os.path.join(xml_dir, f'{frame_number}.xml'))

        
def main(unused_argv):
    """Generate the xml annotations using the locations given by the flags.

    Args:
        unused_argv: Ignored.
    """
    create_xml_annotation(
        database=FLAGS.database,
        csv_file_path=FLAGS.csv_file,
        image_dir=FLAGS.image_dir,
        xml_dir=FLAGS.xml_dir)


# Hand control to TensorFlow's application runner when executed as a script.
if __name__ == '__main__':
    tf.app.run()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## Convert the PASCAL VOC data to TFRecord

In [5]:
import hashlib
import io
import logging
import os
import random
import re

import contextlib2
from lxml import etree
from PIL import Image
import numpy as np
import tensorflow as tf

from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util


# Dataset names accepted for the `database` flag; main() raises a ValueError
# for any other value.
DATASETS = ['GTSDB_VOC', 'Pedestrian']

In [6]:
def dict_to_tf_example(data,
                       label_map_dict,
                       ignore_difficult_instances=False):
    """Build a tf.Example proto from the parsed annotation of a single image.

    The image is read from ``os.path.join(data['folder'], data['filename'])``.
    Bounding box coordinates are normalized by dividing them by the image
    width and height recorded in the annotation.

    Args:
        data: dict holding PASCAL XML fields for a single image (obtained by
            running dataset_util.recursive_parse_xml_to_dict).
        label_map_dict: A map from string label names to integer ids.
        ignore_difficult_instances: Whether to skip objects flagged as
            difficult (default: False).

    Returns:
        example: The converted tf.Example.

    Raises:
        ValueError: if the image referenced by the annotation is not a JPEG.
    """
    image_file = os.path.join(data['folder'], data['filename'])
    with tf.gfile.GFile(image_file, 'rb') as fid:
        jpeg_bytes = fid.read()
    # Decode the header only to verify that the payload really is a JPEG.
    if Image.open(io.BytesIO(jpeg_bytes)).format != 'JPEG':
        raise ValueError('Image format is not JPEG')
    sha256_key = hashlib.sha256(jpeg_bytes).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])

    # Parallel per-object lists; index i of every list describes object i.
    xmins, ymins, xmaxs, ymaxs = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, view_names, difficult_flags = [], [], []

    objects = data['object'] if 'object' in data else []
    for obj in objects:
        is_difficult = bool(int(obj['difficult']))
        if is_difficult and ignore_difficult_instances:
            continue

        difficult_flags.append(int(is_difficult))

        xmins.append(float(obj['bndbox']['xmin']) / width)
        ymins.append(float(obj['bndbox']['ymin']) / height)
        xmaxs.append(float(obj['bndbox']['xmax']) / width)
        ymaxs.append(float(obj['bndbox']['ymax']) / height)
        label_names.append(obj['name'].encode('utf8'))
        label_ids.append(label_map_dict[obj['name']])
        truncated_flags.append(int(obj['truncated']))
        view_names.append(obj['pose'].encode('utf8'))

    encoded_filename = data['filename'].encode('utf8')
    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_filename),
        'image/source_id': dataset_util.bytes_feature(encoded_filename),
        'image/key/sha256': dataset_util.bytes_feature(sha256_key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
        'image/object/difficult': dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated': dataset_util.int64_list_feature(truncated_flags),
        'image/object/view': dataset_util.bytes_list_feature(view_names),
    }

    return tf.train.Example(features=tf.train.Features(feature=feature_map))

In [7]:
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     examples,
                     ignore_difficult_instances=None):
    """Creates a sharded TFRecord file from examples.

    For each example name the annotation ``<annotations_dir>/xmls/<name>.xml``
    is parsed and converted with dict_to_tf_example. Examples whose xml file
    is missing, or whose conversion raises ValueError, are logged and skipped.

    Args:
      output_filename: Path to where output file is saved.
      num_shards: Number of shards for output file.
      label_map_dict: The label map dictionary.
      annotations_dir: Directory where annotation files are stored.
      examples: Examples to parse and save to tf record.
      ignore_difficult_instances: Whether to skip objects flagged as
        difficult. When None (the default) the value of the
        `ignore_difficult_instances` command-line flag is used, so that the
        flag actually takes effect.
    """
    if ignore_difficult_instances is None:
        ignore_difficult_instances = FLAGS.ignore_difficult_instances

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info(f'On image {idx} of {len(examples)}')
            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

            if not os.path.exists(xml_path):
                logging.warning(f'Could not find {xml_path}, ignoring example.')
                continue
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            try:
                tf_example = dict_to_tf_example(
                    data,
                    label_map_dict,
                    ignore_difficult_instances)
                if tf_example:
                    # Spread the examples over the shards in round-robin order.
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
            except ValueError:
                logging.warning(f'Invalid example: {xml_path}, ignoring.')

In [8]:
def main(unused_argv):
    """Split the annotated examples 70/30 and write train/val TFRecords.

    The example names are read from ``trainval.txt`` inside the annotations
    directory (the `annotations_dir` flag, resolved against the current
    working directory), shuffled with a fixed seed, and written to
    ``train.record`` and ``val.record`` under the `output_dir` flag.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: if the `database` flag is not one of DATASETS.
    """
    if FLAGS.database not in DATASETS:
        raise ValueError(f'database must be in : {DATASETS}')

    data_dir = os.getcwd()
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info(f'Reading from {FLAGS.database} dataset.')
    # Honor the flag instead of a hard-coded name; an absolute path given on
    # the command line overrides data_dir.
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_path = os.path.join(annotations_dir, 'trainval.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # Test images are not included in the downloaded data set, so we shall perform
    # our own split. The fixed seed keeps the split reproducible.
    random.seed(42)
    random.shuffle(examples_list)
    num_examples = len(examples_list)
    num_train = int(0.7 * num_examples)
    train_examples = examples_list[:num_train]
    val_examples = examples_list[num_train:]
    logging.info('%d training and %d validation examples.',
        len(train_examples), len(val_examples))

    train_output_path = os.path.join(FLAGS.output_dir, 'train.record')
    val_output_path = os.path.join(FLAGS.output_dir, 'val.record')

    splits = (
        (train_output_path, train_examples),
        (val_output_path, val_examples),
    )
    for output_path, split_examples in splits:
        create_tf_record(
            output_path,
            FLAGS.num_shards,
            label_map_dict,
            annotations_dir,
            split_examples)


if __name__ == '__main__':
    tf.app.run()

  if not xml:


SystemExit: 