In [2]:
import os
import pathlib
import json
from glob import glob
from PIL import Image

import tensorflow as tf

# Change these paths
VAL_IMG_PATH = os.path.join("/Users/work/data/object_detection/WIDER/WIDER_val/images", '*', '*.jpg')
TRAIN_IMG_PATH = os.path.join("/Users/work/data/object_detection/WIDER/WIDER_train/images", '*', '*.jpg')

def int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def int64_list_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def bytes_list_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))


def float_list_feature(value):
  return tf.train.Feature(float_list=tf.train.FloatList(value=value))

In [4]:
def get_images(image_path):
    for img_filename in glob(image_path):
        yield img_filename

In [5]:
val_img_files = list(get_images(VAL_IMG_PATH))
train_img_files = list(get_images(TRAIN_IMG_PATH))

In [3]:
def load_image(filename):
    img = Image.open(filename)
    width = img.width
    height = img.height
    with open(filename, 'rb') as f:
        return f.read(), width, height

In [30]:
img_files = list(img_files)

In [6]:
len(val_img_files)

3226

In [22]:
pathlib.Path(img_files[0]).

'13--Interview'

In [7]:
def load_bbox_data(bbox_def_file):
    bbox_by_image = {}
    with open(bbox_def_file) as f:
        current_filename = None
        bboxes_left_in_run = -1
        current_bboxes = []
        bboxes = []
        for i, line in enumerate(f):
            line = line.strip()
            if line.isnumeric():
                bbox_by_image[current_filename]['n_bboxes'] = int(line)
            elif all(token.isnumeric() for token in line.split()):
                x1, y1, w, h, *rest = line.split()
                bbox = {"x1": int(x1), "y1": int(y1), "x2": int(x1) + int(w), "y2": int(y1) + int(h)}
                bboxes.append(bbox)
            else:
                if current_filename and bboxes:
                    bbox_by_image[current_filename]['bboxes'] = bboxes
                    bboxes = []
                current_filename = line
                bbox_by_image[current_filename] = {}

    return bbox_by_image

In [32]:
file = list(img_files)[0]

In [33]:
file

'/Users/work/data/object_detection/WIDER/WIDER_val/images/11--Meeting/11_Meeting_Meeting_11_Meeting_Meeting_11_663.jpg'

In [8]:
def get_file_key(file):
    file_name = pathlib.Path(file).name
    parent_dir = pathlib.Path(file).parent.name
    return os.path.join(parent_dir, file_name)

In [40]:
file_key

'11--Meeting/11_Meeting_Meeting_11_Meeting_Meeting_11_663.jpg'

In [9]:
val_bbox = load_bbox_data("/Users/work/data/object_detection/WIDER/WIDER_val/wider_face_val_bbx_gt.txt")

In [70]:
len(list(val_bbox.keys()))

3226

In [71]:
len(img_files)

3226

In [10]:
with open("/Users/work/data/object_detection/WIDER/WIDER_val/wider_face_val_bbox_dict.json", 'w') as f:
    json.dump(val_bbox, f)

In [11]:
train_bbox = load_bbox_data("/Users/work/data/object_detection/WIDER/WIDER_train/wider_face_train_bbx_gt.txt")

In [12]:
len(list(train_bbox.keys()))

12880

In [13]:
with open("/Users/work/data/object_detection/WIDER/WIDER_train/wider_face_train_bbox_dict.json", 'w') as f:
    json.dump(train_bbox, f)

In [14]:
def create_tf_example(image_filename, encoded_img, width, height, bboxes):
  """Creates a tf.Example proto for a given image.

  Args:
    encoded_img: The jpg encoded data of the image.

  Returns:
    example: The created tf.Example.
  """


  image_format = b'jpg'

  xmins = []
  xmaxs = []
  ymins = []
  ymaxs = []
  classes_text = []
  classes = []

  for bbox_data in bboxes:
      # bbox coordinates are normalized
      height = height
      width = width
      box_x1 = bbox_data['x1'] / width
      box_x2 = bbox_data['x2'] / width
      box_y1 = bbox_data['y1'] / height
      box_y2 = bbox_data['y2'] / height
      if box_x1 < 0 or box_x2 > 1.0 or box_y1 < 0 or box_y2 > 1.0:
        continue

      xmins.append(box_x1)
      xmaxs.append(box_x2)
      ymins.append(box_y1)
      ymaxs.append(box_y2)
      classes_text.append(b'face')
      classes.append(1)

  tf_example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
      'image/filename': bytes_feature(bytes(image_filename, encoding='utf-8')),
      'image/source_id': bytes_feature(bytes(image_filename, encoding='utf-8')),
      'image/encoded': bytes_feature(encoded_img),
      'image/format': bytes_feature(image_format),
      'image/object/bbox/xmin': float_list_feature(xmins),
      'image/object/bbox/xmax': float_list_feature(xmaxs),
      'image/object/bbox/ymin': float_list_feature(ymins),
      'image/object/bbox/ymax': float_list_feature(ymaxs),
      'image/object/class/text': bytes_list_feature(classes_text),
      'image/object/class/label': int64_list_feature(classes),
  }))
  return tf_example

In [18]:
def create_dataset(image_filenames, bbox_by_image):
    for image_file in image_filenames:
        encoded_img, width, height = load_image(image_file)
        file_key = get_file_key(image_file)
        try:
            yield create_tf_example(image_file, encoded_img, width, height, bbox_by_image[file_key]['bboxes'])
#             for bbox in bbox_by_image[file_key]['bboxes']:
#                 try:
#                     yield create_tf_example(image_file, encoded_img, bbox)
#                 except ZeroDivisionError:
#                     pass
        except (KeyError, ZeroDivisionError):
            pass

In [19]:
val_dataset = list(create_dataset(val_img_files, val_bbox))

In [20]:
len(val_dataset)

3225

In [140]:
list(val_bbox.values())[0]

{'bboxes': [{'x1': 131, 'x2': 164, 'y1': 187, 'y2': 230},
  {'x1': 243, 'x2': 276, 'y1': 214, 'y2': 261},
  {'x1': 363, 'x2': 402, 'y1': 204, 'y2': 247},
  {'x1': 481, 'x2': 525, 'y1': 156, 'y2': 215},
  {'x1': 584, 'x2': 608, 'y1': 200, 'y2': 239},
  {'x1': 641, 'x2': 691, 'y1': 165, 'y2': 223},
  {'x1': 728, 'x2': 761, 'y1': 177, 'y2': 224},
  {'x1': 760, 'x2': 814, 'y1': 128, 'y2': 201},
  {'x1': 989, 'x2': 1022, 'y1': 188, 'y2': 246},
  {'x1': 105, 'x2': 131, 'y1': 243, 'y2': 272}],
 'n_bboxes': 10}

In [21]:
def write_dataset(tf_examples, output_path):
  writer = tf.python_io.TFRecordWriter(output_path)

  for tf_example in tf_examples:
    writer.write(tf_example.SerializeToString())

  writer.close()

In [22]:
write_dataset(val_dataset, "/Users/work/data/object_detection/WIDER/WIDER_val/WIDER_val.tfrecord")

In [23]:
train_dataset = list(create_dataset(train_img_files, train_bbox))

In [24]:
write_dataset(train_dataset, "/Users/work/data/object_detection/WIDER/WIDER_train/WIDER_train.tfrecord")

In [25]:
len(train_dataset)

12879