# Setup environment

In [1]:
import os
import random
import math
import xml.etree.cElementTree as ET

import tensorflow as tf

from object_detection.utils import dataset_util

# Input data

## Fetch class names

In [2]:
FOOD256_DIR = os.path.join(os.path.abspath(os.sep), "Datasets", "food256")
CLASS_PATH = os.path.join(FOOD256_DIR, "category.txt")

# Each data row is split on tabs and the second column is kept as the class
# name. The first line of the file is discarded (presumably a header row).
with open(CLASS_PATH, 'r') as category_file:
    next(category_file, None)  # skip the first line; tolerate an empty file
    class_names = [
        row.split('\t')[1].strip()
        for row in category_file
        if row.strip()  # ignore blank/trailing lines instead of raising IndexError
    ]

class_names

['rice',
 'eels on rice',
 'pilaf',
 "chicken-'n'-egg on rice",
 'pork cutlet on rice',
 'beef curry',
 'sushi',
 'chicken rice',
 'fried rice',
 'tempura bowl',
 'bibimbap',
 'toast',
 'croissant',
 'roll bread',
 'raisin bread',
 'chip butty',
 'hamburger',
 'pizza',
 'sandwiches',
 'udon noodle',
 'tempura udon',
 'soba noodle',
 'ramen noodle',
 'beef noodle',
 'tensin noodle',
 'fried noodle',
 'spaghetti',
 'Japanese-style pancake',
 'takoyaki',
 'gratin',
 'sauteed vegetables',
 'croquette',
 'grilled eggplant',
 'sauteed spinach',
 'vegetable tempura',
 'miso soup',
 'potage',
 'sausage',
 'oden',
 'omelet',
 'ganmodoki',
 'jiaozi',
 'stew',
 'teriyaki grilled fish',
 'fried fish',
 'grilled salmon',
 'salmon meuniere',
 'sashimi',
 'grilled pacific saury',
 'sukiyaki',
 'sweet and sour pork',
 'lightly roasted fish',
 'steamed egg hotchpotch',
 'tempura',
 'fried chicken',
 'sirloin cutlet',
 'nanbanzuke',
 'boiled fish',
 'seasoned beef with potatoes',
 'hambarg steak',
 'ste

## Map dataset to TFRecord files (one per split)

In [None]:
# Input locations inside the dataset folder: XML annotations, JPEG images and
# the per-split lists of example ids.
ANNOTATIONS_DIR, IMAGES_DIR = (
    os.path.join(FOOD256_DIR, subdir) for subdir in ("Annotations", "JPEGImages")
)
SPLIT_DIR = os.path.join(FOOD256_DIR, "ImageSets", "Main")

# Output TFRecord files; both splits are written to the same directory.
_RECORDS_DIR = os.path.join(os.path.abspath(os.sep), "Datasets", "food256", "Tensorflow1")
TRAIN_RECORDS_PATH = os.path.join(_RECORDS_DIR, "train_dataset.record")
TEST_RECORDS_PATH = os.path.join(_RECORDS_DIR, "val_dataset.record")

def format_example(example_id):
    """Build a tf.train.Example for one annotated image.

    Args:
        example_id: Image identifier without extension. The image is read from
            "<example_id>.jpg" in IMAGES_DIR and its annotation from
            "<example_id>.xml" in ANNOTATIONS_DIR.

    Returns:
        A tf.train.Example holding the image size, file name, raw encoded image
        bytes and, for every annotated object, its bounding box (pixel
        coordinates divided by the image width/height), class name and
        1-based class id (position in ``class_names`` plus one).

    Raises:
        ValueError: if an annotated class name is not present in ``class_names``.
    """
    filename = example_id+".jpg"
    img_path = os.path.join(IMAGES_DIR, filename)
    lbl_path = os.path.join(ANNOTATIONS_DIR, example_id+".xml")

    tree = ET.parse(lbl_path)
    root = tree.getroot()

    # get image metadata
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # read image as byte string; the context manager closes the handle right
    # away instead of leaking one descriptor per converted image
    with open(img_path, 'rb') as image_file:
        encoded_image_data = image_file.read()
    image_format = b"jpg"

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []
    classes_text = []

    # get object annotations
    for obj in root.findall('object'):
        class_name = obj.find('name').text

        # get bounding box (pixel coordinates)
        box = obj.find('bndbox')
        x1 = int(box.find('xmin').text)
        y1 = int(box.find('ymin').text)
        x2 = int(box.find('xmax').text)
        y2 = int(box.find('ymax').text)

        # store coordinates relative to the image size
        xmins.append(x1/width)
        xmaxs.append(x2/width)
        ymins.append(y1/height)
        ymaxs.append(y2/height)
        # class ids are 1-based: position in class_names plus one
        classes.append(class_names.index(class_name) + 1)
        classes_text.append(str.encode(class_name))

    # encode filename as byte string
    filename = str.encode(filename)

    return tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(filename),
      'image/source_id': dataset_util.bytes_feature(filename),
      'image/encoded': dataset_util.bytes_feature(encoded_image_data),
      'image/format': dataset_util.bytes_feature(image_format),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
  }))

# Optional cap on the number of examples written per split. None converts the
# whole split; set a small integer for a quick smoke test.
MAX_EXAMPLES_PER_SPLIT = None

for split in ["train", "val"]:
    records_path = TRAIN_RECORDS_PATH if split == "train" else TEST_RECORDS_PATH
    # make sure the output directory exists before opening the writer
    os.makedirs(os.path.dirname(records_path), exist_ok=True)

    count = 0
    with open(os.path.join(SPLIT_DIR, split+".txt"), 'r') as split_file, \
            tf.io.TFRecordWriter(records_path) as writer:
        for img_filename in split_file:
            if MAX_EXAMPLES_PER_SPLIT is not None and count >= MAX_EXAMPLES_PER_SPLIT:
                break

            example_id = img_filename.strip().replace(".jpg", "")
            if not example_id:
                # blank line in the split file (e.g. trailing newline)
                continue

            # format example and write to TFRecord
            tf_example = format_example(example_id)
            writer.write(tf_example.SerializeToString())
            count += 1
    print("{0} dataset contains {1} images".format(split, count))

train dataset contains 26007 images


## Create label map file

In [17]:
# NOTE(review): the label map goes to ".../Tensorflow" while the TFRecord files
# are written under ".../Tensorflow1" — confirm that the difference is intended.
LABELS_PATH = os.path.join(os.path.abspath(os.sep), "Datasets", "food256", "Tensorflow", "label_map.pbtxt")

# make sure the output directory exists before writing
os.makedirs(os.path.dirname(LABELS_PATH), exist_ok=True)

with open(LABELS_PATH, 'w') as label_map_file:
    # ids start at 1 so they match the labels produced by
    # class_names.index(name) + 1 when the examples are built
    for class_id, class_name in enumerate(class_names, start=1):
        # Class names may contain apostrophes (e.g. "chicken-'n'-egg on rice"),
        # which would terminate a single-quoted pbtxt string early. Use double
        # quotes and escape backslashes / double quotes inside the name.
        escaped_name = class_name.replace('\\', '\\\\').replace('"', '\\"')
        label_map_file.write(
            'item {{\n  id: {0}\n  name: "{1}"\n}}\n\n'.format(class_id, escaped_name)
        )