# Setup environment

In [2]:
import os
import random
import math
import xml.etree.cElementTree as ET

import tensorflow as tf

from object_detection.utils import dataset_util

# Input data

## Fetch class names

In [3]:
# Root of the food256 dataset, anchored at the filesystem root.
FOOD256_DIR = os.path.join(os.path.abspath(os.sep), "Datasets", "food256")
# Tab-separated category listing; the class name is taken from the second column.
CLASS_PATH = os.path.join(FOOD256_DIR, "condensed-category.txt")

class_names = []
with open(CLASS_PATH, 'r') as category_file:
    # The first line is read and discarded before any names are collected.
    category_file.readline()
    for row in category_file:
        columns = row.split('\t')
        class_names.append(columns[1].strip())

# Bare expression: shown as the cell output when run in a notebook.
class_names

['rice',
 "chicken-'n'-egg on rice",
 'beef curry',
 'sushi',
 'chicken rice',
 'fried rice',
 'toast',
 'croissant',
 'roll bread',
 'raisin bread',
 'hamburger',
 'pizza',
 'sandwiches',
 'spaghetti',
 'vegetable tempura',
 'sausage',
 'omelet',
 'stew',
 'fried fish',
 'grilled salmon',
 'sweet and sour pork',
 'tempura',
 'fried chicken',
 'steak',
 'egg sunny-side up',
 'roast chicken',
 'fried shrimp',
 'potato salad',
 'green salad',
 'pizza toast',
 'hot dog',
 'french fries',
 'mixed rice',
 'green curry',
 'paella',
 'pancake',
 'crape',
 'tiramisu',
 'waffle',
 'shortcake',
 'mushroom risotto',
 'french toast',
 'minestrone',
 'chicken nugget',
 'french bread',
 'bagel',
 'scone',
 'tortilla',
 'tacos',
 'nachos',
 'meat loaf',
 'scrambled egg',
 'lasagna',
 'Caesar salad',
 'oatmeal',
 'muffin',
 'popcorn',
 'doughnut',
 'apple pie',
 'lamb kebabs',
 'roast duck',
 'hot pot',
 'pork belly',
 'custard tart',
 'stir-fried mixed vegetables',
 'Pork with lemon',
 'Deep Fried Ch

## Map dataset to sharded TFRecord

In [6]:
# Input locations inside the food256 dataset: XML annotations, image files,
# and the directory holding the per-split listing files.
ANNOTATIONS_DIR = os.path.join(FOOD256_DIR, "Annotations")
IMAGES_DIR = os.path.join(FOOD256_DIR, "condensed images")
SPLIT_DIR = os.path.join(FOOD256_DIR, "ImageSets", "Main")

# Output locations: both record files live in the same "food75" directory.
_FOOD75_DIR = os.path.join(os.path.abspath(os.sep), "Datasets", "food75")
TRAIN_RECORDS_PATH = os.path.join(_FOOD75_DIR, "train_dataset.record")
TEST_RECORDS_PATH = os.path.join(_FOOD75_DIR, "test_dataset.record")

def format_example(example_id):
    """Build a ``tf.train.Example`` for one annotated image.

    Reads ``<example_id>.xml`` from ``ANNOTATIONS_DIR`` and
    ``<example_id>.jpg`` from ``IMAGES_DIR``. Objects whose name is not in
    ``class_names`` are dropped. Box coordinates are divided by the image
    width/height recorded in the annotation file.

    Args:
        example_id: Image identifier without a file extension.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, its metadata
        and the kept boxes and labels, or ``None`` when the image has no
        object of a known class.
    """
    filename = example_id + ".jpg"
    img_path = os.path.join(IMAGES_DIR, filename)
    lbl_path = os.path.join(ANNOTATIONS_DIR, example_id + ".xml")

    root = ET.parse(lbl_path).getroot()

    # Image dimensions as recorded in the annotation file.
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # Read the image as a byte string; the context manager closes the handle
    # promptly instead of leaving it to the garbage collector.
    with open(img_path, 'rb') as image_file:
        encoded_image_data = image_file.read()
    image_format = b"jpg"

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes = []
    classes_text = []

    for obj in root.findall('object'):
        class_name = obj.find('name').text

        # Single lookup: list.index raises ValueError for names that are not
        # in class_names, and those objects are skipped.
        try:
            class_id = class_names.index(class_name) + 1  # labels are 1-based
        except ValueError:
            continue

        # Pixel-space bounding box corners.
        box = obj.find('bndbox')
        x1 = int(box.find('xmin').text)
        y1 = int(box.find('ymin').text)
        x2 = int(box.find('xmax').text)
        y2 = int(box.find('ymax').text)

        # Store coordinates as fractions of the image size.
        xmins.append(x1 / width)
        xmaxs.append(x2 / width)
        ymins.append(y1 / height)
        ymaxs.append(y2 / height)
        classes.append(class_id)
        classes_text.append(str.encode(class_name))

    # Nothing to emit when no object of a known class was found.
    if not classes:
        return None

    # The feature helpers below take the filename as a byte string.
    filename = str.encode(filename)

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

# Write one TFRecord file per split: each line of <split>.txt names an image,
# which is converted with format_example and appended to that split's records.
for split in ["train", "test"]:
    records_path = TRAIN_RECORDS_PATH if split == "train" else TEST_RECORDS_PATH
    split_path = os.path.join(SPLIT_DIR, split + ".txt")

    with open(split_path, 'r') as split_file, \
            tf.io.TFRecordWriter(records_path) as writer:
        for img_filename in split_file:
            example_id = img_filename.strip().replace(".jpg", "")

            # A blank line would otherwise be looked up as ".xml"/".jpg" and
            # abort the whole run part-way through the split.
            if not example_id:
                continue

            tf_example = format_example(example_id)

            # format_example returns None when the image has no known class.
            if tf_example is not None:
                writer.write(tf_example.SerializeToString())

240 320 b'4284.jpg' b'jpg' [0.0] [0.0] [1.0] [1.0] [b'stew'] [18]
614 406 b'155515.jpg' b'jpg' [0.13054187192118227] [0.0749185667752443] [0.9482758620689655] [0.9364820846905537] [b'roast duck'] [61]


## Create label map file

In [17]:
# Destination of the label map: one text-format `item` entry per class.
LABELS_PATH = os.path.join(os.path.abspath(os.sep), "Datasets", "food75", "label_map.pbtxt")

with open(LABELS_PATH, 'w') as label_file:
    # Ids start at 1, matching the `class_names.index(...) + 1` labels that
    # format_example stores in the records.
    for item_id, class_name in enumerate(class_names, start=1):
        # The name sits inside a single-quoted text-format string, so embedded
        # backslashes and single quotes (e.g. "chicken-'n'-egg on rice") must
        # be escaped or the resulting file cannot be parsed.
        escaped_name = class_name.replace("\\", "\\\\").replace("'", "\\'")
        label_file.write(
            "item {{\n  id: {0}\n  name: '{1}'\n}}\n\n".format(item_id, escaped_name)
        )