### TFRecord Generation
This is the last stage: it combines the image annotations and the images into two separate records (a train/test split) that are ready to be uploaded to GCloud for processing. Image augmentation and preprocessing should be done before using the functions in this notebook.


In [None]:
## Global Variables
# Module-level state shared between cells; LoadAndSplitDataset assigns all three.
processed_annotations = []  # every annotation tuple parsed from the annotation file
train_dataset = []          # leading slice of the shuffled annotations
test_dataset  = []          # remaining slice of the shuffled annotations

In [None]:
import random

def LoadAndSplitDataset(annotation_file, train_fraction=0.70, seed=None):
    """Load the combined annotation file, shuffle it and split it into train/test sets.

    Every non-blank line of ``annotation_file`` must contain six
    whitespace-separated fields::

        filename class_idx x_min_norm y_min_norm x_max_norm y_max_norm

    Each line becomes a tuple ``(str, int, float, float, float, float)``.

    The results are stored in the module-level ``processed_annotations``,
    ``train_dataset`` and ``test_dataset`` lists. All three are rebuilt from
    scratch on every call, so re-running the cell never accumulates duplicate
    annotations (which would otherwise leak samples across the split).

    Args:
        annotation_file: Path of the text file holding the annotations.
        train_fraction: Fraction (0.0 - 1.0) of the annotations assigned to
            ``train_dataset``; the remainder goes to ``test_dataset``.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.

    Raises:
        ValueError: If ``train_fraction`` is outside [0, 1], or a non-blank
            line does not hold six fields or its numeric fields cannot be
            converted.
    """
    global processed_annotations
    global train_dataset
    global test_dataset

    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError("train_fraction must be within [0, 1], got {}".format(train_fraction))

    # Parse into a fresh list so stale data from a previous call is discarded.
    annotations = []
    # The context manager closes the file even when a line fails to parse.
    with open(annotation_file, "r") as annotation_data:
        for line_number, line in enumerate(annotation_data, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            if len(fields) != 6:
                raise ValueError(
                    "{}:{}: expected 6 fields, got {}".format(annotation_file, line_number, len(fields))
                )
            filename, class_idx, x_min_norm, y_min_norm, x_max_norm, y_max_norm = fields
            annotations.append((
                filename,
                int(class_idx),
                float(x_min_norm),
                float(y_min_norm),
                float(x_max_norm),
                float(y_max_norm),
            ))

    # Shuffle and Split
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(annotations)

    train_test_split = int(train_fraction * len(annotations))
    processed_annotations = annotations
    train_dataset = annotations[:train_test_split]
    test_dataset  = annotations[train_test_split:]

    # Sanity Check
    print("Training Data Size  : {}\nTesting Data Size   : {}".format(len(train_dataset), len(test_dataset)))
    print("Sanity Check Status : {}".format(len(train_dataset) + len(test_dataset) == len(processed_annotations)))

In [None]:
# Parse the combined annotation file and populate the module-level
# processed_annotations / train_dataset / test_dataset lists.
LoadAndSplitDataset("/Users/Work/Documents/Conversion/data/augfin.txt")

In [None]:
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utility functions for creating TFRecord data sets."""



def int64_feature(value):
  """Wrap a single value in a one-element Int64List inside a tf.train.Feature."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)


def int64_list_feature(value):
  """Wrap an already-built sequence of values in an Int64List inside a tf.train.Feature."""
  int64_list = tf.train.Int64List(value=value)
  return tf.train.Feature(int64_list=int64_list)


def bytes_feature(value):
  """Wrap a single value in a one-element BytesList inside a tf.train.Feature."""
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)


def bytes_list_feature(value):
  """Wrap an already-built sequence of values in a BytesList inside a tf.train.Feature."""
  bytes_list = tf.train.BytesList(value=value)
  return tf.train.Feature(bytes_list=bytes_list)


def float_list_feature(value):
  """Wrap an already-built sequence of values in a FloatList inside a tf.train.Feature."""
  float_list = tf.train.FloatList(value=value)
  return tf.train.Feature(float_list=float_list)

In [None]:
# Packs the metadata, encoded bytes and bounding boxes described by one dictionary
# into a single tf.train.Example, i.e. one entry of a TFRecord file.
def create_tf_example(data_info):
  """Build a tf.train.Example from a dictionary describing one image.

  Args:
    data_info: Mapping with the keys
      'height', 'width'         -- image size, stored as int64 features;
      'filename'                -- str (it is ``.encode()``-d here), stored as
                                   both 'image/filename' and 'image/source_id';
      'encoded_image_data'      -- encoded image content, stored as a bytes feature;
      'image_format'            -- str (it is ``.encode()``-d here);
      'xmins', 'xmaxs',
      'ymins', 'ymaxs'          -- per-box coordinate lists, stored as float lists;
      'classes_text'            -- per-box class names, stored as a bytes list;
      'classes'                 -- per-box class ids, stored as an int64 list.

  Returns:
    The assembled tf.train.Example.

  Raises:
    KeyError: If any of the keys above is missing from ``data_info``.
  """
  # Image-level fields.
  img_height = data_info['height']
  img_width = data_info['width']
  name_bytes = data_info['filename'].encode()
  image_bytes = data_info['encoded_image_data']
  format_bytes = data_info['image_format'].encode()

  # Box-level fields (one entry per bounding box in every list).
  box_left = data_info['xmins']
  box_right = data_info['xmaxs']
  box_top = data_info['ymins']
  box_bottom = data_info['ymaxs']
  label_names = data_info['classes_text']
  label_ids = data_info['classes']

  feature_map = {}
  feature_map['image/height'] = int64_feature(img_height)
  feature_map['image/width'] = int64_feature(img_width)
  feature_map['image/filename'] = bytes_feature(name_bytes)
  feature_map['image/source_id'] = bytes_feature(name_bytes)
  feature_map['image/encoded'] = bytes_feature(image_bytes)
  feature_map['image/format'] = bytes_feature(format_bytes)
  feature_map['image/object/bbox/xmin'] = float_list_feature(box_left)
  feature_map['image/object/bbox/xmax'] = float_list_feature(box_right)
  feature_map['image/object/bbox/ymin'] = float_list_feature(box_top)
  feature_map['image/object/bbox/ymax'] = float_list_feature(box_bottom)
  feature_map['image/object/class/text'] = bytes_list_feature(label_names)
  feature_map['image/object/class/label'] = int64_list_feature(label_ids)

  return tf.train.Example(features=tf.train.Features(feature=feature_map))

In [None]:
import os 
import cv2 as cv
import tensorflow as tf

# Sanity Check: re-print the sizes of the module-level train/test lists before any
# record is written. The status line is True when the two splits together contain
# exactly as many entries as processed_annotations.
print("Training Data Size  : {}\nTesting Data Size   : {}".format(len(train_dataset), len(test_dataset)))
print("Sanity Check Status : {}".format(len(train_dataset) + len(test_dataset) == len(processed_annotations)))

def _image_format_from_filename(filename):
    """Return the image format implied by a filename's extension ('jpeg', 'png', ...)."""
    extension = os.path.splitext(filename)[1].lstrip(".").lower()
    # '.jpg' and '.jpeg' are the same encoding; report it under one name.
    return "jpeg" if extension in ("jpg", "jpeg") else extension


def process_labels(processed_annotations, dest_file,
                   image_dir='/Users/Work/Documents/Conversion/data/datasetAug/',
                   labels=("Okay", "Defect")):
    """Write one TFRecord entry per annotation tuple to ``dest_file``.

    Each element of ``processed_annotations`` is a tuple
    ``(filename, class_idx, x_min_norm, y_min_norm, x_max_norm, y_max_norm)``.
    The image is read from ``image_dir`` to obtain its size and encoded bytes,
    and the result is serialised through ``create_tf_example``.

    NOTE(review): every annotation produces its own record, so an image that
    appears in several annotations is written several times, each with a single
    box. Confirm this is intended rather than one record per image.

    Args:
        processed_annotations: Iterable of annotation tuples as described above.
        dest_file: Path of the TFRecord file to create.
        image_dir: Directory containing the image files named in the annotations.
        labels: Class names indexed by ``class_idx - 1`` (class ids are 1-based).

    Raises:
        ValueError: If a ``class_idx`` is not within ``1..len(labels)``.
        FileNotFoundError: If an image cannot be read by OpenCV.
    """
    # The context manager flushes and closes the writer even if a record fails.
    with tf.io.TFRecordWriter(dest_file) as writer:
        for array in processed_annotations:
            filename, class_idx, x_min_norm, y_min_norm, x_max_norm, y_max_norm = array

            class_idx = int(class_idx)
            # Reject ids outside the label table; index 0 would otherwise wrap
            # around silently to the last label via negative indexing.
            if not 1 <= class_idx <= len(labels):
                raise ValueError(
                    "class index {} for {!r} is outside 1..{}".format(class_idx, filename, len(labels))
                )

            img_path = os.path.join(image_dir, filename)
            img = cv.imread(img_path)
            if img is None:
                # cv.imread signals a missing/unreadable file by returning None.
                raise FileNotFoundError("Could not read image: {}".format(img_path))
            height, width = img.shape[:2]

            with tf.io.gfile.GFile(img_path, 'rb') as fid:
                encoded_image_data = fid.read()

            data_info = {
                'filename': img_path,
                'image_format': _image_format_from_filename(filename),
                'width': width,
                'height': height,
                'encoded_image_data': encoded_image_data,
                'xmins': [float(x_min_norm)],
                'xmaxs': [float(x_max_norm)],
                'ymins': [float(y_min_norm)],
                'ymaxs': [float(y_max_norm)],
                'classes_text': [labels[class_idx - 1].encode()],
                'classes': [class_idx],
            }

            tf_record = create_tf_example(data_info)
            writer.write(tf_record.SerializeToString())

In [7]:
# Serialise the training and testing splits into two separate TFRecord files.
process_labels(train_dataset, "/Users/Work/Documents/Conversion/train.tfrecord")
process_labels(test_dataset, "/Users/Work/Documents/Conversion/test.tfrecord")

KeyboardInterrupt: 

In [None]:
# NOTE(review): image_augmentation is not defined in any visible cell of this
# notebook — confirm where it comes from, or remove this cell.
image_augmentation(train_dataset)