**lib**

In [1]:
import cv2
import matplotlib.pyplot as plt
import os
import tensorflow as tf

def draw_boundingbox(image_object, train_image_path):
    """Display an image with the object's 2D bounding box drawn on it.

    Args:
        image_object: mapping with the keys "image_file_name", "xmin",
            "ymin", "xmax" and "ymax"; the four coordinates are truncated
            to integers before drawing.
        train_image_path: directory that contains the image file.
    """
    file_path = os.path.join(train_image_path, image_object["image_file_name"])
    canvas = plt.imread(file_path)

    # Box corners as integer pixel coordinates.
    left, top, right, bottom = [
        int(image_object[key]) for key in ("xmin", "ymin", "xmax", "ymax")
    ]
    box_color = (255, 0, 0)
    line_thickness = 2
    canvas = cv2.rectangle(
        canvas, (left, top), (right, bottom), box_color, line_thickness
    )

    plt.figure(figsize=(20, 20))
    plt.imshow(canvas)

def get_bytes_feature(value):
    """Return a ``tf.train.Feature`` whose ``bytes_list`` holds ``value``.

    ``value`` is forwarded unchanged as the ``value`` argument of
    ``tf.train.BytesList``.
    """
    bytes_list = tf.train.BytesList(value=value)
    return tf.train.Feature(bytes_list=bytes_list)

def get_float_feature(value):
    """Return a ``tf.train.Feature`` whose ``float_list`` holds ``value``.

    ``value`` is forwarded unchanged as the ``value`` argument of
    ``tf.train.FloatList``.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)

def get_int_feature(value):
    """Return a ``tf.train.Feature`` whose ``int64_list`` holds ``value``.

    ``value`` is forwarded unchanged as the ``value`` argument of
    ``tf.train.Int64List``.
    """
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)

In [24]:
import os
import numpy as np
import io

# --- Dataset locations -------------------------------------------------------
# Directory of per-image label text files.
label_path = "/Users/privy/Desktop/all_repos/3D_bb_estimation/data/label_2"
# Directory of the training images referenced by the label files.
train_image_path = "/Users/privy/Desktop/all_repos/3D_bb_estimation/data/training/image_2"
# Destination TFRecord file.
out_file = "/Users/privy/Desktop/all_repos/3D_bb_estimation/data/train.tfrecords"

# --- Preprocessing settings --------------------------------------------------
# Only label rows whose first field equals this name are kept.
label_name = "Car"
# Number of equal angular bins that the full circle (2*pi) is divided into.
number_bin = 2
# Widens each bin's acceptance range so angles near a boundary can be
# assigned to both neighbouring bins.
overlap = 0.1
# Size passed to cv2.resize for every cropped object.
image_size = (224, 224)

def parse_label(label_path, label_name, train_image_path):
    """Collect the annotated objects of one class from a directory of label files.

    Every ``*.txt`` file in ``label_path`` is read line by line. Each line is
    split on whitespace and interpreted positionally (presumably the KITTI
    object label layout -- confirm against the dataset):

    * field 0      -> class label
    * field 1, 2   -> truncation and occlusion values
    * field 3      -> angle in radians
    * fields 4..7  -> xmin, ymin, xmax, ymax of the 2D box
    * fields 8..10 -> object dimensions

    Only rows whose label equals ``label_name`` and whose absolute truncation
    and occlusion values are both below 0.1 are kept.

    Args:
        label_path: directory containing one label file per image.
        label_name: class label to keep (e.g. ``"Car"``).
        train_image_path: unused; kept for backward compatibility.

    Returns:
        A list of dicts with the keys "image_file_name", "xmin", "ymin",
        "xmax", "ymax", "dims" (``np.ndarray`` of three floats) and "angle"
        (field 3 shifted by pi/2 and wrapped into ``[0, 2*pi)``). Files are
        processed in sorted name order so the result is reproducible.
    """
    all_objects = []
    two_pi = 2. * np.pi

    # os.listdir returns entries in arbitrary order and may include non-label
    # files (e.g. ".DS_Store"); keep only label files, in a stable order.
    label_files = sorted(
        name for name in os.listdir(label_path) if name.endswith(".txt")
    )

    for file_name in label_files:
        # Swap only the extension, not every ".txt" occurrence in the name.
        image_file_name = os.path.splitext(file_name)[0] + ".png"
        with open(os.path.join(label_path, file_name), 'r') as txt_file:
            for line in txt_file:
                data_list = line.split()
                if not data_list:
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                if data_list[0] != label_name:
                    continue

                # Keep only objects that are neither truncated nor occluded.
                truncated = np.abs(float(data_list[1]))
                occluded = np.abs(float(data_list[2]))
                if not (truncated < 0.1 and occluded < 0.1):
                    continue

                # Shift the angle by pi/2 and wrap it into [0, 2*pi).
                angle = float(data_list[3]) + np.pi / 2.
                if angle < 0:
                    angle = angle + two_pi
                angle = angle - int(angle / two_pi) * two_pi

                all_objects.append({
                    "image_file_name": image_file_name,
                    "xmin": float(data_list[4]),
                    "ymin": float(data_list[5]),
                    "xmax": float(data_list[6]),
                    "ymax": float(data_list[7]),
                    "dims": np.array([float(number) for number in data_list[8:11]]),
                    "angle": angle,
                })
    return all_objects

def compute_anchors(angle, number_bin, overlap):
    """Assign an angle to one or two angular bins.

    The circle is split into ``number_bin`` wedges of width
    ``2*pi / number_bin``. The wedge index containing ``angle`` and the next
    index are each tested; an index is accepted when the angle lies within
    ``wedge/2 * (1 + overlap/2)`` of that index's boundary
    (``index * wedge``).

    Args:
        angle: angle in radians.
        number_bin: number of wedges the circle is divided into.
        overlap: relative widening of the acceptance range.

    Returns:
        A list of ``[bin_index, offset]`` pairs, where ``offset`` is
        ``angle - index * wedge`` and the upper bin index is wrapped with
        ``% number_bin``.
    """
    bin_width = 2. * np.pi / number_bin
    reach = bin_width / 2 * (1 + overlap / 2)

    lower = int(angle / bin_width)
    upper = lower + 1

    candidates = (
        # (accepted?, [bin index, signed offset from that bin's boundary])
        (
            (angle - lower * bin_width) < reach,
            [lower, angle - lower * bin_width],
        ),
        (
            (upper * bin_width - angle) < reach,
            [upper % number_bin, angle - upper * bin_width],
        ),
    )
    return [anchor for accepted, anchor in candidates if accepted]

def write_tfrecords(image, dimension, orientation, confidence, writer):
    """Serialize one sample as a ``tf.train.Example`` and write it to ``writer``.

    Args:
        image: a single bytes value, stored under "image" as a one-element
            bytes list.
        dimension: sequence of floats stored under "dimension".
        orientation: iterable of floats; converted to a list and stored
            under "orientation".
        confidence: sequence of floats stored under "confidence".
        writer: object with a ``write(bytes)`` method that receives the
            serialized example.
    """
    features = tf.train.Features(feature={
        "image": get_bytes_feature([image]),
        "dimension": get_float_feature(dimension),
        "orientation": get_float_feature(list(orientation)),
        "confidence": get_float_feature(confidence),
    })
    record = tf.train.Example(features=features)
    serialized = record.SerializeToString()
    writer.write(serialized)
    
def main(max_objects=201):
    """Build the training TFRecord file from the label and image directories.

    Uses the module-level settings ``label_path``, ``label_name``,
    ``train_image_path``, ``out_file``, ``number_bin``, ``overlap`` and
    ``image_size``. For every parsed object it writes one record containing:

    * the PNG-encoded crop of the object's 2D box, resized to ``image_size``;
    * the object's dimensions minus the mean dimensions over all objects;
    * a flat ``number_bin * 2`` array holding (cos, sin) of the angle offset
      for each assigned bin, zeros elsewhere;
    * a ``number_bin`` array with 1 for each assigned bin.

    Args:
        max_objects: maximum number of parsed objects to process, or ``None``
            for all of them. The default of 201 matches the previous
            hard-coded cutoff (the loop stopped after index 200).

    Raises:
        FileNotFoundError: if an image referenced by a label cannot be read.
    """
    # parse label
    all_objects = parse_label(label_path, label_name, train_image_path)
    # calculate average dimension of car
    average_dims = np.mean([obj["dims"] for obj in all_objects], axis=0)

    # The context manager closes the writer (flushing buffered records) even
    # if an error occurs part-way through.
    with tf.io.TFRecordWriter(out_file) as writer:
        for image_obj in all_objects[:max_objects]:
            # network needs to predict the residual value of the dimension
            object_dimension = image_obj["dims"] - average_dims

            # one confidence value per bin
            confidence = np.zeros(number_bin)
            # cos and sin per bin
            orientation = np.zeros((number_bin, 2))

            # fill confidence and orientation for every bin the angle falls in
            for bin_index, offset in compute_anchors(
                    image_obj["angle"], number_bin, overlap):
                confidence[bin_index] = 1
                orientation[bin_index] = np.array([np.cos(offset), np.sin(offset)])

            # read and crop the image using the 2D detection box
            image_file = os.path.join(train_image_path, image_obj["image_file_name"])
            image_array = cv2.imread(image_file)
            if image_array is None:
                # cv2.imread signals failure by returning None, not raising.
                raise FileNotFoundError(
                    "could not read image: {}".format(image_file))

            cropped_image = image_array[int(image_obj["ymin"]):int(image_obj["ymax"]),
                                        int(image_obj["xmin"]):int(image_obj["xmax"])]
            if cropped_image.size == 0:
                # Degenerate box after integer truncation; cv2.resize would fail.
                continue
            cropped_image = cv2.resize(cropped_image, image_size)
            _, encoded_image = cv2.imencode('.png', cropped_image)
            encoded_image = encoded_image.tobytes()

            # flatten orientation to length number_bin*2 and write the record
            orientation = orientation.ravel()
            write_tfrecords(encoded_image, object_dimension, orientation,
                            confidence, writer)
# Run the conversion only when executed directly (script or notebook cell),
# not as a side effect of importing this module.
if __name__ == "__main__":
    main()