# Pose Estimation Models

## Imports

In [1]:
import pandas as pd
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras_tuner
import keras.backend as K
import math as m
import time
import os

from sklearn.model_selection import train_test_split

np.set_printoptions(suppress = True)

---

## Variables

In [2]:
# Root folder for all data files, and the filename prefix of the raw images
data_path_prefix = "D:/Uni Stuff/IP/Data/"
image_path_prefix = f"{data_path_prefix}Blimp Images/Raw/image_"

# Model points used by the ADD metric: the eight corners of a cube spanning -1..1
blimp_vertices = np.array([
    [1, 1, 1],
    [1, 1, -1],
    [1, -1, 1],
    [1, -1, -1],
    [-1, 1, 1],
    [-1, 1, -1],
    [-1, -1, -1],
    [-1, -1, 1],
])

test_split = 0.25

# Side length (in pixels) of the square images fed to the model
desired_image_size = 16

colours = {0: "R", 1: "G", 2: "B"}

# Column names like '0 0 R', '0 0 G', '0 0 B', '0 1 R' etc:
# first index outermost, then second index, then colour channel
image_columns = [
    f"{row} {col} {colours[channel]}"
    for row in range(desired_image_size)
    for col in range(desired_image_size)
    for channel in range(3)
]

---

## Data Pre-processing

### CSVs

Load the bounding box and pose CSV files into dataframes.

In [None]:
# Bounding boxes, one row per raw image (used to crop the images below)
bbox_data = pd.read_csv(f"{data_path_prefix}bbox data.csv")

# Poses, one row per raw image; the column names are kept as the target variable names
pose_data = pd.read_csv(f"{data_path_prefix}blimp poses.csv")
y_variables = pose_data.columns.tolist()

### Images

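Load the images from file, crop these by the bounding box and then resize to the desired square image size with padding to preserve aspect ratio. The processed images are saved to disk, then split into train and test folders alongside their pose data; they are flattened into dataframes later, when the training data is loaded.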

In [None]:
def cutout_image(image, bbox):
    """Crop the image to the bounding box coordinates.

    image: array indexed as [row, column, ...].
    bbox: mapping with 'cent_x', 'cent_y', 'width' and 'height', each given
        as a fraction of the image width/height.

    Returns the slice of `image` inside the box, clipped to the image borders.
    """
    img_height, img_width = image.shape[0], image.shape[1]

    def pixel_span(center_fraction, size_fraction, limit):
        # Convert a fractional centre/extent along one axis to clipped pixel bounds
        center = center_fraction * limit
        extent = size_fraction * limit
        lower = np.clip(int(center - (extent / 2)), 0, limit)
        upper = np.clip(int(center + (extent / 2)), 0, limit)
        return lower, upper

    left, right = pixel_span(bbox['cent_x'], bbox['width'], img_width)
    top, bottom = pixel_span(bbox['cent_y'], bbox['height'], img_height)

    return image[top:bottom, left:right]

def resize_and_pad_image(image):
    """Resize the image to the desired_image_size square, padding with black if necessary.

    The longer side is scaled to `desired_image_size` and the shorter side is
    scaled by the same ratio, so the aspect ratio is preserved. The remaining
    space is filled with black borders split evenly between both sides.

    image: array whose first two dimensions are (height, width).

    Returns the resized and padded image.
    Raises ValueError if the image has zero height or width.
    """
    old_size = image.shape[:2]  # (height, width)

    # An empty crop cannot be scaled; fail with a clear message instead of a
    # ZeroDivisionError or an OpenCV assertion further down.
    if min(old_size) == 0:
        raise ValueError("Cannot resize an empty image with shape " + str(image.shape))

    size_ratio = float(desired_image_size) / max(old_size)

    # Keep every side at least one pixel: for very elongated crops the short
    # side would otherwise truncate to 0, which cv.resize rejects.
    new_size = tuple(max(1, int(x * size_ratio)) for x in old_size)

    # cv.resize takes the target size as (width, height)
    resized_image = cv.resize(image, (new_size[1], new_size[0]), interpolation=cv.INTER_AREA)

    delta_w = desired_image_size - new_size[1]
    delta_h = desired_image_size - new_size[0]
    # Any odd leftover pixel goes to the bottom/right border
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    return cv.copyMakeBorder(resized_image, top, bottom, left, right, cv.BORDER_CONSTANT, value=[0, 0, 0])

# Crop, resize and pad every raw image, then save the result to the "Processed Images" folder.
processed_images_dir = data_path_prefix + "Processed Images/"

# cv.imwrite does not create missing folders, so make sure the target exists
os.makedirs(processed_images_dir, exist_ok=True)

for i in range(len(pose_data)):
    raw_image_path = image_path_prefix + str(i) + ".png"
    img = cv.imread(raw_image_path)

    # cv.imread returns None instead of raising when a file cannot be read
    if img is None:
        raise FileNotFoundError("Could not read image: " + raw_image_path)

    # Row i of the bounding box data belongs to image i
    cropped = cutout_image(img, bbox_data.iloc[i])
    padded = resize_and_pad_image(cropped)

    cv.imwrite(processed_images_dir + "image_" + str(i) + ".png", padded)

In [None]:
# Processed (cropped and padded) images written by the previous cell
load_path = data_path_prefix + "Processed Images/image_"

# Output layout, matching the paths the training-data loading cell reads back:
#   Pose Estimation Data/<split>/images/image_<n>.png
#   Pose Estimation Data/<split>/data.csv
save_path = data_path_prefix + "Pose Estimation Data/"
train_save_path = save_path + "train/"
test_save_path = save_path + "test/"


def _copy_images(image_nums, split_path):
    # Copy the processed images with the given numbers into <split_path>/images/
    images_dir = split_path + "images/"
    os.makedirs(images_dir, exist_ok=True)

    for num in image_nums:
        source = load_path + str(num) + ".png"
        image = cv.imread(source)
        # cv.imread returns None instead of raising when a file cannot be read
        if image is None:
            raise FileNotFoundError("Could not read image: " + source)
        cv.imwrite(images_dir + "image_" + str(num) + ".png", image)


# Split the image numbers together with the pose rows so each image stays
# paired with its pose. The number of images is taken from the pose data
# rather than hard-coded.
# NOTE(review): test_size is 0.15 here while the `test_split` variable is 0.25
# and unused - confirm which value is intended.
X_train_nums, X_test_nums, y_train, y_test = train_test_split(np.arange(len(pose_data)), pose_data, test_size=0.15)

_copy_images(X_train_nums, train_save_path)
print("Train images done")

_copy_images(X_test_nums, test_save_path)
print("Test images done")

# The dataframe index (the original row / image number) is written as the first CSV column
y_train.to_csv(train_save_path + "data.csv")
y_test.to_csv(test_save_path + "data.csv")

print("Data done")

### Cleanup

Remove unnecessary variables from RAM

In [None]:
# The raw CSV dataframes are not referenced by the cells below, so release them
del bbox_data, pose_data

---

## ADD Metric

Create functions to calculate ADD, the average distance between the model points placed at the true pose and at the predicted pose

In [None]:
def get_rotation_matrix(rot_vec):
    """Get the rotation matrix for a vector of euler angles given in degrees.

    rot_vec: indexable with the rotation about x, y and z at positions 0, 1 and 2.

    Returns a 3x3 np.matrix equal to Rz * Ry * Rx.
    """
    cos_x, sin_x = m.cos(m.radians(rot_vec[0])), m.sin(m.radians(rot_vec[0]))
    cos_y, sin_y = m.cos(m.radians(rot_vec[1])), m.sin(m.radians(rot_vec[1]))
    cos_z, sin_z = m.cos(m.radians(rot_vec[2])), m.sin(m.radians(rot_vec[2]))

    about_x = np.matrix([[1, 0, 0],
                         [0, cos_x, -sin_x],
                         [0, sin_x, cos_x]])

    about_y = np.matrix([[cos_y, 0, sin_y],
                         [0, 1, 0],
                         [-sin_y, 0, cos_y]])

    about_z = np.matrix([[cos_z, -sin_z, 0],
                         [sin_z, cos_z, 0],
                         [0, 0, 1]])

    # np.matrix overloads * as the matrix product
    return about_z * about_y * about_x

def get_ADD(true_trans, true_rot_vec, pred_trans, pred_rot_vec):
    """Get the average distance between true and predicted model points.

    Each vertex in `blimp_vertices` is multiplied by the transposed rotation
    matrix of a pose and the pose translation is subtracted; the distance
    between the true and predicted result is averaged over all vertices.

    true_trans, pred_trans: translation vectors of the true / predicted pose.
    true_rot_vec, pred_rot_vec: euler angle vectors passed to get_rotation_matrix.
    """
    rot_true_t = get_rotation_matrix(true_rot_vec).T
    rot_pred_t = get_rotation_matrix(pred_rot_vec).T

    vertex_count = len(blimp_vertices)
    accumulated = 0

    for vertex in blimp_vertices:
        placed_true = np.matmul(rot_true_t, vertex) - true_trans
        placed_pred = np.matmul(rot_pred_t, vertex) - pred_trans

        accumulated += np.linalg.norm(placed_true - placed_pred)

    return accumulated / vertex_count

In [None]:
def ADD_metric(y_true, y_pred):
    """Compute the ADD between a true and a predicted pose.

    y_true, y_pred: poses whose first three entries are used as the
        translation and whose remaining entries are used as the euler angle
        rotation vector.

    Returns the ADD from get_ADD, or 10000 when the values cannot be
    evaluated with K.eval.
    """
    # NOTE(review): these slices index the first axis. If Keras passes batched
    # tensors of shape (batch, 6) they select rows, not pose components - confirm.
    true_trans = y_true[:3]
    true_rot_vec = y_true[3:]

    pred_trans = y_pred[:3]
    pred_rot_vec = y_pred[3:]

    try:
        true_trans = K.eval(true_trans)
        true_rot_vec = K.eval(true_rot_vec)
        pred_trans = K.eval(pred_trans)
        pred_rot_vec = K.eval(pred_rot_vec)
    except Exception:
        # Best-effort fallback kept from the original, but no longer swallowing
        # KeyboardInterrupt / SystemExit as a bare `except:` would.
        return 10000

    return get_ADD(true_trans, true_rot_vec, pred_trans, pred_rot_vec)

---

In [3]:
train_images_path = data_path_prefix + "Pose Estimation Data/train/images/"
test_images_path = data_path_prefix + "Pose Estimation Data/test/images/"

# Pose targets. The first column of each data.csv is the dataframe index written
# by to_csv in the split cell, i.e. the number of the image each row belongs to.
y_train = pd.read_csv(data_path_prefix + "Pose Estimation Data/train/data.csv")
y_test = pd.read_csv(data_path_prefix + "Pose Estimation Data/test/data.csv")


def _load_flattened_images(images_path, image_nums):
    # Load image_<n>.png for every n in image_nums, in that order, as one flattened row each
    rows = []

    for num in image_nums:
        image_file = images_path + "image_" + str(int(num)) + ".png"
        image = cv.imread(image_file)
        # cv.imread returns None instead of raising when a file cannot be read
        if image is None:
            raise FileNotFoundError("Could not read image: " + image_file)
        rows.append(image.flatten())

    # Build the dataframe once instead of appending row by row (quadratic with .loc).
    # Pixels are stored as float32.
    pixels = np.array(rows, dtype=np.float32).reshape(len(rows), len(image_columns))
    return pd.DataFrame(pixels, columns=image_columns)


# Load the images in the row order of the pose CSVs. os.listdir order
# (image_0, image_1, image_10, ...) does not match the shuffled row order of
# data.csv, which would pair each image with the wrong pose.
X_train = _load_flattened_images(train_images_path, y_train.iloc[:, 0])
X_test = _load_flattened_images(test_images_path, y_test.iloc[:, 0])

In [None]:
# Drop the first column of each pose dataframe so only the columns after it remain
# (run once per load: running this cell again would remove another column)
y_train = y_train.drop(columns=y_train.columns[0])
y_test = y_test.drop(columns=y_test.columns[0])

## Training

Create a normalization layer and adapt it to the training input data

In [None]:
# Normalization layer over the last axis, adapted to the training images only
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X_train.to_numpy())

In [None]:
# Explicitly switch TensorFlow to eager execution.
# NOTE(review): presumably needed so ADD_metric can call K.eval on tensors - confirm;
# TensorFlow 2 documents eager execution as enabled by default.
tf.compat.v1.enable_eager_execution()

Define the DNN model builder and run a Keras Tuner random search over it

In [None]:
def build_and_compile_model(hp):
    """Build and compile the dense regression model for one tuner trial.

    hp: Keras Tuner hyperparameters object; supplies the "blocks",
        "layers_per_block" and "nodes_per_layer" choices.

    Returns a compiled Sequential model: the shared `normalizer` layer, the
    ReLU dense layers and a final 6-unit dense output layer.
    """
    stack = [normalizer]

    for _ in range(hp.Choice("blocks", [1])):
        stack.extend(
            tf.keras.layers.Dense(hp.Choice("nodes_per_layer", [64]), activation='relu')
            for _ in range(hp.Choice("layers_per_block", [64]))
        )

    # One output per pose component
    stack.append(tf.keras.layers.Dense(6))

    model = tf.keras.Sequential(stack)
    model.compile(
        loss='mean_absolute_error',
        metrics=['accuracy', 'mean_absolute_error'],
        optimizer=tf.keras.optimizers.Adam(0.001),
    )

    return model

#dnn_model = build_and_compile_model(normalizer, 2, 64, 'relu', tf.keras.optimizers.Adam(0.001))
#dnn_model.summary()


# Random search over the hyperparameters declared in build_and_compile_model,
# ranked by validation loss.
tuner = keras_tuner.RandomSearch(
    build_and_compile_model,
    objective="val_loss",
    max_trials=10,
    # Intermediate results are stored under this directory.
    directory="logs/",
    # Start a fresh search instead of resuming one already stored there.
    overwrite=True,
)

tuner.search(
    X_train,
    y_train,
    epochs=10,
    validation_split=0.215,
    # TensorBoard callback, writing its logs to "logs/".
    callbacks=[tf.keras.callbacks.TensorBoard("logs/")],
)

In [None]:
# Keep the first model returned by the tuner's get_best_models() for evaluation below
best_model = tuner.get_best_models()[0]

---

## Evaluate

Plot the training and validation loss against the epoch

In [None]:
def plot_loss(history):
    """Plot the training and validation loss curves against the epoch.

    history: object whose `history` attribute maps 'loss' and 'val_loss' to
        per-epoch values (such as the object returned by a Keras fit call).
    """
    for curve_name in ('loss', 'val_loss'):
        plt.plot(history.history[curve_name], label=curve_name)

    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)

# plot_loss indexes history.history['loss'], which needs the History returned by
# fit(). tuner.get_best_models() returns a model, not a History, so rebuild the
# model from the best hyperparameters and retrain it with the same settings as
# the search to record the loss curves.
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
loss_history = build_and_compile_model(best_hyperparameters).fit(
    X_train,
    y_train,
    validation_split=0.215,
    epochs=10,
)

plot_loss(loss_history)

Evaluate performance out-of-sample on the testing data set

In [None]:
# Report the compiled loss and metrics of the best model on the held-out test set
best_model.evaluate(X_test, y_test)

Print the true and predicted poses

In [None]:
# Show the true test poses first
print("Actual:")
print(y_test)

# Then the model's predictions, labelled with the six pose component names
print("Predicted:")
predictions = best_model.predict(X_test)
print(
    pd.DataFrame(
        predictions,
        columns=["pos_x", "pos_y", "pos_z", "rot_x", "rot_y", "rot_z"],
    )
)

---

## Tuning