# Pose Estimation

## Import

In [1]:
from IPython.display import Image
from PIL import Image as pilImage
import tensorflow as tf
import numpy as np
import os
import time
import math
from datetime import timedelta
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

## Load Data

## Setting Variables

In [None]:
# Step size handed to the Adam optimizer in the "Optimization Method" cell.
learning_rate = 0.0001

### Placeholder variables

Every time we execute the TensorFlow computational graph, we can feed different values to the placeholder variables. These placeholder variables are multi-dimensional arrays called <b>tensors</b>, and their data type is set to <b>float32</b>.

<font color="red">**None means it can hold an arbitrary number of images.</font>
<br>The images input shape: [None, img_size, img_size, num_channels]
<br>- Each image being <b>img_size</b> pixels high and <b>img_size</b> pixels wide and with <b>num_channels</b> colour channels.
<br>The labels shape: [None, num_classes]
<br>The class shape: [None]

In [None]:
# Input images, laid out as [batch, img_height, img_width, num_channels].
# All inputs are square, so img_height == img_width == IMAGE_SIZE.
# The leading None lets every feed carry an arbitrary number of images.
# IMAGE_SIZE and NUM_CHANNELS must be defined before this cell is run.
x = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], name='x')

# Ground-truth confidence maps associated with x.
# Layout: [batch, map_height, map_width, 19].
# The spatial dimensions are left as None because they have to equal the
# (down-sampled) size of the network output, which depends on the pooling
# performed inside cnn.new_conv_layer.
# NOTE(review): 19 channels mirrors the 19-filter prediction layers
# (branch 2) of the network below - confirm against the label generator.
y_true_cm = tf.placeholder(tf.float32, shape=[None, None, None, 19], name='y_true_cm')

# Ground-truth affinity fields associated with x.
# Layout: [batch, map_height, map_width, 38].
# NOTE(review): 38 channels mirrors the 38-filter prediction layers
# (branch 1) of the network below - confirm against the label generator.
y_true_af = tf.placeholder(tf.float32, shape=[None, None, None, 38], name='y_true_af')

## Architecture 

In [None]:
# Shared feature extractor: a stack of 3x3 'SAME' convolutions applied to x.
# Every call returns the layer output and its filter weights.
# use_pooling=True is requested after conv1_2, conv2_2 and conv3_4.
# num_input_channels of each layer must equal num_filters of the layer feeding it.
conv1_1, w1_1 = cnn.new_conv_layer(input=x, num_input_channels=3, filter_size=3, num_filters=64, padding='SAME')
# conv1_1 emits 64 channels, so conv1_2 consumes 64 (not the 3 image channels).
conv1_2, w1_2 = cnn.new_conv_layer(input=conv1_1, num_input_channels=64, filter_size=3, num_filters=64, padding='SAME', use_pooling=True)
conv2_1, w2_1 = cnn.new_conv_layer(input=conv1_2, num_input_channels=64, filter_size=3, num_filters=128, padding='SAME')
conv2_2, w2_2 = cnn.new_conv_layer(input=conv2_1, num_input_channels=128, filter_size=3, num_filters=128, padding='SAME', use_pooling=True)
conv3_1, w3_1 = cnn.new_conv_layer(input=conv2_2, num_input_channels=128, filter_size=3, num_filters=256, padding='SAME')
conv3_2, w3_2 = cnn.new_conv_layer(input=conv3_1, num_input_channels=256, filter_size=3, num_filters=256, padding='SAME')
conv3_3, w3_3 = cnn.new_conv_layer(input=conv3_2, num_input_channels=256, filter_size=3, num_filters=256, padding='SAME')
conv3_4, w3_4 = cnn.new_conv_layer(input=conv3_3, num_input_channels=256, filter_size=3, num_filters=256, padding='SAME', use_pooling=True)
conv4_1, w4_1 = cnn.new_conv_layer(input=conv3_4, num_input_channels=256, filter_size=3, num_filters=512, padding='SAME')
conv4_2, w4_2 = cnn.new_conv_layer(input=conv4_1, num_input_channels=512, filter_size=3, num_filters=512, padding='SAME')
# The last two layers narrow the features from 512 to 256 to 128 channels;
# conv4_4 is the feature map consumed by both stage-1 branches.
conv4_3, w4_3 = cnn.new_conv_layer(input=conv4_2, num_input_channels=512, filter_size=3, num_filters=256, padding='SAME')
conv4_4, w4_4 = cnn.new_conv_layer(input=conv4_3, num_input_channels=256, filter_size=3, num_filters=128, padding='SAME')
# Stage 1: two parallel branches that both start from the shared conv4_4 features.
# Each branch is three 3x3 convolutions (128 filters), one 1x1 convolution
# (512 filters) and a final 1x1 prediction layer built with use_relu=False.
_s1_conv3x3 = dict(num_input_channels=128, filter_size=3, num_filters=128, padding='SAME')
_s1_conv1x1 = dict(num_input_channels=128, filter_size=1, num_filters=512, padding='SAME')

# stage 1 branch 1 (38 output channels)
conv1_1_1, w1_1_1 = cnn.new_conv_layer(input=conv4_4, **_s1_conv3x3)
conv1_2_1, w1_2_1 = cnn.new_conv_layer(input=conv1_1_1, **_s1_conv3x3)
conv1_3_1, w1_3_1 = cnn.new_conv_layer(input=conv1_2_1, **_s1_conv3x3)
conv1_4_1, w1_4_1 = cnn.new_conv_layer(input=conv1_3_1, **_s1_conv1x1)
conv1_5_1, w1_5_1 = cnn.new_conv_layer(
    input=conv1_4_1, num_input_channels=512, filter_size=1, num_filters=38,
    padding='SAME', use_relu=False)

# stage 1 branch 2 (19 output channels)
conv1_1_2, w1_1_2 = cnn.new_conv_layer(input=conv4_4, **_s1_conv3x3)
conv1_2_2, w1_2_2 = cnn.new_conv_layer(input=conv1_1_2, **_s1_conv3x3)
conv1_3_2, w1_3_2 = cnn.new_conv_layer(input=conv1_2_2, **_s1_conv3x3)
conv1_4_2, w1_4_2 = cnn.new_conv_layer(input=conv1_3_2, **_s1_conv1x1)
conv1_5_2, w1_5_2 = cnn.new_conv_layer(
    input=conv1_4_2, num_input_channels=512, filter_size=1, num_filters=19,
    padding='SAME', use_relu=False)
# concat 1
# Join both stage-1 predictions along the channel axis (axis 3 of the
# [batch, height, width, channels] layout): 38 + 19 = 57 channels.
# Axis 1 is the height axis and cannot be used because the two branches
# differ in channel count.
# NOTE(review): the reference OpenPose/CPM design also concatenates the shared
# conv4_4 features at this point - consider adding them (57 + 128 input channels).
concat1 = tf.concat([conv1_5_1, conv1_5_2], axis=3)
# stage 2 branch 1 - first layer consumes all 57 concatenated channels
conv2_1_1, w2_1_1 = cnn.new_conv_layer(input=concat1, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv2_2_1, w2_2_1 = cnn.new_conv_layer(input=conv2_1_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_3_1, w2_3_1 = cnn.new_conv_layer(input=conv2_2_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_4_1, w2_4_1 = cnn.new_conv_layer(input=conv2_3_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_5_1, w2_5_1 = cnn.new_conv_layer(input=conv2_4_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_6_1, w2_6_1 = cnn.new_conv_layer(input=conv2_5_1, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv2_7_1, w2_7_1 = cnn.new_conv_layer(input=conv2_6_1, num_input_channels=128, filter_size=1, num_filters=38, padding='SAME', use_relu=False)
# stage 2 branch 2 - first layer consumes all 57 concatenated channels
conv2_1_2, w2_1_2 = cnn.new_conv_layer(input=concat1, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv2_2_2, w2_2_2 = cnn.new_conv_layer(input=conv2_1_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_3_2, w2_3_2 = cnn.new_conv_layer(input=conv2_2_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_4_2, w2_4_2 = cnn.new_conv_layer(input=conv2_3_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_5_2, w2_5_2 = cnn.new_conv_layer(input=conv2_4_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv2_6_2, w2_6_2 = cnn.new_conv_layer(input=conv2_5_2, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv2_7_2, w2_7_2 = cnn.new_conv_layer(input=conv2_6_2, num_input_channels=128, filter_size=1, num_filters=19, padding='SAME', use_relu=False)
# concat 2
# Join both stage-2 predictions along the channel axis (axis 3 of the
# [batch, height, width, channels] layout): 38 + 19 = 57 channels.
# Axis 1 is the height axis and cannot be used because the two branches
# differ in channel count.
# NOTE(review): the reference OpenPose/CPM design also concatenates the shared
# conv4_4 features at this point - consider adding them (57 + 128 input channels).
concat2 = tf.concat([conv2_7_1, conv2_7_2], axis=3)
# stage 3 branch 1 - first layer consumes all 57 concatenated channels
conv3_1_1, w3_1_1 = cnn.new_conv_layer(input=concat2, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv3_2_1, w3_2_1 = cnn.new_conv_layer(input=conv3_1_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_3_1, w3_3_1 = cnn.new_conv_layer(input=conv3_2_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_4_1, w3_4_1 = cnn.new_conv_layer(input=conv3_3_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_5_1, w3_5_1 = cnn.new_conv_layer(input=conv3_4_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_6_1, w3_6_1 = cnn.new_conv_layer(input=conv3_5_1, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv3_7_1, w3_7_1 = cnn.new_conv_layer(input=conv3_6_1, num_input_channels=128, filter_size=1, num_filters=38, padding='SAME', use_relu=False)
# stage 3 branch 2 - first layer consumes all 57 concatenated channels
conv3_1_2, w3_1_2 = cnn.new_conv_layer(input=concat2, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv3_2_2, w3_2_2 = cnn.new_conv_layer(input=conv3_1_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_3_2, w3_3_2 = cnn.new_conv_layer(input=conv3_2_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_4_2, w3_4_2 = cnn.new_conv_layer(input=conv3_3_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_5_2, w3_5_2 = cnn.new_conv_layer(input=conv3_4_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv3_6_2, w3_6_2 = cnn.new_conv_layer(input=conv3_5_2, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv3_7_2, w3_7_2 = cnn.new_conv_layer(input=conv3_6_2, num_input_channels=128, filter_size=1, num_filters=19, padding='SAME', use_relu=False)
# concat 3
# Join both stage-3 predictions along the channel axis (axis 3 of the
# [batch, height, width, channels] layout): 38 + 19 = 57 channels.
# Axis 1 is the height axis and cannot be used because the two branches
# differ in channel count.
# NOTE(review): the reference OpenPose/CPM design also concatenates the shared
# conv4_4 features at this point - consider adding them (57 + 128 input channels).
concat3 = tf.concat([conv3_7_1, conv3_7_2], axis=3)
# stage 4 branch 1 - first layer consumes all 57 concatenated channels
conv4_1_1, w4_1_1 = cnn.new_conv_layer(input=concat3, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv4_2_1, w4_2_1 = cnn.new_conv_layer(input=conv4_1_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_3_1, w4_3_1 = cnn.new_conv_layer(input=conv4_2_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_4_1, w4_4_1 = cnn.new_conv_layer(input=conv4_3_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_5_1, w4_5_1 = cnn.new_conv_layer(input=conv4_4_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_6_1, w4_6_1 = cnn.new_conv_layer(input=conv4_5_1, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv4_7_1, w4_7_1 = cnn.new_conv_layer(input=conv4_6_1, num_input_channels=128, filter_size=1, num_filters=38, padding='SAME', use_relu=False)
# stage 4 branch 2 - first layer consumes all 57 concatenated channels
conv4_1_2, w4_1_2 = cnn.new_conv_layer(input=concat3, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv4_2_2, w4_2_2 = cnn.new_conv_layer(input=conv4_1_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_3_2, w4_3_2 = cnn.new_conv_layer(input=conv4_2_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_4_2, w4_4_2 = cnn.new_conv_layer(input=conv4_3_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_5_2, w4_5_2 = cnn.new_conv_layer(input=conv4_4_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv4_6_2, w4_6_2 = cnn.new_conv_layer(input=conv4_5_2, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv4_7_2, w4_7_2 = cnn.new_conv_layer(input=conv4_6_2, num_input_channels=128, filter_size=1, num_filters=19, padding='SAME', use_relu=False)
# concat 4
# Join both stage-4 predictions along the channel axis (axis 3 of the
# [batch, height, width, channels] layout): 38 + 19 = 57 channels.
# Axis 1 is the height axis and cannot be used because the two branches
# differ in channel count.
# NOTE(review): the reference OpenPose/CPM design also concatenates the shared
# conv4_4 features at this point - consider adding them (57 + 128 input channels).
concat4 = tf.concat([conv4_7_1, conv4_7_2], axis=3)
# stage 5 branch 1 - first layer consumes all 57 concatenated channels
conv5_1_1, w5_1_1 = cnn.new_conv_layer(input=concat4, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv5_2_1, w5_2_1 = cnn.new_conv_layer(input=conv5_1_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_3_1, w5_3_1 = cnn.new_conv_layer(input=conv5_2_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_4_1, w5_4_1 = cnn.new_conv_layer(input=conv5_3_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_5_1, w5_5_1 = cnn.new_conv_layer(input=conv5_4_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_6_1, w5_6_1 = cnn.new_conv_layer(input=conv5_5_1, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv5_7_1, w5_7_1 = cnn.new_conv_layer(input=conv5_6_1, num_input_channels=128, filter_size=1, num_filters=38, padding='SAME', use_relu=False)
# stage 5 branch 2 - first layer consumes all 57 concatenated channels
conv5_1_2, w5_1_2 = cnn.new_conv_layer(input=concat4, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv5_2_2, w5_2_2 = cnn.new_conv_layer(input=conv5_1_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_3_2, w5_3_2 = cnn.new_conv_layer(input=conv5_2_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_4_2, w5_4_2 = cnn.new_conv_layer(input=conv5_3_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_5_2, w5_5_2 = cnn.new_conv_layer(input=conv5_4_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv5_6_2, w5_6_2 = cnn.new_conv_layer(input=conv5_5_2, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
conv5_7_2, w5_7_2 = cnn.new_conv_layer(input=conv5_6_2, num_input_channels=128, filter_size=1, num_filters=19, padding='SAME', use_relu=False)
# concat 5
# Join both stage-5 predictions along the channel axis (axis 3 of the
# [batch, height, width, channels] layout): 38 + 19 = 57 channels.
# Axis 1 is the height axis and cannot be used because the two branches
# differ in channel count.
# NOTE(review): the reference OpenPose/CPM design also concatenates the shared
# conv4_4 features at this point - consider adding them (57 + 128 input channels).
concat5 = tf.concat([conv5_7_1, conv5_7_2], axis=3)
# stage 6 branch 1 - first layer consumes all 57 concatenated channels
conv6_1_1, w6_1_1 = cnn.new_conv_layer(input=concat5, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv6_2_1, w6_2_1 = cnn.new_conv_layer(input=conv6_1_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_3_1, w6_3_1 = cnn.new_conv_layer(input=conv6_2_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_4_1, w6_4_1 = cnn.new_conv_layer(input=conv6_3_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_5_1, w6_5_1 = cnn.new_conv_layer(input=conv6_4_1, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_6_1, w6_6_1 = cnn.new_conv_layer(input=conv6_5_1, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
# Final prediction of branch 1 (38 channels, built with use_relu=False).
conv6_7_1, w6_7_1 = cnn.new_conv_layer(input=conv6_6_1, num_input_channels=128, filter_size=1, num_filters=38, padding='SAME', use_relu=False)
# stage 6 branch 2 - first layer consumes all 57 concatenated channels
conv6_1_2, w6_1_2 = cnn.new_conv_layer(input=concat5, num_input_channels=57, filter_size=7, num_filters=128, padding='SAME')
conv6_2_2, w6_2_2 = cnn.new_conv_layer(input=conv6_1_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_3_2, w6_3_2 = cnn.new_conv_layer(input=conv6_2_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_4_2, w6_4_2 = cnn.new_conv_layer(input=conv6_3_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_5_2, w6_5_2 = cnn.new_conv_layer(input=conv6_4_2, num_input_channels=128, filter_size=7, num_filters=128, padding='SAME')
conv6_6_2, w6_6_2 = cnn.new_conv_layer(input=conv6_5_2, num_input_channels=128, filter_size=1, num_filters=128, padding='SAME')
# Final prediction of branch 2 (19 channels, built with use_relu=False).
conv6_7_2, w6_7_2 = cnn.new_conv_layer(input=conv6_6_2, num_input_channels=128, filter_size=1, num_filters=19, padding='SAME', use_relu=False)

## Architecture (Optimisation Part)
### L2 Loss

In [None]:
# Mean-squared-error loss of each final-stage branch against its ground truth.
# The loss is computed on the layer OUTPUTS (conv6_7_*); the w6_7_* tensors are
# the 1x1 filter weights and are not predictions.
# NOTE(review): targets are paired by channel count - branch 1 emits 38 channels
# and is matched with the affinity fields, branch 2 emits 19 channels and is
# matched with the confidence maps. Confirm against the label generator.
b1_loss = tf.reduce_mean(tf.square(conv6_7_1 - y_true_af))
b2_loss = tf.reduce_mean(tf.square(conv6_7_2 - y_true_cm))

# Register both losses as scalar summaries so they show up in TensorBoard.
tf.summary.scalar("b1 loss", b1_loss)
tf.summary.scalar("b2 loss", b2_loss)

### Optimization Method
AdamOptimizer is an advanced form of gradient descent that we use to minimise the cost.
<br>
**Note that optimization is not performed at this point. In fact, nothing is calculated at all, we just add the optimizer-object to the TensorFlow graph for later execution.

In [None]:
# Counter of optimisation steps. It is incremented by the optimizer on every
# update, read by optimize() for logging, and appended to checkpoint file names.
global_step = tf.Variable(0, trainable=False, name='global_step')

# minimize() accepts a single loss tensor (its second positional argument is
# global_step, not another loss), so both branch losses are summed first.
total_loss = b1_loss + b2_loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)

### Saver
Save the variables of the neural network so they can be reloaded quickly without having to train the network again.
<br>Ref: https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/04_Save_Restore.ipynb

**Note that nothing is actually saved at this point, which will be done further below.

In [None]:
# Saver object used by optimize() to write checkpoints and by the
# restore cell below to reload the latest one.
saver = tf.train.Saver()

## Main (Execute)

In [None]:
# Session in which the TensorFlow graph built above is executed.
session = tf.Session()

# TensorBoard logging. The writer stores the graph (and the summaries added
# during training) under ./log; view them by running
#   tensorboard --logdir="./log"
# and opening the TensorBoard page, e.g. http://192.168.0.150:6006
writer = tf.summary.FileWriter('./log', graph=session.graph)
# Single op that evaluates every summary registered so far.
merged = tf.summary.merge_all()

# Weights and biases are deliberately not initialised here: the
# "Restore or initialize variables" cell below either restores them
# from a checkpoint or runs tf.global_variables_initializer().

### Executing-function - Perform optimization iterations
This function executes the training process of the CNN. It performs a number of optimization iterations so as to gradually improve the variables of the network layers. In each iteration, a new batch of data is selected from the training set and TensorFlow executes the optimizer using it.

Input:<br>
num_iterations - Number of optimization iterations<br>
Output:<br>
None<br>
*The weights are updated within the model, and a training status message is printed every 100 iterations

In [None]:
def optimize(num_iterations):
    """Run `num_iterations` optimisation steps on random training batches.

    Each iteration draws a batch, feeds it to the graph and runs the optimizer
    together with the merged TensorBoard summaries and both branch losses.
    A status line is printed whenever the global step is a multiple of 100,
    a checkpoint is written whenever it is a multiple of 1000, and both also
    happen on the last iteration. The elapsed time is printed at the end.

    Relies on names defined in other cells: session, optimizer, merged,
    writer, saver, save_path, global_step, b1_loss, b2_loss, the placeholders
    x / y_true_cm / y_true_af and the batch provider random_batch().

    Args:
        num_iterations: Number of optimisation iterations to perform.

    Returns:
        None. The network variables are updated inside the session.
    """
    # Start-time used for printing time-usage below.
    start_time = time.time()

    for i in range(num_iterations):
        is_last = (i == num_iterations - 1)

        # Get a batch of training examples.
        # NOTE(review): random_batch() is assumed to return the images plus one
        # label array per label placeholder (confidence maps, affinity fields);
        # confirm against its definition.
        x_batch, y_cm_batch, y_af_batch = random_batch()

        # Map every placeholder of the graph to its batch.
        feed_dict_train = {x: x_batch,
                           y_true_cm: y_cm_batch,
                           y_true_af: y_af_batch}

        # One run fetches the step counter, the summaries and both losses and
        # applies the optimizer update; the order of the fetches has no effect.
        i_global, summary, _, b1_value, b2_value = session.run(
            [global_step, merged, optimizer, b1_loss, b2_loss],
            feed_dict=feed_dict_train)

        # Log against the global step (not the local loop index) so that a run
        # resumed from a checkpoint continues the curve instead of overwriting it.
        writer.add_summary(summary, i_global)

        # Print status to screen every 100 iterations (and last).
        if (i_global % 100 == 0) or is_last:
            msg = "Global Step: {0:>6}, Training Batch Loss: b1 {1:.6f}, b2 {2:.6f}"
            print(msg.format(i_global, b1_value, b2_value))

        # Save a checkpoint to disk every 1000 iterations (and last).
        if (i_global % 1000 == 0) or is_last:
            # The global_step counter is appended to the file name so that
            # the last several checkpoints are kept.
            saver.save(session,
                       save_path=save_path,
                       global_step=global_step)

            print("Saved checkpoint.")

    # Print the time-usage, rounded to whole seconds.
    time_dif = time.time() - start_time
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

### Restore or initialize variables
Training this neural network may take a long time, especially if you do not have a GPU. We therefore save checkpoints during training so we can continue training at another time (e.g. during the night), and also for performing analysis later without having to train the neural network every time we want to use it.

If you want to restart the training of the neural network, you have to delete the checkpoints first.

This is the directory used for the checkpoints.

In [None]:
save_dir = 'checkpoints/'
# Create directory if not exist
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
save_path = os.path.join(save_dir, 'att_cnn')

# Try to restore the latest checkpoint; fall back to fresh initialisation
# when there is none or when it cannot be loaded (e.g. the graph has been
# modified since the checkpoint was written).
print("Trying to restore last checkpoint ...")

# Use TensorFlow to find the latest checkpoint - returns None if there is none.
last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)

restored = False
if last_chk_path is not None:
    try:
        # Try and load the data in the checkpoint.
        saver.restore(session, save_path=last_chk_path)
        restored = True
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate, and report
        # why the restore failed instead of hiding it.
        print("Could not restore", last_chk_path, "-", err)

if restored:
    print("Restored checkpoint from:", last_chk_path)
else:
    # No usable checkpoint: initialise all the variables of the TensorFlow graph.
    print("Failed to restore checkpoint. Initializing variables instead.")
    session.run(tf.global_variables_initializer())

In [None]:
# Run 900 optimisation iterations; status printing and checkpointing
# are handled inside optimize().
optimize(num_iterations=900)