# An example pipeline for a CPM (Convolutional Pose Machine)

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

# Hyperparameters shared by the cells below.
NUM_STAGES = 3            # one first-stage prediction + (NUM_STAGES - 1) refinements
NUM_TRAINING_STEPS = 100  # iterations of the training loop
NUM_KEYPOINTS = 4         # heatmaps carry NUM_KEYPOINTS + 1 channels
BATCH_SIZE = 32           # samples per generated batch

# Graph inputs; the leading (batch) dimension is left unspecified.
#   x: 128x128 images with 3 channels.
#   y: 16x16 target heatmaps with NUM_KEYPOINTS + 1 channels
#      (the extra channel is presumably a background map -- TODO confirm).
x = tf.placeholder(dtype=tf.float32, shape=[None, 128, 128, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, 16, 16, NUM_KEYPOINTS + 1])

# Define CPM modules

In [2]:
def get_context_feature(x):
    """Build the shared feature extractor and apply it to ``x``.

    A new Keras ``Sequential`` stack is created on every call: three 9x9
    convolutions (32 filters, ReLU, 'same' padding), each followed by a 2x2
    max-pooling layer, then a final 5x5 convolution with 32 filters.

    Args:
        x: Image tensor to run through the stack. The first layer is
            declared with ``input_shape=(128, 128, 3)``.

    Returns:
        The tensor obtained by calling the stack on ``x``.
    """
    conv_kwargs = dict(activation='relu', padding='same')
    feature_extractor = models.Sequential([
        layers.Conv2D(32, (9, 9), input_shape=(128, 128, 3), **conv_kwargs),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(32, (9, 9), **conv_kwargs),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(32, (9, 9), **conv_kwargs),
        layers.MaxPool2D((2, 2)),
        layers.Conv2D(32, (5, 5), **conv_kwargs),
    ])
    return feature_extractor(x)


def get_first_stage_heatmap(context_feature):
    """Predict the initial heatmaps from the context feature map.

    A new ``Sequential`` head is created on every call: a 9x9 convolution
    and a 1x1 convolution (32 filters each), followed by a 1x1 convolution
    that outputs ``NUM_KEYPOINTS + 1`` channels. All layers use ReLU and
    'same' padding.

    NOTE(review): the output layer is also ReLU-activated, so predicted
    heatmap values are never negative -- confirm this is intended.

    Args:
        context_feature: Feature tensor to run through the head.

    Returns:
        The heatmap tensor produced by the head.
    """
    conv_kwargs = dict(activation='relu', padding='same')
    first_stage_head = models.Sequential([
        layers.Conv2D(32, (9, 9), **conv_kwargs),
        layers.Conv2D(32, (1, 1), **conv_kwargs),
        layers.Conv2D(NUM_KEYPOINTS + 1, (1, 1), **conv_kwargs),
    ])
    return first_stage_head(context_feature)


def get_refined_heatmap(context_feature, previous_heatmap):
    """Refine the previous stage's heatmaps using the context features.

    The two inputs are concatenated along axis 3 and passed through a new
    ``Sequential`` stack created on every call, so each call gets its own
    weights. The stack is three 11x11 convolutions (the first with
    ``32 + NUM_KEYPOINTS + 1`` filters, the others with 32), a 1x1
    convolution with 32 filters, and a 1x1 convolution that outputs
    ``NUM_KEYPOINTS + 1`` channels. All layers use ReLU and 'same' padding.

    Args:
        context_feature: Feature tensor shared by all stages.
        previous_heatmap: Heatmap tensor predicted by the preceding stage.
            It must be concatenable with ``context_feature`` on axis 3.

    Returns:
        The refined heatmap tensor.
    """
    stage_input = tf.concat([context_feature, previous_heatmap], axis=3)

    conv_kwargs = dict(activation='relu', padding='same')
    refinement_stack = models.Sequential([
        layers.Conv2D(32 + NUM_KEYPOINTS + 1, (11, 11), **conv_kwargs),
        layers.Conv2D(32, (11, 11), **conv_kwargs),
        layers.Conv2D(32, (11, 11), **conv_kwargs),
        layers.Conv2D(32, (1, 1), **conv_kwargs),
        layers.Conv2D(NUM_KEYPOINTS + 1, (1, 1), **conv_kwargs),
    ])
    return refinement_stack(stage_input)

# Extract context feature map, and initial/refined heatmaps

In [3]:
# The context features are computed once and reused by every stage.
context_feature = get_context_feature(x)

# Stage 1 predicts from the context features alone; each later stage refines
# the heatmap produced by the stage before it. Every stage's output is kept
# in `heatmaps`, in stage order.
current_heatmap = get_first_stage_heatmap(context_feature)
heatmaps = [current_heatmap]
for _ in range(1, NUM_STAGES):
    current_heatmap = get_refined_heatmap(context_feature, current_heatmap)
    heatmaps.append(current_heatmap)

# Calculate loss

In [4]:
# Total training loss: the heatmap of every stage is compared against the same
# target `y`, and the per-stage losses are added together.
# tf.nn.l2_loss already reduces its input to a scalar (sum(t ** 2) / 2), so no
# additional reduce_sum is needed around it.
loss = 0.
for heatmap in heatmaps:
    loss += tf.nn.l2_loss(heatmap - y)

# Initialize optimizer and session

In [5]:
# Step size for plain gradient descent. It is a hyperparameter rather than a
# model weight, so it is marked non-trainable to keep it out of the set of
# trainable variables that `minimize` collects by default.
learning_rate = tf.Variable(0.0001, trainable=False)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train = optimizer.minimize(loss)

# All variables (layer weights and the learning rate) must be initialized
# before the first training step is run.
init = tf.global_variables_initializer()
# The session is left open here because the training loop further down runs
# its steps through `sess`; call sess.close() once training is finished.
sess = tf.Session()
sess.run(init)

# Define fake data generator

In [6]:
def preprocess_label(y, size):
    """Resize the label tensor ``y`` to a square ``size`` x ``size`` map.

    Args:
        y: Image-like tensor accepted by ``tf.image.resize_images``.
        size: Target height and width.

    Returns:
        The resized tensor.
    """
    target_size = (size, size)
    return tf.image.resize_images(y, target_size)

def get_fake_data(batch_size):
    """Generate one batch of random images and random target heatmaps.

    Both arrays are drawn with ``np.random.rand``, i.e. uniformly from
    [0, 1). Images are drawn first, then the heatmaps.

    Args:
        batch_size: Number of samples in the batch.

    Returns:
        A tuple ``(images, ground_truth_heatmaps)`` with shapes
        ``(batch_size, 128, 128, 3)`` and
        ``(batch_size, 16, 16, NUM_KEYPOINTS + 1)``.
    """
    image_shape = (batch_size, 128, 128, 3)
    heatmap_shape = (batch_size, 16, 16, NUM_KEYPOINTS + 1)

    images = np.random.rand(*image_shape)
    ground_truth_heatmaps = np.random.rand(*heatmap_shape)
    return images, ground_truth_heatmaps

# Run training

In [7]:
# Each step draws a fresh random batch, runs one optimizer update, and fetches
# the loss value for that batch in the same session call.
for step in range(NUM_TRAINING_STEPS):
    batch_images, batch_labels = get_fake_data(BATCH_SIZE)
    _, loss_ = sess.run(
        [train, loss],
        feed_dict={x: batch_images, y: batch_labels},
    )
    # Report the loss only on every 20th step.
    if step % 20 != 0:
        continue
    print('{:4} {:.2f}'.format(step, loss_))

   0 20060.34
  20 20626.52
  40 20491.06
  60 20367.55
  80 20486.84
