In [20]:
%load_ext autoreload
%autoreload
import tensorflow as tf
import numpy as np
import surreal, config
import functools, operator, copy
import tensorflow.contrib.slim as slim
from nets.resnet_v2 import resnet_v2_101
from nets.mobilenet import mobilenet_v2
from tensor_info import INPUT_TENSOR_INFO, OUTPUT_TENSOR_INFO
tf.reset_default_graph()
'''
for x in surreal.load():
    print(x)
    break
'''

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'\nfor x in surreal.load():\n    print(x)\n    break\n'

In [21]:
types = tuple(t['type'] for t in INPUT_TENSOR_INFO)
input_tensors = tf.data.Dataset.from_generator(surreal.load, types) \
                               .batch(config.BATCH_SIZE) \
                               .prefetch(config.PREFETCH_SIZE) \
                               .make_one_shot_iterator() \
                               .get_next()
tensors = {}
for tensor, info in zip(input_tensors, INPUT_TENSOR_INFO):
    tensor.set_shape(info['shape'])
    tensors[info['name']] = tensor

In [22]:
config.STRIDE = 16

MD_H = int(config.TAR_H/config.STRIDE)
MD_W = int(config.TAR_W/config.STRIDE)

DEPTH = [ti['shape'][-1] for ti in OUTPUT_TENSOR_INFO]
RESULT_SHAPE = (config.BATCH_SIZE, MD_H, MD_W, sum(DEPTH))
RESULT_SIZE = functools.reduce(operator.mul, RESULT_SHAPE[1:])
OUTPUT_SHAPE = (config.BATCH_SIZE, config.TAR_H, config.TAR_W, sum(DEPTH))
OUTPUT_SIZE = functools.reduce(operator.mul, OUTPUT_SHAPE[1:])


In [23]:
def bilinear(indices):
    oy = tf.clip_by_value(indices[1], 0, MD_H-1)
    ox = tf.clip_by_value(indices[2], 0, MD_W-1)
    iy = [tf.floor(oy), tf.ceil(oy)]
    ix = [tf.floor(ox), tf.ceil(ox)]
    idx_p = []
    for y_i in range(2):
        for x_i in range(2):
            indices[1] = iy[y_i]
            indices[2] = ix[x_i]
            idx = tf.cast(tf.stack(indices, axis=-1), tf.int32)
            p = (1 - tf.abs(iy[y_i] - oy)) * (1 - tf.abs(ix[x_i] - ox))
            idx_p.append((idx, p))
    return idx_p

def gather_bilinear(params, indices):
    idx_p = bilinear(indices)
    res = []
    for idx, p in idx_p:
        r = tf.gather_nd(params, idx)
        res.append(r * p)
    return tf.add_n(res)

def scatter_bilinear(params, indices, shape):
    idx_p = bilinear(indices)
    res = []
    for idx, p in idx_p:
        r = tf.scatter_nd(idx, params, shape)
        if len(r.shape) > len(p.shape):
            p = tf.expand_dims(p, axis=-1)
        res.append(r * p)
    return tf.add_n(res)

In [24]:
class TestCell(tf.contrib.rnn.RNNCell):
    def __init__(self, is_training):
        super().__init__(self)
        self.is_training = is_training
        
    def resize(self, tensor):
        return tf.image.resize_images(
            tensor,
            (config.TAR_H, config.TAR_W),
            method=tf.image.ResizeMethod.BICUBIC,
            align_corners=True)
    
    def __call__(self, frame_tensor, state):
        model_output, _ = resnet_v2_101(frame_tensor, output_stride=config.STRIDE)
        # state = tf.reshape(state, RESULT_SHAPE)
        
        # model_output = tf.concat([model_output, state], axis=-1)
        # _, so_x_prev, so_y_prev, mo_x_prev, mo_y_prev = tf.split(state, DEPTH, axis=-1)
        
        hm_pred = slim.conv2d(model_output, config.NUM_KP, [1, 1])
        so_x_pred = slim.conv2d(model_output, config.NUM_KP, [1, 1]) # + so_x_prev
        so_y_pred = slim.conv2d(model_output, config.NUM_KP, [1, 1]) # + so_y_prev
        mo_x_pred = slim.conv2d(model_output, config.NUM_EDGE, [1, 1]) # + mo_x_prev
        mo_y_pred = slim.conv2d(model_output, config.NUM_EDGE, [1, 1]) # + mo_y_prev
        
        '''
        mv_x_pred = tf.squeeze(slim.conv2d(model_output, 1, [1, 1]), axis=[-1])
        mv_y_pred = tf.squeeze(slim.conv2d(model_output, 1, [1, 1]), axis=[-1])
        
        b, y, x = np.mgrid[:config.BATCH_SIZE, :MD_H, :MD_W]
        mv_p = [b, y + mv_y_pred, x + mv_x_pred]
        
        hm_expect = scatter_bilinear(hm_pred, mv_p, hm_pred.shape)
        so_x_expect = scatter_bilinear(so_x_pred, mv_p, so_x_pred.shape)
        so_y_expect = scatter_bilinear(so_y_pred, mv_p, so_y_pred.shape)
        
        b, y, x, i = np.mgrid[:config.BATCH_SIZE, :MD_H, :MD_W, :config.NUM_EDGE]
        for _ in range(config.NUM_RECURRENT):
            mo_p = [b, y+mo_y_pred, x+mo_x_pred, i]
            mo_x_pred = gather_bilinear(so_x_pred, mo_p) + mo_x_pred
            mo_y_pred = gather_bilinear(so_y_pred, mo_p) + mo_y_pred
        
        mo_p = [b, y+mo_y_pred, x+mo_x_pred]
        mo_x_expect_in_cp = gather_bilinear(mv_x_pred, mo_p) + \
                            mo_x_pred - tf.expand_dims(mv_x_pred, axis=-1)
        mo_y_expect_in_cp = gather_bilinear(mv_y_pred, mo_p) + \
                            mo_y_pred - tf.expand_dims(mv_y_pred, axis=-1)
        mo_x_expect = scatter_bilinear(mo_x_expect_in_cp, mv_p, mo_x_pred.shape)
        mo_y_expect = scatter_bilinear(mo_y_expect_in_cp, mv_p, mo_y_pred.shape)
        
        next_state = tf.concat([hm_expect, so_x_expect, so_y_expect, mo_x_expect, mo_y_expect], axis=-1)
        next_state = tf.reshape(next_state, [config.BATCH_SIZE, RESULT_SIZE])
        '''
        output = tf.concat([hm_pred, \
                            so_x_pred*config.STRIDE, so_y_pred*config.STRIDE, \
                            mo_x_pred*config.STRIDE, mo_y_pred*config.STRIDE], axis=-1)
        output = self.resize(output)
        output = tf.reshape(output, [config.BATCH_SIZE, OUTPUT_SIZE])
        
        return output, state #next_state

    @property
    def state_size(self):
        return RESULT_SIZE

    @property
    def output_size(self):
        return OUTPUT_SIZE


In [25]:
test_cell = TestCell(is_training=True)
#with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
pred_sum, _ = tf.nn.dynamic_rnn(test_cell, tensors['image'], sequence_length=tensors['seq_len'], dtype=tf.float32)

In [26]:
TOTAL_SHAPE = (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, sum(DEPTH))
pred_sum = tf.reshape(pred_sum, TOTAL_SHAPE)
hm_out, so_x_out, so_y_out, mo_x_out, mo_y_out = tf.split(pred_sum, DEPTH, axis=-1)

In [27]:
hm_sig = tf.sigmoid(hm_out)
hm_loss = - tf.reduce_mean(tensors['hm'] * tf.log(hm_sig + 1e-9) + (1 - tensors['hm']) * tf.log(1 - hm_sig + 1e-9))
so_loss = tf.abs(tensors['so_x'] - so_x_out) / config.RADIUS + tf.abs(tensors['so_y'] - so_y_out) / config.RADIUS
mo_loss = tf.abs(tensors['mo_x'] - mo_x_out) / config.RADIUS + tf.abs(tensors['mo_y'] - mo_y_out) / config.RADIUS

disc_only = tf.cast(tensors['hm'], tf.float32)
disc_size = tf.reduce_sum(disc_only, axis=[2, 3]) + 1e-9
so_loss = tf.reduce_mean(tf.reduce_sum(so_loss * disc_only, axis=[2, 3]) / disc_size)

disc_only = tf.cast(tf.gather(tensors['hm'], config.EDGES[:, 0], axis=-1), tf.float32)
disc_size = tf.reduce_sum(disc_only, axis=[2, 3]) + 1e-9
mo_loss = tf.reduce_mean(tf.reduce_sum(mo_loss * disc_only, axis=[2, 3]) / disc_size)

In [28]:
total_loss = hm_loss * 4.0 + so_loss * 1.0 + mo_loss * 0.5
#total_loss = so_loss * 1.0 + mo_loss * 0.5

In [29]:
tf.summary.scalar('losses/hm_loss', hm_loss)
tf.summary.scalar('losses/so_loss', so_loss)
tf.summary.scalar('losses/mo_loss', mo_loss)
tf.summary.scalar('losses/total_loss', total_loss)

<tf.Tensor 'losses/total_loss:0' shape=() dtype=string>

In [30]:
tf.summary.scalar('val/hm_sum', tf.reduce_sum(hm_out))
tf.summary.scalar('val/hm_min', tf.reduce_min(hm_out))
tf.summary.scalar('val/hm_max', tf.reduce_max(hm_out))
tf.summary.scalar('val/so_min', tf.reduce_min(so_x_out))
tf.summary.scalar('val/so_max', tf.reduce_max(so_x_out))
tf.summary.scalar('val/so_sum', tf.reduce_sum(tf.abs(so_x_out)) + tf.reduce_sum(tf.abs(so_y_out)))
tf.summary.scalar('val/mo_sum', tf.reduce_sum(tf.abs(mo_x_out)) + tf.reduce_sum(tf.abs(mo_y_out)))
tf.summary.scalar('val/hm_true_sum', tf.reduce_sum(tensors['hm']))
tf.summary.scalar('val/so_true_sum', tf.reduce_sum(tf.abs(tensors['so_x'])) + tf.reduce_sum(tf.abs(tensors['so_y'])))
tf.summary.scalar('val/mo_true_sum', tf.reduce_sum(tf.abs(tensors['mo_x'])) + tf.reduce_sum(tf.abs(tensors['mo_y'])))
optimizer = tf.train.AdamOptimizer()
train_op = slim.learning.create_train_op(total_loss, optimizer)

checkpoint_path = 'mbnet/mobilenet_v2_1.0_224.ckpt'
variables = slim.get_model_variables()
restore_map = {}
for v in variables:
    if not v.name.startswith('rnn/MobilenetV2'):
        continue
    org_name = v.name[4:].split(':')[0]
    restore_map[org_name] = v
    print(org_name, ':', v.name)
init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint_path, restore_map)

In [31]:
import time, os, shutil
#log_dir = 'logs/log_' + str(time.time())[-5:]
log_dir = 'logs/log_test2'
shutil.rmtree(log_dir)
os.mkdir(log_dir)

In [32]:
def InitAssignFn(sess):
    sess.run(init_assign_op, init_feed_dict)
tf.contrib.slim.learning.train(train_op,
                               '/home/ubuntu/personlab/'+log_dir,
                               init_fn=InitAssignFn,
                               log_every_n_steps=100,
                               save_summaries_secs=30,
                              )

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting Session.
INFO:tensorflow:Saving checkpoint to path /home/ubuntu/personlab/logs/log_test2/model.ckpt
INFO:tensorflow:Starting Queues.
INFO:tensorflow:Recording summary at step 0.
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, The node 'train_op/CheckNumerics' has inputs from different frames. The input 'add_8' is in frame ''. The input 'rnn/while/resnet_v2_101/postnorm/AssignMovingAvg_1' is in frame 'rnn/while/while_context'.


InvalidArgumentError: The node 'train_op/CheckNumerics' has inputs from different frames. The input 'add_8' is in frame ''. The input 'rnn/while/resnet_v2_101/postnorm/AssignMovingAvg_1' is in frame 'rnn/while/while_context'.