In [48]:
%load_ext autoreload
%autoreload
import tensorflow as tf
import numpy as np
import surreal, config
import functools, operator, copy
import tensorflow.contrib.slim as slim
from nets.resnet_v2 import resnet_v2_101
from nets.mobilenet import mobilenet_v2

tf.reset_default_graph()
'''
for x in surreal.load():
    print(x)
    break
'''

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'\nfor x in surreal.load():\n    print(x)\n    break\n'

In [49]:
TENSOR_INFO = [
    {
        'name': 'image',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, 3),
        'type': tf.float32,
    },{
        'name': 'hm',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.float32,
    },{
        'name': 'so_x',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.float32,
    },{
        'name': 'so_y',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.float32,
    },{
        'name': 'mo_x',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_EDGE),
        'type': tf.float32,
    },{
        'name': 'mo_y',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_EDGE),
        'type': tf.float32,
    },{
        'name': 'seq_len',
        'shape': (config.BATCH_SIZE,),
        'type': tf.int32,
    }
]

types = tuple(t['type'] for t in TENSOR_INFO)
input_tensors = tf.data.Dataset.from_generator(surreal.load, types) \
                               .batch(config.BATCH_SIZE) \
                               .prefetch(config.PREFETCH_SIZE) \
                               .make_one_shot_iterator() \
                               .get_next()
tensors = {}
for tensor, info in zip(input_tensors, TENSOR_INFO):
    tensor.set_shape(info['shape'])
    tensors[info['name']] = tensor

In [50]:
config.STRIDE = 16

TENSOR_INFO = [
    {
        'name': 'hm',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.int32,
    },{
        'name': 'so_x',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.float32,
    },{
        'name': 'so_y',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_KP),
        'type': tf.float32,
    },{
        'name': 'mo_x',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_EDGE),
        'type': tf.float32,
    },{
        'name': 'mo_y',
        'shape': (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, config.NUM_EDGE),
        'type': tf.float32,
    }
]

MD_H = int(config.TAR_H/config.STRIDE)
MD_W = int(config.TAR_W/config.STRIDE)

DEPTH = [ti['shape'][-1] for ti in TENSOR_INFO]
RESULT_SHAPE = (config.BATCH_SIZE, MD_H, MD_W, sum(DEPTH))
RESULT_SIZE = functools.reduce(operator.mul, RESULT_SHAPE[1:])
OUTPUT_SHAPE = (config.BATCH_SIZE, config.TAR_H, config.TAR_W, sum(DEPTH))
OUTPUT_SIZE = functools.reduce(operator.mul, OUTPUT_SHAPE[1:])


class TestCell(tf.contrib.rnn.RNNCell):
    def __init__(self, is_training):
        super().__init__(self)
        self.is_training = is_training
        
    def resize(self, tensor):
        return tf.image.resize_images(
            tensor,
            (config.TAR_H, config.TAR_W),
            method=tf.image.ResizeMethod.BICUBIC,
            align_corners=True)
    
    def gather_bilinear(self, params, index):
        bp = [tf.ceil(index), tf.floor(index)]
        ld = index.shape[-1]
        print(params, index)
        res = 0
        for x_i in range(2):
            for y_i in range(2):
                ti = [index[..., k] for k in range(ld)]
                ti[1] = bp[x_i][..., 1]
                ti[2] = bp[y_i][..., 2]
                ti = tf.stack(ti, axis=-1)
                dp = ti - index
                d = (1 - tf.abs(dp[...,1])) * (1 - tf.abs(dp[...,2]))
                r = tf.gather_nd(params, tf.cast(ti, tf.int32))
                res += r * d
        print(res)
        return res
        
    def __call__(self, frame_tensor, state):
        mbnet2_output, _ = mobilenet_v2.mobilenet_base(frame_tensor, output_stride=config.STRIDE)
        
        # parse expectation from previous frame
        state = tf.reshape(state, RESULT_SHAPE)
        mbnet2_output = tf.concat([mbnet2_output, state], axis=-1)
        _, so_x_prev, so_y_prev, mo_x_prev, mo_y_prev = tf.split(state, DEPTH, axis=-1)
        
        # prediction of current frame
        hm_pred = slim.conv2d(mbnet2_output, config.NUM_KP, [1, 1])
        
        so_x_pred = slim.conv2d(mbnet2_output, config.NUM_KP, [1, 1])
        so_x_pred = tf.clip_by_value(so_x_pred + so_x_prev, -MD_W, MD_W)
        
        so_y_pred = slim.conv2d(mbnet2_output, config.NUM_KP, [1, 1])
        so_y_pred = tf.clip_by_value(so_y_pred + so_y_prev, -MD_H, MD_H)
        
        mo_x_pred = slim.conv2d(mbnet2_output, config.NUM_EDGE, [1, 1])
        mo_x_pred = tf.clip_by_value(mo_x_pred + mo_x_prev, -MD_W, MD_W)
        
        mo_y_pred = slim.conv2d(mbnet2_output, config.NUM_EDGE, [1, 1])
        mo_y_pred = tf.clip_by_value(mo_y_pred + mo_y_prev, -MD_H, MD_H)
        
        # expect point in next frame
        mv_x_pred = slim.conv2d(mbnet2_output, 1, [1, 1])
        mv_y_pred = slim.conv2d(mbnet2_output, 1, [1, 1])
        
        # construct expectation data
        
        cur_x = np.tile(np.arange(MD_W), [config.BATCH_SIZE, MD_H, 1, 1]).transpose([0, 1, 3, 2])
        cur_y = np.tile(np.arange(MD_H), [config.BATCH_SIZE, MD_W, 1, 1]).transpose([0, 3, 1, 2])
        mvp_b = np.tile(np.arange(config.BATCH_SIZE), [MD_H, MD_W, 1, 1]).transpose([3, 0, 1, 2])
        mvp_x = tf.cast(tf.clip_by_value(tf.round(cur_x + mv_x_pred), 0, MD_W-1), 'int32')
        mvp_y = tf.cast(tf.clip_by_value(tf.round(cur_y + mv_y_pred), 0, MD_H-1), 'int32')
        print(mvp_b.shape, mvp_x.shape, mvp_y.shape)
        mvp = tf.stack([mvp_b, mvp_x, mvp_y], axis=-1)
        print(mvp,'!!!!!!!')
        
        hm_expect = tf.scatter_nd(mvp, hm_pred, hm_pred.shape)
        so_x_expect = tf.scatter_nd(mvp, so_x_pred, so_x_pred.shape) ## 이거도 bilinear scatter 구현
        so_y_expect = tf.scatter_nd(mvp, so_y_pred, so_y_pred.shape)
        
        mo_end_b = np.tile(np.arange(config.BATCH_SIZE), [MD_H, MD_W, config.NUM_EDGE, 1]).transpose([3, 0, 1, 2])
        
        mo_end_x = tf.clip_by_value(cur_x + mo_x_pred, 0, MD_W-1)
        mo_end_y = tf.clip_by_value(cur_y + mo_y_pred, 0, MD_H-1)
        mo_end = tf.stack([mo_end_b, mo_end_x, mo_end_y], axis=-1)
        
        mo_x_expect_cp = tf.squeeze(self.gather_bilinear(mv_x_pred, mo_end), axis=[-1]) + mo_x_pred - mv_x_pred
        mo_y_expect_cp = tf.squeeze(self.gather_bilinear(mv_y_pred, mo_end), axis=[-1]) + mo_y_pred - mv_y_pred
        mo_x_expect = tf.scatter_nd(mvp, mo_x_expect_cp, mo_x_pred.shape)
        mo_y_expect = tf.scatter_nd(mvp, mo_y_expect_cp, mo_y_pred.shape)
        
        mvp_b = np.tile(np.arange(config.BATCH_SIZE), [MD_H, MD_W, config.NUM_EDGE, 1]).transpose([3, 0, 1, 2])
        mvp_i = np.tile(np.arange(config.NUM_EDGE), [MD_H, MD_W, config.BATCH_SIZE, 1]).transpose([2, 0, 1, 3])
        for _ in range(config.NUM_RECURRENT):
            mvp_x = tf.clip_by_value(cur_x + mo_x_pred, 0, MD_W-1)
            mvp_y = tf.clip_by_value(cur_y + mo_y_pred, 0, MD_H-1)
            mvp = tf.stack([mvp_b, mvp_x, mvp_y, mvp_i], axis=-1)
            mo_x_pred = self.gather_bilinear(so_x_pred, mvp) + mo_x_pred
            mo_y_pred = self.gather_bilinear(so_y_pred, mvp) + mo_y_pred
        
        next_state = tf.concat([hm_expect, so_x_expect, so_y_expect, mo_x_expect, mo_y_expect], axis=-1)
        next_state = tf.reshape(next_state, [config.BATCH_SIZE, RESULT_SIZE])
        output = tf.concat([hm_pred, so_x_pred, so_y_pred, mo_x_pred, mo_y_pred], axis=-1)
        output = self.resize(output)
        output = tf.reshape(output, [config.BATCH_SIZE, OUTPUT_SIZE])
        
        return output, next_state

    @property
    def state_size(self):
        return RESULT_SIZE

    @property
    def output_size(self):
        return OUTPUT_SIZE


test_cell = TestCell(is_training=True)
#with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=True)):
pred_sum, _ = tf.nn.dynamic_rnn(test_cell, tensors['image'], sequence_length=tensors['seq_len'], dtype=tf.float32)

(1, 25, 25, 1) (1, 25, 25, 1) (1, 25, 25, 1)
Tensor("rnn/while/Squeeze:0", shape=(1, 25, 25, 3), dtype=int32) !!!!!!!
Tensor("rnn/while/Conv_5/Relu:0", shape=(1, 25, 25, 1), dtype=float32) Tensor("rnn/while/stack_3:0", shape=(1, 25, 25, 15, 3), dtype=float32)
Tensor("rnn/while/add_12:0", shape=(1, 25, 25, 15, 1), dtype=float32)
Tensor("rnn/while/Conv_6/Relu:0", shape=(1, 25, 25, 1), dtype=float32) Tensor("rnn/while/stack_3:0", shape=(1, 25, 25, 15, 3), dtype=float32)
Tensor("rnn/while/add_17:0", shape=(1, 25, 25, 15, 1), dtype=float32)
Tensor("rnn/while/clip_by_value:0", shape=(1, 25, 25, 16), dtype=float32) Tensor("rnn/while/stack_13:0", shape=(1, 25, 25, 15, 4), dtype=float32)


ValueError: Dimensions must be equal, but are 25 and 15 for 'rnn/while/mul_17' (op: 'Mul') with input shapes: [1,25,25,15], [1,25,25,15,1].

In [None]:
pred_sum

In [None]:
TOTAL_SHAPE = (config.BATCH_SIZE, config.MAX_FRAME_SIZE, config.TAR_H, config.TAR_W, sum(DEPTH))
pred_sum = tf.reshape(pred_sum, TOTAL_SHAPE)
hm_out, so_x_out, so_y_out, mo_x_out, mo_y_out = tf.split(pred_sum, DEPTH, axis=-1)

In [None]:
hm_sig = tf.sigmoid(hm_out)
hm_loss = - tf.reduce_mean(tensors['hm'] * tf.log(hm_sig + 1e-9) + (1 - tensors['hm']) * tf.log(1 - hm_sig + 1e-9))
so_loss = tf.abs(tensors['so_x'] - so_x_out) / config.RADIUS + tf.abs(tensors['so_y'] - so_y_out) / config.RADIUS
mo_loss = tf.abs(tensors['mo_x'] - mo_x_out) / config.RADIUS + tf.abs(tensors['mo_y'] - mo_y_out) / config.RADIUS

disc_only = tf.cast(tensors['hm'], tf.float32)
disc_size = tf.reduce_sum(disc_only, axis=[2, 3]) + 1e-9
so_loss = tf.reduce_mean(tf.reduce_sum(so_loss * disc_only, axis=[2, 3]) / disc_size)

disc_only = tf.cast(tf.gather(tensors['hm'], config.EDGES[:, 0], axis=-1), tf.float32)
disc_size = tf.reduce_sum(disc_only, axis=[2, 3]) + 1e-9
mo_loss = tf.reduce_mean(tf.reduce_sum(mo_loss * disc_only, axis=[2, 3]) / disc_size)

In [None]:
total_loss = hm_loss * 4.0 + so_loss * 1.0 + mo_loss * 0.5
#total_loss = so_loss * 1.0 + mo_loss * 0.5

In [None]:
tf.summary.scalar('losses/hm_loss', hm_loss)
tf.summary.scalar('losses/so_loss', so_loss)
tf.summary.scalar('losses/mo_loss', mo_loss)
tf.summary.scalar('losses/total_loss', total_loss)

In [None]:
tf.summary.scalar('val/hm_sum', tf.reduce_sum(hm_out))
tf.summary.scalar('val/hm_min', tf.reduce_min(hm_out))
tf.summary.scalar('val/hm_max', tf.reduce_max(hm_out))
tf.summary.scalar('val/so_min', tf.reduce_min(so_x_out))
tf.summary.scalar('val/so_max', tf.reduce_max(so_x_out))
tf.summary.scalar('val/so_sum', tf.reduce_sum(tf.abs(so_x_out)) + tf.reduce_sum(tf.abs(so_y_out)))
tf.summary.scalar('val/mo_sum', tf.reduce_sum(tf.abs(mo_x_out)) + tf.reduce_sum(tf.abs(mo_y_out)))
tf.summary.scalar('val/hm_true_sum', tf.reduce_sum(tensors['hm']))
tf.summary.scalar('val/so_true_sum', tf.reduce_sum(tf.abs(tensors['so_x'])) + tf.reduce_sum(tf.abs(tensors['so_y'])))
tf.summary.scalar('val/mo_true_sum', tf.reduce_sum(tf.abs(tensors['mo_x'])) + tf.reduce_sum(tf.abs(tensors['mo_y'])))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001)
train_op = slim.learning.create_train_op(total_loss, optimizer)

checkpoint_path = 'mbnet/mobilenet_v2_1.0_224.ckpt'
variables = slim.get_model_variables()
restore_map = {}
for v in variables:
    if not v.name.startswith('rnn/MobilenetV2'):
        continue
    org_name = v.name[4:].split(':')[0]
    restore_map[org_name] = v
    print(org_name, ':', v.name)
init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint_path, restore_map)

In [193]:
import time, os, shutil
#log_dir = 'logs/log_' + str(time.time())[-5:]
log_dir = 'logs/log_test'
shutil.rmtree(log_dir)
os.mkdir(log_dir)

In [194]:
def InitAssignFn(sess):
    sess.run(init_assign_op, init_feed_dict)
tf.contrib.slim.learning.train(train_op,
                               '/home/ubuntu/personlab/'+log_dir,
                               init_fn=InitAssignFn,
                               log_every_n_steps=100,
                               save_summaries_secs=30,
                              )

INFO:tensorflow:Error reported to Coordinator: <class 'TypeError'>, Cannot interpret feed_dict key as Tensor: Tensor Tensor("placeholder/rnn/MobilenetV2/Conv/weights:0", shape=(3, 3, 3, 32), dtype=float32) is not an element of this graph.


TypeError: Cannot interpret feed_dict key as Tensor: Tensor Tensor("placeholder/rnn/MobilenetV2/Conv/weights:0", shape=(3, 3, 3, 32), dtype=float32) is not an element of this graph.