In [3]:
import os
import numpy as np
from scipy import misc
import matplotlib.pyplot as plt
import tensorflow as tf
from datetime import datetime
import time
import math
import json

In [5]:
%matplotlib inline

In [49]:
with tf.Graph().as_default(): 
    images, labels, keypts = distorted_inputs(10)
    pred = inference_deep(images, 0.5, keypts)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    kpys = sess.run(keypts)

In [19]:
def extract_train_test():
    train_map = {}
    for annotations in os.listdir('ImageSplits'):
        if annotations == 'actions.txt':
            continue
        if annotations.endswith('train.txt'):
            cls = '_'.join(annotations.split('_')[:-1])
            if cls not in train_map:
                train_map[cls] = set()
            with open('ImageSplits/' + annotations) as f:
                for line in f:
                    train_map[cls].add(line.strip())
            train_folder = 'train/' + cls
            test_folder = 'test/' + cls
            if not os.path.exists(train_folder):
                os.makedirs(train_folder)
            if not os.path.exists(test_folder):
                os.makedirs(test_folder)
    for img_file in os.listdir('JPEGImages'):
        cls = '_'.join(img_file.split('_')[:-1])
        if img_file in train_map[cls]:
            os.rename('JPEGImages/' + img_file, 'train/' + cls + '/' + img_file)
        else:
            os.rename('JPEGImages/' + img_file, 'test/' + cls + '/' + img_file)

In [18]:
def rescale_keypts(train):
    if train:
        path = 'train/'
        keypts_path = 'keypts_train/'
    else:
        path = 'test/'
        keypts_path = 'keypts_test/'
    for folder in os.listdir('keypts'):
        if folder == '.DS_Store':
            continue
        for keypts_file in os.listdir(keypts_path + folder):
            if not keypts_file.endswith('.json'):
                continue
            with open(keypts_path + folder + '/' + keypts_file) as f:
                keypts_js = json.load(f)
            if len(keypts_js['people']) != 1:
                continue
            img_file = path + folder + '/' + '_'.join(keypts_file.split('_')[:-1]) + '.jpg'
            base = keypts_file.rstrip('.json')
            orig_shape = plt.imread(img_file).shape
            ratio = (227.0 / orig_shape[1], 227.0 / orig_shape[0])
            if not os.path.exists(path + folder + '/scaled_keypts'):
                os.mkdir(path + folder + '/scaled_keypts')
            with open(path + folder + '/scaled_keypts/' + base + '_scaled.json', 'wb+') as f:
                people = keypts_js['people'][0]
                keypts = people['pose_keypoints']
                x = keypts[::3]
                y = keypts[1::3]
                c = keypts[2::3]
                
                xs = list(np.array(x) * ratio[0])
                ys = list(np.array(y) * ratio[1])
                
                json.dump(zip(xs, ys), f)
        print '%s done'%folder

In [25]:
def get_keypts(folder, img_list, train):
    results = []
    i = 0
    j = 0
    if train:
        keypts_files = os.listdir('train/' + folder + '/scaled_keypts')
    else:
        keypts_files = os.listdir('test/' + folder + '/scaled_keypts')
    while i < len(img_list) and j < len(keypts_files):
        img_num = img_list[i].rstrip('.jpg').split('_')[-1]
        result = []
        if img_num in keypts_files[j]:
            if train:
                with open('train/' + folder + '/scaled_keypts/' + keypts_files[j]) as js_f:
                    js = json.load(js_f)
            else:
                with open('test/' + folder + '/scaled_keypts/' + keypts_files[j]) as js_f:
                    js = json.load(js_f)
            result = [val for pair in js for val in pair]
            i += 1
            j += 1
        else:
            result = [0.0] * (18*2)
            i += 1
        results.append(result)
    return results

In [5]:
def distorted_inputs(batch_size):
    data_list = []
    keypts_list = []
    i = 0
    for folder in os.listdir('train'):
        if folder == '.DS_Store':
            continue
        img_list = ['train/' + folder + '/' + img 
                    for img in os.listdir('train/' + folder) if img.endswith('.jpg')]
        cls_list = zip(img_list, [i] * len(img_list))
        data_list.extend(cls_list)
        keypts_list.extend(get_keypts(folder, img_list, train=True))
        i += 1
    
    img_data = tf.constant(np.array(data_list))
    keypts_data = tf.constant(np.array(keypts_list), dtype=tf.float32)
    
    data_queue = tf.train.input_producer(img_data)
    keypts_queue = tf.train.input_producer(keypts_data)
    
    val = data_queue.dequeue()
    keypt = keypts_queue.dequeue()
    raw = tf.read_file(val[0])
    img = tf.image.decode_jpeg(raw)
    label = tf.string_to_number(val[1], tf.int32)
    resized_img = tf.image.resize_images(img, tf.constant([227, 227]))
    fliped_img = tf.image.random_flip_left_right(resized_img)
    distorted_img = tf.image.random_brightness(fliped_img, max_delta=0.5)
    distorted_img = tf.image.random_contrast(distorted_img, lower=0.2, upper=1.8)
    float_img = tf.image.per_image_standardization(distorted_img)
    float_img.set_shape([227, 227, 3])
    images, labels, keypts = tf.train.shuffle_batch([float_img, label, keypt],
                                   batch_size=batch_size,
                                   capacity=100 + 3 * batch_size,
                                   min_after_dequeue=100)
    return images, labels, keypts

In [27]:
def inputs(batch_size):
    data_list = []
    keypts_list = []
    i = 0
    for folder in os.listdir('test'):
        if folder == '.DS_Store':
            continue
        img_list = ['test/' + folder + '/' + img 
                    for img in os.listdir('test/' + folder) if img.endswith('.jpg')]
        cls_list = zip(img_list, [i] * len(img_list))
        data_list.extend(cls_list)
        keypts_list.extend(get_keypts(folder, img_list, train=False))
        i += 1
        
    data = tf.constant(np.array(data_list))
    keypts_data = tf.constant(np.array(keypts_list), dtype=tf.float32)
    
    data_queue = tf.train.input_producer(data)
    keypts_queue = tf.train.input_producer(keypts_data)
    
    val = data_queue.dequeue()
    keypt = keypts_queue.dequeue()
    raw = tf.read_file(val[0])
    img = tf.image.decode_jpeg(raw)
    label = tf.string_to_number(val[1], tf.int32)
    
    resized_img = tf.image.resize_images(img, tf.constant([227, 227]))
    float_img = tf.image.per_image_standardization(resized_img)
    float_img.set_shape([227, 227, 3])
    images, labels, keypts  = tf.train.batch([float_img, label, keypt],
                                    batch_size=batch_size,
                                    capacity=100 + 3 * batch_size)
    return images, labels, keypts

In [12]:
def add_summary(x):
    tf.summary.scalar(x.op.name + '/min', tf.reduce_min(x))
    tf.summary.scalar(x.op.name + '/max', tf.reduce_max(x))
    tf.summary.scalar(x.op.name + '/mean', tf.reduce_mean(x))

In [7]:
def inference(images, keep_prob):
     # 1st layer
    with tf.variable_scope('conv1') as scope:
        kernel = tf.get_variable('weights', [11, 11, 3, 64], tf.float32, 
                                 tf.truncated_normal_initializer(stddev=1e-1))
        kernel = tf.verify_tensor_all_finite(kernel, 'kernel1 error')
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.get_variable('biases', [64], tf.float32, tf.constant_initializer(0.0))
        conv1 = tf.nn.relu(conv + biases, name=scope.name)
        conv1 = tf.verify_tensor_all_finite(conv1, 'conv1 error: ')
        
    lrn1 = tf.nn.local_response_normalization(conv1, alpha=1e-4, beta=0.75,
                                              depth_radius=2, bias=2.0, name='lrn1')
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    
    # 2nd layer
    with tf.variable_scope('conv2') as scope:
        kernel = tf.get_variable('weights', [5, 5, 64, 192], tf.float32, 
                                 tf.truncated_normal_initializer(stddev=1e-1))
        kernel = tf.verify_tensor_all_finite(kernel, 'kernel2 error')
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.get_variable('biases', [192], tf.float32, tf.constant_initializer(0.0))
        conv2 = tf.nn.relu(conv + biases, name=scope.name)
        conv2 = tf.verify_tensor_all_finite(conv2, 'conv2 error: ')
    
    lrn2 = tf.nn.local_response_normalization(conv2, alpha=1e-4, beta=0.75, depth_radius=2, bias=2.0, name='lrn2')
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool2')
        
    
    with tf.variable_scope('fc1') as scope:
        W_fc1 = tf.get_variable('weights', [13 * 13 * 192, 1024], tf.float32, 
                                tf.truncated_normal_initializer(stddev=1e-1))
        b_fc1 = tf.get_variable('biases', [1024], tf.float32, tf.constant_initializer(0.0))
        conv2_flat = tf.reshape(pool2, [-1, 13 * 13 * 192])
        fc1 = tf.nn.relu(tf.matmul(conv2_flat, W_fc1) + b_fc1, name=scope.name)
        fc1 = tf.verify_tensor_all_finite(fc1, 'fc1 error: ')
    
    # drop out
    fc1_drop = tf.nn.dropout(fc1, keep_prob)
    
    # read out layer
    with tf.variable_scope('fc2') as scope:
        W_fc2 = tf.get_variable('weights', [1024, 40], tf.float32, tf.truncated_normal_initializer(stddev=1e-1))
        b_fc2 = tf.get_variable('biases', [40], tf.float32, tf.constant_initializer(0.0))
        y_conv = tf.add(tf.matmul(fc1_drop, W_fc2), b_fc2, name=scope.name)
        y_conv = tf.verify_tensor_all_finite(y_conv, 'y_conv error: ')
    
    return y_conv

In [8]:
def conv_relu(name, inputs, k_size, stride, padding, groups):
    # k_size needs to be [h, w, num_in, num_out]
    input_channels = int(inputs.get_shape()[-1])
    assert input_channels % groups == 0
    assert k_size[3] % groups == 0
    with tf.variable_scope(name) as scope:
        k_size[2] /= groups
        kernel = tf.get_variable('weights', k_size, tf.float32, 
                                 tf.truncated_normal_initializer(stddev=math.sqrt(2.0/(np.prod(k_size[:3])))),
                                 tf.nn.l2_loss)
        biases = tf.get_variable('biases', [k_size[3]], tf.float32, tf.constant_initializer(0.0))
        if groups == 1:
            conv = tf.nn.conv2d(inputs, kernel, stride, padding=padding)
        else:
            input_groups = tf.split(inputs, groups, 3)
            kernel_groups = tf.split(kernel, groups, 3)
            output_groups = [tf.nn.conv2d(i, k, stride, padding=padding) for i, k in zip(input_groups, kernel_groups)]
            conv = tf.concat(output_groups, 3)
            
        conv = tf.verify_tensor_all_finite(conv, name + ' infinite error!!!')
        return tf.nn.relu(conv + biases, name=scope.name)
        

In [9]:
def fc(name, inputs, output_size, relu=True):
    # inputs should be flattened to a 2D tensor
    input_size = int(inputs.get_shape()[1])
    with tf.variable_scope(name) as scope:
        W = tf.get_variable('weights', [input_size, output_size], tf.float32, 
                            tf.truncated_normal_initializer(stddev=math.sqrt(2.0/input_size)),
                            tf.nn.l2_loss)
        b = tf.get_variable('biases', [output_size], tf.float32, tf.constant_initializer(0.0))
        out = tf.verify_tensor_all_finite(tf.matmul(inputs, W) + b, name + ' inifite error!!!')
        if relu:
            return tf.nn.relu(out, name=scope.name)
        else:
            return out

In [10]:
def inference_deep(images, keep_prob, keypts=None):
    # 1st layer
    conv1 = conv_relu('conv1', images, [11, 11, 3, 96], [1, 4, 4, 1], 'VALID', 1)
    lrn1 = tf.nn.local_response_normalization(conv1, alpha=2e-5, beta=0.75,
                                              depth_radius=2, bias=1.0, name='lrn1')
    pool1 = tf.nn.max_pool(lrn1, ksize=[1, 3, 3, 1], 
                           strides=[1, 2, 2, 1], padding='VALID', name='pool1')
    
    # 2nd layer
    conv2 = conv_relu('conv2', pool1, [5, 5, 96, 256], [1, 1, 1, 1], 'SAME', 2)
    lrn2 = tf.nn.local_response_normalization(conv2, alpha=2e-5, beta=0.75, 
                                              depth_radius=2, bias=1.0, name='lrn2')
    pool2 = tf.nn.max_pool(lrn2, ksize=[1, 3, 3, 1], 
                           strides=[1, 2, 2, 1], padding='VALID', name='pool2')
    
    # 3rd layer
    conv3 = conv_relu('conv3', pool2, [3, 3, 256, 384], [1, 1, 1, 1], 'SAME', 1)
        
    # 4th layer
    conv4 = conv_relu('conv4', conv3, [3, 3, 384, 384], [1, 1, 1, 1], 'SAME', 2)
        
    # 5th layer
    conv5 = conv_relu('conv5', conv4, [3, 3, 384, 256], [1, 1, 1, 1], 'SAME', 2)
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID', name='pool5')
    
    # 6th layer
    pool5_flat = tf.reshape(pool5, [-1, 6 * 6 * 256])
    if keypts is not None:
        pool5_flat = tf.concat([pool5_flat, keypts], axis=1)
    
    fc6 = fc('fc6', pool5_flat, 4096)
    
    # drop out
    fc6_drop = tf.nn.dropout(fc6, keep_prob)
    
    # 7th layer
    fc7 = fc('fc7', fc6_drop, 4096)
    
    # drop out
    fc7_drop = tf.nn.dropout(fc7, keep_prob)
    
    # readout layer
    fc8 = fc('fc8', fc7_drop, 40, relu=False)
    
    return fc8

In [70]:
def load_pretrained_weights(skip_layers, set_untrainable=True, warm_start=False):
    weights = np.load('bvlc_alexnet.npy').item()
    ops = []
    trainables = tf.get_collection_ref(tf.GraphKeys.TRAINABLE_VARIABLES)
    regularizations = tf.get_collection_ref(tf.GraphKeys.REGULARIZATION_LOSSES)
    for layer in weights:
        if layer not in skip_layers or warm_start:
            with tf.variable_scope(layer, reuse=True) as scope:
                kernel = tf.get_variable('weights')
                ops.append(tf.assign(kernel, weights[layer][0]))
                biases = tf.get_variable('biases')
                ops.append(tf.assign(biases, weights[layer][1]))
                if set_untrainable:
                    trainables.remove(kernel)
                    trainables.remove(biases)
                    regularizations.remove(tf.losses.get_regularization_losses(scope.name)[0])
    return tf.group(*ops)

In [12]:
def loss(logits, labels, wd):
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    return cross_entropy + wd * tf.add_n(reg_losses)

In [13]:
def train(loss, global_step):
    lr = tf.train.exponential_decay(0.01, global_step, 5000, 0.1, True)
    opt = tf.train.GradientDescentOptimizer(lr)
    train_op = opt.minimize(loss, global_step)
    variable_averages = tf.train.ExponentialMovingAverage(0.999, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([train_op, variable_averages_op]):
        op = tf.no_op()
    return op

In [106]:
def main(train_from_scratch=False):
    with tf.Graph().as_default() as g:
        global_step = tf.contrib.framework.get_or_create_global_step()
        with tf.device('/cpu:0'):
            images, labels, keypts = distorted_inputs(128)

        pred = inference_deep(images, 0.5)
        total_loss = loss(pred, labels, 0.000)
        train_op = train(total_loss, global_step)
        
        skip_layers = ['fc7', 'fc8']
        load_op = load_pretrained_weights(skip_layers)

        class _LoggerHook(tf.train.SessionRunHook):
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                if self._step % 2 == 0:
                    return tf.train.SessionRunArgs([total_loss, pred])
                else:
                    return None

            def after_run(self, run_context, run_values):
                if self._step % 2 == 0 :
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time
                    print self._step
#                     loss_val = run_values.results
#                     format_str = '%s: step %d, loss = %.3f'
#                     print format_str % (datetime.now(), self._step, loss_val)
                if self._step == 0 or self._step == 1:
                    print tf.trainable_variables()
                    print run_values

        with tf.train.MonitoredTrainingSession(
            checkpoint_dir='./tmp/ckpt',
            hooks=[tf.train.StopAtStepHook(last_step=10000),
                  tf.train.NanTensorHook(total_loss),
                  _LoggerHook()]
        ) as sess:
            if not train_from_scratch:
                sess.run(load_op)
            while not sess.should_stop():
                sess.run(train_op)
                           

In [107]:
main()

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ./tmp/ckpt/model.ckpt-1872
INFO:tensorflow:Saving checkpoints for 1872 into ./tmp/ckpt/model.ckpt.
0
[<tf.Variable 'fc7/weights:0' shape=(4096, 4096) dtype=float32_ref>, <tf.Variable 'fc7/biases:0' shape=(4096,) dtype=float32_ref>, <tf.Variable 'fc8/weights:0' shape=(4096, 40) dtype=float32_ref>, <tf.Variable 'fc8/biases:0' shape=(40,) dtype=float32_ref>]
SessionRunValues(results=[3.2488077, array([[ 6.47529507,  3.69093847,  0.06187304, ...,  3.8311131 ,
         2.70871902,  2.37528276],
       [ 4.05583143,  0.64246082, -2.12692833, ...,  4.54955339,
         3.39035416,  4.54177475],
       [ 1.97912729,  1.39414263,  2.35173225, ...,  2.63558483,
        -0.29836708, -0.60502499],
       ..., 
       [ 2.61232233,  0.77746058,  0.5801785 , ...,  4.08888531,
         1.03738391, -0.12266219],
       [ 3.08311725,  0.60599864,  1.21579325, ...,  1.55250072,
         0.41512385,  1.49125528],
      

KeyboardInterrupt: 

In [98]:
def evaluate(train=False, batch_size=1000):
    with tf.Graph().as_default() as g:
        if train:
            images, labels, _ = distorted_inputs(batch_size)
        else:
            images, labels, _ = inputs(batch_size)
        pred = inference_deep(images, 0.5)
        cost = loss(pred, labels, 0.0000)
        top_k_op = tf.nn.in_top_k(pred, labels, 1)
        variable_averages = tf.train.ExponentialMovingAverage(0.999)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
#         summary_op = tf.summary.merge_all()
#         summary_write = tf.summary.FileWriter('./tmp/eval', g)
        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state('./tmp/ckpt')
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print 'No checkpoint file found'
                return
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            try:
#                 true_count = np.sum(sess.run(top_k_op))
#                 precision = float(true_count) / batch_size
#                 print 'precision: %.3f' % precision
                print sess.run(cost)
            except Exception as e:
                print e
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
                

In [105]:
evaluate(train=True, batch_size=128)

INFO:tensorflow:Restoring parameters from ./tmp/ckpt/model.ckpt-1872
1.47241
