### Load training, validation and testing data

In [1]:
from data_helper import load_dataset

In [2]:
IMAGE_FOLDER_PATH = 'dataset/resized/frames/'
LABEL_FOLDER_PATH = 'dataset/labels/'

train_head_image_paths, train_hand_image_paths, train_labels, \
val_head_image_paths, val_hand_image_paths, val_labels, \
test_head_image_paths, test_hand_image_paths, test_labels = load_dataset(image_folder_path=IMAGE_FOLDER_PATH,
                                                                         label_folder_path=LABEL_FOLDER_PATH,
                                                                         label_type='obj',
                                                                         hand_types=['left', 'right'],
                                                                         with_head=True,
                                                                         validation_split_ratio=0.15)

# Only take hand image paths for baseline
train_image_paths =  train_hand_image_paths
val_image_paths = val_hand_image_paths
test_image_paths = test_hand_image_paths

----------------------------------------------------------------------------------------------------
[Train (Head)] number of image paths: 12744
[Train (Hand)] number of image paths: 12744
[Train (Label)] number of labels: 12744
----------------------------------------------------------------------------------------------------
[Validation (Head)] number of image paths: 2248
[Validation (Hand)] number of image paths: 2248
[Validation (Label)] number of labels: 2248
----------------------------------------------------------------------------------------------------
[Test (Head)] number of image paths: 12776
[Test (Hand)] number of image paths: 12776
[Test (Label)] number of labels: 12776


### Use Tensorflow to build computational graph

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
import vgg_preprocessing

In [4]:
PRETRAINED_VGG_MODEL_PATH = 'model/vgg_16.ckpt'
MODEL_PATH = 'model/hand_obj_vgg_16/hand_obj_vgg_16_model'

num_classes = 24
batch_size = 32
num_workers = 20
max_epochs1 = 20
max_epochs2 = 20
max_patience = 3 # For early stopping
learning_rate1 = 1e-3
learning_rate2 = 1e-5
dropout_keep_prob = 0.5
weight_decay = 5e-4

In [5]:
%%time

def dataset_map_fn(image_path, label, is_training):
    # Load image
    image_string = tf.read_file(image_path)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)
    # Preprocess image
    preprocessed_image = tf.cond(is_training,
                                 true_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=True),
                                 false_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=False))
    return preprocessed_image, label

graph = tf.Graph()
with graph.as_default():
    # ---------------------------------------------------------------------
    # Indicates whether we are in training or in test mode
    # Since VGG16 has applied `dropout`, we need to disable it when testing.
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    
    # Training, validation, testing data to feed in.
    image_paths = tf.placeholder(dtype=tf.string, shape=(None,), name='image_paths')
    labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
    
    # Use dataset API to automatically generate batch data by iterator.
    dataset = tf.contrib.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(lambda image_path, label: dataset_map_fn(image_path, label, is_training))
    dataset = dataset.shuffle(buffer_size=10000)
    batched_dataset = dataset.batch(batch_size)
    
    # Now we define an iterator that can operator on dataset.
    # The iterator can be reinitialized by calling:
    # sess.run(dataset_init_op, feed_dict={image_paths=train_image_paths, labels=train_labels}) 
    # for 1 epoch on the training set.
    
    # Once this is done, we don't need to feed any value for images and labels
    # as they are automatically pulled out from the iterator queues.

    # A reinitializable iterator is defined by its structure. We could use the
    # `output_types` and `output_shapes` properties of dataset.
    # The dataset will be fed with training, validation or testing data.
    iterator = tf.contrib.data.Iterator.from_structure(batched_dataset.output_types,
                                                       batched_dataset.output_shapes)
    
    # A batch of data to feed into the networks.
    batch_images, batch_labels = iterator.get_next()
    dataset_init_op = iterator.make_initializer(batched_dataset)
    
    # ---------------------------------------------------------------------
    # Now that we have set up the data, it's time to set up the model.
    # For this example, we'll use VGG-16 pretrained on ImageNet. We will remove the
    # last fully connected layer (fc8) and replace it with our own, with an
    # output size `num_classes`
    # We will first train the last layer for a few epochs.
    # Then we will train the entire model on our dataset for a few epochs.

    # Get the pretrained model, specifying the num_classes argument to create a new
    # fully connected replacing the last one, called "vgg_16/fc8"
    # Each model has a different architecture, so "vgg_16/fc8" will change in another model.
    # Here, logits gives us directly the predicted scores we wanted from the images.
    # We pass a scope to initialize "vgg_16/fc8" weights with he_initializer
    vgg = tf.contrib.slim.nets.vgg
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
        logits, _ = vgg.vgg_16(batch_images, num_classes=num_classes, is_training=is_training,
                               dropout_keep_prob=dropout_keep_prob)
    
    # Restore only the layers up to fc7 (included)
    # Calling function `init_fn(sess)` will load all the pretrained weights.
    variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
    init_fn = tf.contrib.framework.assign_from_checkpoint_fn(PRETRAINED_VGG_MODEL_PATH, variables_to_restore)

    # Initialization operation from scratch for the new "fc8" layers
    # `get_variables` will only return the variables whose name starts with the given pattern
    fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
    fc8_init = tf.variables_initializer(fc8_variables)
    
    # ---------------------------------------------------------------------
    # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
    # We can then call the total loss easily
    tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=logits)
    loss = tf.losses.get_total_loss()
    
    # First we want to train only the reinitialized last layer fc8 for a few epochs.
    # We run minimize the loss only with respect to the fc8 variables (weight and bias).
    fc8_optimizer = tf.train.GradientDescentOptimizer(learning_rate1)
    fc8_train_op = fc8_optimizer.minimize(loss, var_list=fc8_variables)
    
    # Then we want to finetune the entire model for a few epochs.
    # We run minimize the loss only with respect to all the variables.
    full_optimizer = tf.train.GradientDescentOptimizer(learning_rate2)
    full_train_op = full_optimizer.minimize(loss)

    # Evaluation metrics
    prediction = tf.to_int32(tf.argmax(logits, 1))
    correct_prediction = tf.equal(prediction, batch_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    
    
    # 'Saver' op to save and restore all the variables
    saver = tf.train.Saver()

CPU times: user 1.86 s, sys: 12 ms, total: 1.87 s
Wall time: 1.87 s


### Start training

In [6]:
from tqdm import tqdm

In [7]:
def evaluate(sess, loss, correct_prediction, dataset_init_op, feed_dict):
    """
        Evaluation in training loop.
        Check the performance of the model on either train, val or test (depending on `dataset_init_op`)
        Note: The arguments are tensorflow operators defined in the graph.
    """
    
    # Initialize the correct dataset.
    sess.run(dataset_init_op, feed_dict=feed_dict)

    data_loss = 0
    num_correct = 0
    num_samples = 0
    
    # Evaluate on every batch.
    while True:
        try:
            # Disable `is_training` since we have `dropout` in VGG net.
            _loss, _correct_prediction = sess.run([loss, correct_prediction], feed_dict={is_training: False})

            data_loss += _loss
            num_correct += _correct_prediction.sum() # e.g: [True, False, True].sum() = 2
            num_samples += _correct_prediction.shape[0] # Batch size
            
        except tf.errors.OutOfRangeError:
            break

    data_loss = data_loss / num_samples
    acc = num_correct / num_samples

    return data_loss, acc

In [8]:
# --------------------------------------------------------------------------
# Now that we have built the graph and finalized it, we define the session.
# The session is the interface to *run* the computational graph.
# We can call our training operations with `sess.run(train_op)` for instance
sess = tf.Session(graph=graph)

In [9]:
# init_fn(sess) # load the pretrained weights
# sess.run(fc8_init)  # initialize the new fc8 layer

saver.restore(sess, MODEL_PATH)

INFO:tensorflow:Restoring parameters from model/hand_obj_vgg_16/hand_obj_vgg_16_model


### Train 'fc8' layer

In [None]:
max_acc = 0.0
patience = 0

# Update only the last layer for a few epochs.
for epoch in tqdm(range(max_epochs1)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs1))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(fc8_train_op, feed_dict={is_training: True})
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

### Train all layers

In [10]:
# Train the entire model for a few more epochs, continuing with the *same* weights.
max_acc = 0.0
patience = 0
for epoch in tqdm(range(max_epochs2)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs2))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(full_train_op, feed_dict={is_training: True})    
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

  0%|          | 0/20 [00:00<?, ?it/s]

--------------------------------------------------------------------------------------------------------------
Starting epoch 1/20
[Train] loss: 0.055268995374983684 | accuracy: 0.6513653483992468
[Validation] loss: 0.08276059597836695 | accuracy: 0.4306049822064057


  5%|▌         | 1/20 [07:24<2:20:53, 444.90s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 2/20


 10%|█         | 2/20 [14:42<2:12:51, 442.86s/it]

[Train] loss: 0.05534642737426626 | accuracy: 0.6546610169491526
[Validation] loss: 0.08310093229563635 | accuracy: 0.43016014234875444
Model not improved at epoch 2/20. Patience: 1/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 3/20
[Train] loss: 0.05492519088234456 | accuracy: 0.6590552416823603
[Validation] loss: 0.08278657204315756 | accuracy: 0.4314946619217082


 15%|█▌        | 3/20 [22:05<2:05:26, 442.74s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 4/20
[Train] loss: 0.054958589557425004 | accuracy: 0.6574074074074074
[Validation] loss: 0.08266664452824304 | accuracy: 0.43282918149466193


 20%|██        | 4/20 [29:27<1:58:01, 442.57s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 5/20


 25%|██▌       | 5/20 [36:45<1:50:15, 441.01s/it]

[Train] loss: 0.05446428804186553 | accuracy: 0.6634494664155681
[Validation] loss: 0.08298731295663692 | accuracy: 0.4319395017793594
Model not improved at epoch 5/20. Patience: 1/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 6/20
[Train] loss: 0.05456968165386868 | accuracy: 0.6622724419334589
[Validation] loss: 0.08271296465524151 | accuracy: 0.4337188612099644


 30%|███       | 6/20 [44:07<1:42:59, 441.38s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 7/20


 35%|███▌      | 7/20 [51:25<1:35:26, 440.50s/it]

[Train] loss: 0.05409768013294134 | accuracy: 0.6662743251726303
[Validation] loss: 0.0828403556368105 | accuracy: 0.4337188612099644
Model not improved at epoch 7/20. Patience: 1/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 8/20
[Train] loss: 0.05392454056641027 | accuracy: 0.6689422473320779
[Validation] loss: 0.08304883046506563 | accuracy: 0.43683274021352314


 40%|████      | 8/20 [58:49<1:28:16, 441.39s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 9/20
[Train] loss: 0.05368977528806759 | accuracy: 0.6684714375392341
[Validation] loss: 0.08241481808580962 | accuracy: 0.43994661921708184


 45%|████▌     | 9/20 [1:06:11<1:20:57, 441.59s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 10/20
[Train] loss: 0.053221925634748146 | accuracy: 0.674278091650973
[Validation] loss: 0.0827677003853686 | accuracy: 0.44261565836298933


 50%|█████     | 10/20 [1:13:32<1:13:35, 441.59s/it]

Model updated and saved in file: model/hand_obj_vgg_16/hand_obj_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 11/20


 55%|█████▌    | 11/20 [1:20:51<1:06:05, 440.61s/it]

[Train] loss: 0.05351293188965852 | accuracy: 0.6701977401129944
[Validation] loss: 0.08271186790856602 | accuracy: 0.44217081850533807
Model not improved at epoch 11/20. Patience: 1/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 12/20


 60%|██████    | 12/20 [1:28:08<58:37, 439.66s/it]  

[Train] loss: 0.05320167783776395 | accuracy: 0.6711393596986818
[Validation] loss: 0.08253948300334482 | accuracy: 0.43683274021352314
Model not improved at epoch 12/20. Patience: 2/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 13/20


 65%|██████▌   | 13/20 [1:35:26<51:14, 439.17s/it]

[Train] loss: 0.05309016950199309 | accuracy: 0.6781230382925298
[Validation] loss: 0.08290098114370027 | accuracy: 0.4363879003558719
Model not improved at epoch 13/20. Patience: 3/3
--------------------------------------------------------------------------------------------------------------
Starting epoch 14/20
[Train] loss: 0.05298506986286681 | accuracy: 0.6755335844318895
[Validation] loss: 0.08286075744764661 | accuracy: 0.43994661921708184
Model not improved at epoch 14/20. Patience: 4/3
Max patience exceeded. Early stopping.


### Testing

In [11]:
test_loss, test_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: test_image_paths,
                                            labels: test_labels,
                                            is_training: False})

print('[Test] loss: {} | accuracy: {}'.format(test_loss, test_acc))

[Test] loss: 0.062447359282744995 | accuracy: 0.5809329993738259
