### Load training, validation and testing data

In [1]:
from data_helper import load_dataset

In [2]:
IMAGE_FOLDER_PATH = 'dataset/resized/frames/'
LABEL_FOLDER_PATH = 'dataset/labels/'

train_head_image_paths, train_hand_image_paths, train_labels, \
val_head_image_paths, val_hand_image_paths, val_labels, \
test_head_image_paths, test_hand_image_paths, test_labels = load_dataset(image_folder_path=IMAGE_FOLDER_PATH,
                                                                         label_folder_path=LABEL_FOLDER_PATH,
                                                                         label_type='ges',
                                                                         hand_types=['left', 'right'],
                                                                         with_head=True,
                                                                         validation_split_ratio=0.15)

# Only take hand image paths for baseline
train_image_paths =  train_hand_image_paths
val_image_paths = val_hand_image_paths
test_image_paths = test_hand_image_paths

----------------------------------------------------------------------------------------------------
[Train (Head)] number of image paths: 12744
[Train (Hand)] number of image paths: 12744
[Train (Label)] number of labels: 12744
----------------------------------------------------------------------------------------------------
[Validation (Head)] number of image paths: 2248
[Validation (Hand)] number of image paths: 2248
[Validation (Label)] number of labels: 2248
----------------------------------------------------------------------------------------------------
[Test (Head)] number of image paths: 12776
[Test (Hand)] number of image paths: 12776
[Test (Label)] number of labels: 12776


### Use Tensorflow to build computational graph

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
import vgg_preprocessing

In [4]:
PRETRAINED_VGG_MODEL_PATH = 'model/vgg_16.ckpt'
MODEL_PATH = 'model/hand_gesture_vgg_16/hand_gesture_vgg_16_model'

num_classes = 24
batch_size = 32
num_workers = 20
max_epochs1 = 30
max_epochs2 = 30
max_patience = 5 # For early stopping
learning_rate1 = 1e-3
learning_rate2 = 1e-5
dropout_keep_prob = 0.5
weight_decay = 5e-4

In [5]:
%%time

def dataset_map_fn(image_path, label, is_training):
    # Load image
    image_string = tf.read_file(image_path)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)
    # Preprocess image
    preprocessed_image = tf.cond(is_training,
                                 true_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=True),
                                 false_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=False))
    return preprocessed_image, label

graph = tf.Graph()
with graph.as_default():
    # ---------------------------------------------------------------------
    # Indicates whether we are in training or in test mode
    # Since VGG16 has applied `dropout`, we need to disable it when testing.
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    
    # Training, validation, testing data to feed in.
    image_paths = tf.placeholder(dtype=tf.string, shape=(None,), name='image_paths')
    labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
    
    # Use dataset API to automatically generate batch data by iterator.
    dataset = tf.contrib.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(lambda image_path, label: dataset_map_fn(image_path, label, is_training))
    dataset = dataset.shuffle(buffer_size=10000)
    batched_dataset = dataset.batch(batch_size)
    
    # Now we define an iterator that can operator on dataset.
    # The iterator can be reinitialized by calling:
    # sess.run(dataset_init_op, feed_dict={image_paths=train_image_paths, labels=train_labels}) 
    # for 1 epoch on the training set.
    
    # Once this is done, we don't need to feed any value for images and labels
    # as they are automatically pulled out from the iterator queues.

    # A reinitializable iterator is defined by its structure. We could use the
    # `output_types` and `output_shapes` properties of dataset.
    # The dataset will be fed with training, validation or testing data.
    iterator = tf.contrib.data.Iterator.from_structure(batched_dataset.output_types,
                                                       batched_dataset.output_shapes)
    
    # A batch of data to feed into the networks.
    batch_images, batch_labels = iterator.get_next()
    dataset_init_op = iterator.make_initializer(batched_dataset)
    
    # ---------------------------------------------------------------------
    # Now that we have set up the data, it's time to set up the model.
    # For this example, we'll use VGG-16 pretrained on ImageNet. We will remove the
    # last fully connected layer (fc8) and replace it with our own, with an
    # output size `num_classes`
    # We will first train the last layer for a few epochs.
    # Then we will train the entire model on our dataset for a few epochs.

    # Get the pretrained model, specifying the num_classes argument to create a new
    # fully connected replacing the last one, called "vgg_16/fc8"
    # Each model has a different architecture, so "vgg_16/fc8" will change in another model.
    # Here, logits gives us directly the predicted scores we wanted from the images.
    # We pass a scope to initialize "vgg_16/fc8" weights with he_initializer
    vgg = tf.contrib.slim.nets.vgg
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
        logits, _ = vgg.vgg_16(batch_images, num_classes=num_classes, is_training=is_training,
                               dropout_keep_prob=dropout_keep_prob)
    
    # Restore only the layers up to fc7 (included)
    # Calling function `init_fn(sess)` will load all the pretrained weights.
    variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
    init_fn = tf.contrib.framework.assign_from_checkpoint_fn(PRETRAINED_VGG_MODEL_PATH, variables_to_restore)

    # Initialization operation from scratch for the new "fc8" layers
    # `get_variables` will only return the variables whose name starts with the given pattern
    fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
    fc8_init = tf.variables_initializer(fc8_variables)
    
    # ---------------------------------------------------------------------
    # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
    # We can then call the total loss easily
    tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=logits)
    loss = tf.losses.get_total_loss()
    
    # First we want to train only the reinitialized last layer fc8 for a few epochs.
    # We run minimize the loss only with respect to the fc8 variables (weight and bias).
    fc8_optimizer = tf.train.GradientDescentOptimizer(learning_rate1)
    fc8_train_op = fc8_optimizer.minimize(loss, var_list=fc8_variables)
    
    # Then we want to finetune the entire model for a few epochs.
    # We run minimize the loss only with respect to all the variables.
    full_optimizer = tf.train.GradientDescentOptimizer(learning_rate2)
    full_train_op = full_optimizer.minimize(loss)

    # Evaluation metrics
    prediction = tf.to_int32(tf.argmax(logits, 1))
    correct_prediction = tf.equal(prediction, batch_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    
    
    # 'Saver' op to save and restore all the variables
    saver = tf.train.Saver()

CPU times: user 996 ms, sys: 11.9 ms, total: 1.01 s
Wall time: 1.01 s


### Start training

In [6]:
from tqdm import tqdm

In [7]:
def evaluate(sess, loss, correct_prediction, dataset_init_op, feed_dict):
    """
        Evaluation in training loop.
        Check the performance of the model on either train, val or test (depending on `dataset_init_op`)
        Note: The arguments are tensorflow operators defined in the graph.
    """
    
    # Initialize the correct dataset.
    sess.run(dataset_init_op, feed_dict=feed_dict)

    data_loss = 0
    num_correct = 0
    num_samples = 0
    
    # Evaluate on every batch.
    while True:
        try:
            # Disable `is_training` since we have `dropout` in VGG net.
            _loss, _correct_prediction = sess.run([loss, correct_prediction], feed_dict={is_training: False})

            data_loss += _loss
            num_correct += _correct_prediction.sum() # e.g: [True, False, True].sum() = 2
            num_samples += _correct_prediction.shape[0] # Batch size
            
        except tf.errors.OutOfRangeError:
            break

    data_loss = data_loss / num_samples
    acc = num_correct / num_samples

    return data_loss, acc

In [8]:
# --------------------------------------------------------------------------
# Now that we have built the graph and finalized it, we define the session.
# The session is the interface to *run* the computational graph.
# We can call our training operations with `sess.run(train_op)` for instance
sess = tf.Session(graph=graph)

In [9]:
# init_fn(sess) # load the pretrained weights
# sess.run(fc8_init)  # initialize the new fc8 layer

saver.restore(sess, MODEL_PATH)

INFO:tensorflow:Restoring parameters from model/hand_gesture_vgg_16/hand_gesture_vgg_16_model


### Only train 'fc8' layer.

In [None]:
max_acc = 0.0
patience = 0

# Update only the last layer for a few epochs.
for epoch in tqdm(range(max_epochs1)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs1))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(fc8_train_op, feed_dict={is_training: True})
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

  0%|          | 0/30 [00:00<?, ?it/s]

--------------------------------------------------------------------------------------------------------------
Starting epoch 1/30
[Train] loss: 0.06228925008969867 | accuracy: 0.5976145637162587
[Validation] loss: 0.08147881431935945 | accuracy: 0.41725978647686834


  3%|▎         | 1/30 [05:38<2:43:45, 338.82s/it]

Model updated and saved in file: model/hand_gesture_vgg_16.ckpt
--------------------------------------------------------------------------------------------------------------
Starting epoch 2/30
[Train] loss: 0.058167793675181406 | accuracy: 0.6263339610797238
[Validation] loss: 0.07669812652988366 | accuracy: 0.44483985765124556


  7%|▋         | 2/30 [11:18<2:38:13, 339.06s/it]

Model updated and saved in file: model/hand_gesture_vgg_16.ckpt
--------------------------------------------------------------------------------------------------------------
Starting epoch 3/30


 10%|█         | 3/30 [16:54<2:32:09, 338.12s/it]

[Train] loss: 0.05608863585113506 | accuracy: 0.6345731324544884
[Validation] loss: 0.07903092160980048 | accuracy: 0.4435053380782918
Model not improved at epoch 3/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 4/30


 13%|█▎        | 4/30 [22:30<2:26:16, 337.54s/it]

[Train] loss: 0.05424258221415552 | accuracy: 0.6541902071563088
[Validation] loss: 0.07752268435267791 | accuracy: 0.4288256227758007
Model not improved at epoch 4/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 5/30
[Train] loss: 0.05321637331990423 | accuracy: 0.6611738857501569
[Validation] loss: 0.07711983029316329 | accuracy: 0.4470640569395018


 17%|█▋        | 5/30 [28:12<2:21:08, 338.75s/it]

Model updated and saved in file: model/hand_gesture_vgg_16.ckpt
--------------------------------------------------------------------------------------------------------------
Starting epoch 6/30
[Train] loss: 0.0521653358639222 | accuracy: 0.6711393596986818
[Validation] loss: 0.07633064681949141 | accuracy: 0.46174377224199287


 20%|██        | 6/30 [33:51<2:15:32, 338.84s/it]

Model updated and saved in file: model/hand_gesture_vgg_16.ckpt
--------------------------------------------------------------------------------------------------------------
Starting epoch 7/30


 23%|██▎       | 7/30 [39:28<2:09:41, 338.33s/it]

[Train] loss: 0.051627347614132076 | accuracy: 0.6738072818581293
[Validation] loss: 0.07687992127976807 | accuracy: 0.445729537366548
Model not improved at epoch 7/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 8/30


 27%|██▋       | 8/30 [45:04<2:03:51, 337.79s/it]

[Train] loss: 0.052398323142820384 | accuracy: 0.665646578782172
[Validation] loss: 0.07734628148986776 | accuracy: 0.4314946619217082
Model not improved at epoch 8/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 9/30


 30%|███       | 9/30 [50:41<1:58:03, 337.30s/it]

[Train] loss: 0.05076000679983557 | accuracy: 0.6792215944758317
[Validation] loss: 0.07721551001284047 | accuracy: 0.45062277580071175
Model not improved at epoch 9/30. Patience: 3/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 10/30


 33%|███▎      | 10/30 [56:16<1:52:17, 336.85s/it]

[Train] loss: 0.0504849471540026 | accuracy: 0.6858913998744507
[Validation] loss: 0.07524671758196956 | accuracy: 0.45462633451957296
Model not improved at epoch 10/30. Patience: 4/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 11/30


 37%|███▋      | 11/30 [1:01:51<1:46:29, 336.30s/it]

[Train] loss: 0.05033740173026742 | accuracy: 0.6835373509102323
[Validation] loss: 0.07703210652086659 | accuracy: 0.4443950177935943
Model not improved at epoch 11/30. Patience: 5/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 12/30
[Train] loss: 0.050016819944339805 | accuracy: 0.684557438794727
[Validation] loss: 0.07466981154319655 | accuracy: 0.445729537366548
Model not improved at epoch 12/30. Patience: 6/5
Max patience exceeded. Early stopping.


### Train all layers

In [10]:
# Train the entire model for a few more epochs, continuing with the *same* weights.
max_acc = 0.0
patience = 0
for epoch in tqdm(range(max_epochs2)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs2))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(full_train_op, feed_dict={is_training: True})    
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

  0%|          | 0/30 [00:00<?, ?it/s]

--------------------------------------------------------------------------------------------------------------
Starting epoch 1/30
[Train] loss: 0.046402791256584315 | accuracy: 0.7221437539234149
[Validation] loss: 0.0718847906144064 | accuracy: 0.47330960854092524


  3%|▎         | 1/30 [08:41<4:12:02, 521.46s/it]

Model updated and saved in file: model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 2/30
[Train] loss: 0.04632800730496028 | accuracy: 0.7218298807281858
[Validation] loss: 0.07255974247574382 | accuracy: 0.474644128113879


  7%|▋         | 2/30 [17:24<4:03:33, 521.92s/it]

Model updated and saved in file: model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 3/30
[Train] loss: 0.04683835052388761 | accuracy: 0.7186911487758946
[Validation] loss: 0.07246557540952947 | accuracy: 0.47597864768683273


 10%|█         | 3/30 [26:05<3:54:48, 521.79s/it]

Model updated and saved in file: model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 4/30
[Train] loss: 0.04689487364968637 | accuracy: 0.7203389830508474
[Validation] loss: 0.07189593037252325 | accuracy: 0.4786476868327402


 13%|█▎        | 4/30 [34:49<3:46:17, 522.20s/it]

Model updated and saved in file: model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 5/30
[Train] loss: 0.04651352033552252 | accuracy: 0.7266949152542372
[Validation] loss: 0.07225807133094271 | accuracy: 0.4795373665480427


 17%|█▋        | 5/30 [43:30<3:37:26, 521.86s/it]

Model updated and saved in file: model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 6/30


 20%|██        | 6/30 [52:08<3:28:22, 520.94s/it]

[Train] loss: 0.046100814996915124 | accuracy: 0.732187696170747
[Validation] loss: 0.07216650332420321 | accuracy: 0.476423487544484
Model not improved at epoch 6/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 7/30


 23%|██▎       | 7/30 [1:00:46<3:19:20, 520.00s/it]

[Train] loss: 0.04611234199454481 | accuracy: 0.7281073446327684
[Validation] loss: 0.07207183317145419 | accuracy: 0.47686832740213525
Model not improved at epoch 7/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 8/30


 27%|██▋       | 8/30 [1:09:24<3:10:26, 519.38s/it]

[Train] loss: 0.04612294859057017 | accuracy: 0.7270087884494664
[Validation] loss: 0.0725403787614612 | accuracy: 0.47731316725978645
Model not improved at epoch 8/30. Patience: 3/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 9/30


 30%|███       | 9/30 [1:18:04<3:01:49, 519.48s/it]

[Train] loss: 0.046086961435059366 | accuracy: 0.7277934714375393
[Validation] loss: 0.07204095918512854 | accuracy: 0.47686832740213525
Model not improved at epoch 9/30. Patience: 4/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 10/30


 33%|███▎      | 10/30 [1:26:43<2:53:09, 519.48s/it]

[Train] loss: 0.04592442379683275 | accuracy: 0.7277934714375393
[Validation] loss: 0.07214404016839228 | accuracy: 0.47820284697508897
Model not improved at epoch 10/30. Patience: 5/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 11/30
[Train] loss: 0.04605437530823197 | accuracy: 0.7268518518518519
[Validation] loss: 0.0721421920001719 | accuracy: 0.4786476868327402
Model not improved at epoch 11/30. Patience: 6/5
Max patience exceeded. Early stopping.


### Testing

In [11]:
test_loss, test_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                               feed_dict={image_paths: test_image_paths,
                                          labels: test_labels,
                                          is_training: False})

print('[Test] loss: {} | accuracy: {}'.format(test_loss, test_acc))

[Test] loss: 0.0594797047996939 | accuracy: 0.6193644333124608
