### Load training, validation and testing data¶

In [1]:
from data_helper import load_dataset

In [2]:
IMAGE_FOLDER_PATH = 'dataset/resized/frames/'
LABEL_FOLDER_PATH = 'dataset/labels/'

train_head_image_paths, train_hand_image_paths, train_labels, \
val_head_image_paths, val_hand_image_paths, val_labels, \
test_head_image_paths, test_hand_image_paths, test_labels = load_dataset(image_folder_path=IMAGE_FOLDER_PATH,
                                                                         label_folder_path=LABEL_FOLDER_PATH,
                                                                         label_type='obj',
                                                                         hand_types=['left', 'right'],
                                                                         with_head=True,
                                                                         validation_split_ratio=0.15)

# Only take hand image paths for baseline
train_image_paths =  train_hand_image_paths
val_image_paths = val_hand_image_paths
test_image_paths = test_hand_image_paths

----------------------------------------------------------------------------------------------------
[Train (Head)] number of image paths: 12744
[Train (Hand)] number of image paths: 12744
[Train (Label)] number of labels: 12744
----------------------------------------------------------------------------------------------------
[Validation (Head)] number of image paths: 2248
[Validation (Hand)] number of image paths: 2248
[Validation (Label)] number of labels: 2248
----------------------------------------------------------------------------------------------------
[Test (Head)] number of image paths: 12776
[Test (Hand)] number of image paths: 12776
[Test (Label)] number of labels: 12776


### Use Tensorflow to build computational graph

In [3]:
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets
import vgg_preprocessing

#### Path configs and hyperparameters

In [4]:
PRETRAINED_HAND_GESTURE_MODEL_PATH = 'model/hand_gesture_vgg_16/hand_gesture_vgg_16_model'
PRETRAINED_HAND_OBJ_MODEL_PATH = 'model/hand_obj_vgg_16/hand_obj_vgg_16_model'
MODEL_PATH = 'model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model'

num_classes = 24
batch_size = 16
num_workers = 20
max_epochs1 = 30
max_epochs2 = 30
max_patience = 5 # For early stopping
learning_rate1 = 1e-3
learning_rate2 = 1e-5
dropout_keep_prob = 0.5
weight_decay = 5e-4

#### Building blocks of  two-stream CNN model

In [5]:
def stream_vgg_16(inputs,
                  is_training=True,
                  dropout_keep_prob=0.5,
                  spatial_squeeze=False,
                  scope='stream_vgg_16',
                  fc_conv_padding='VALID'):
    
    """ Reference from "https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py"
        A VGG16 net excluding 'fc7' and 'fc8' layers.
        
        Returns: 
            A shape=(?, 4096) deep features if spatial_squeeze == True, else shape=(?, 1, 1, 4096).
            and end_points dict.
    """
    
    with tf.variable_scope(scope, 'stream_vgg_16', [inputs]) as sc:
            end_points_collection = sc.name + '_end_points'
            
            with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                outputs_collections=end_points_collection):
                
                net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
                net = slim.max_pool2d(net, [2, 2], scope='pool1')
                net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
                net = slim.max_pool2d(net, [2, 2], scope='pool2')
                net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
                net = slim.max_pool2d(net, [2, 2], scope='pool3')
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
                net = slim.max_pool2d(net, [2, 2], scope='pool4')
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
                net = slim.max_pool2d(net, [2, 2], scope='pool5')
                # Use conv2d instead of fully_connected layers.
                net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
                net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
                # Convert end_points_collection into a end_point dict.
                end_points = slim.utils.convert_collection_to_dict(end_points_collection)
                
                if spatial_squeeze:
                    net = tf.squeeze(net, [1, 2], name='fc6/squeezed')
                    end_points[sc.name + '/fc6'] = net
                return net, end_points
            
def fusion_fc(inputs,
              num_classes,
              is_training=True,
              dropout_keep_prob=0.5,
              spatial_squeeze=True,
              scope='fusion_fc'):
    
    """ Reference from "https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py"
        A VGG16 net only include 'fc7' and 'fc8' layers.
        
        Args:
            inputs: A list of tensor with shape like (?, 1, 1, 4096). (the 2, 3 axis must be "1")
            
        Returns:
            The last op containing the log predictions and end_points dict.
    """
    
    with tf.variable_scope(scope, 'fusion_fc', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
                
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            # [(?, 1, 1, 4096), (?, 1, 1, 4096)] => (?, 1, 1, 8192)
            net = tf.concat(inputs, axis=3)
            # (?, 1, 1, 8192) => (?, 1, 1, 4096)
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout7')
            # (?, 1, 1, 4096) => (?, 1, 1, num_classes)
            net = slim.conv2d(net, num_classes, [1, 1],
                              activation_fn=None,
                              normalizer_fn=None,
                              scope='fc8')
            # Convert end_points_collection into a end_point dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            
            if spatial_squeeze:
                # (?, 1, 1, num_classes) => (?, num_classes)
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points

#### Build our two-stream CNN

In [6]:
%%time

def dataset_map_fn(image_path, label, is_training):
    # Load image
    image_string = tf.read_file(image_path)
    image_decoded = tf.image.decode_png(image_string, channels=3)
    image = tf.cast(image_decoded, tf.float32)
    # Preprocess image
    preprocessed_image = tf.cond(is_training,
                                 true_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=True),
                                 false_fn=lambda: vgg_preprocessing.preprocess_image(image, 224, 224, is_training=False))
    return preprocessed_image, label

graph = tf.Graph()
with graph.as_default():
    # ---------------------------------------------------------------------
    # Indicates whether we are in training or in test mode
    # Since VGG16 has applied `dropout`, we need to disable it when testing.
    is_training = tf.placeholder(dtype=tf.bool, name='is_training')
    
    # Training, validation, testing data to feed in.
    image_paths = tf.placeholder(dtype=tf.string, shape=(None,), name='image_paths')
    labels = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')
    
    # Use dataset API to automatically generate batch data by iterator.
    dataset = tf.contrib.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(lambda image_path, label: dataset_map_fn(image_path, label, is_training))
    dataset = dataset.shuffle(buffer_size=10000)
    batched_dataset = dataset.batch(batch_size)
    
    # Now we define an iterator that can operator on dataset.
    # The iterator can be reinitialized by calling:
    # sess.run(dataset_init_op, feed_dict={image_paths=train_image_paths, labels=train_labels}) 
    # for 1 epoch on the training set.
    
    # Once this is done, we don't need to feed any value for images and labels
    # as they are automatically pulled out from the iterator queues.

    # A reinitializable iterator is defined by its structure. We could use the
    # `output_types` and `output_shapes` properties of dataset.
    # The dataset will be fed with training, validation or testing data.
    iterator = tf.contrib.data.Iterator.from_structure(batched_dataset.output_types,
                                                       batched_dataset.output_shapes)
    
    # A batch of data to feed into the networks.
    batch_images, batch_labels = iterator.get_next()
    dataset_init_op = iterator.make_initializer(batched_dataset)
    
    # =====================================================================
    # Start to build our two-stream cnn model.
    vgg = tf.contrib.slim.nets.vgg
    # Apply L2 regularization with weight decay.
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)):
        # Firstly, build our first stream cnn model -- pretrained hand gesture vgg16 net,
        # excluding 'fc7' and'fc8' layers
        hand_gesture_model_4096_features, _ = stream_vgg_16(batch_images,
                                          is_training=is_training,
                                          dropout_keep_prob=dropout_keep_prob,
                                          spatial_squeeze=False,
                                          scope='hand_gesture_vgg_16')

        # Secondly, build our second stream cnn model -- pretrained hand obj vgg16 net,
        # excluding 'fc7' and'fc8' layers
        hand_obj_model_4096_features, _ = stream_vgg_16(batch_images,
                                        is_training=is_training,
                                        dropout_keep_prob=dropout_keep_prob,
                                        spatial_squeeze=False,
                                        scope='hand_obj_vgg_16')

        # Finally, concatenate our 2 stream cnn models with fc layers architecture in vgg16 net.
        logits, _ = fusion_fc(inputs=[hand_gesture_model_4096_features, hand_obj_model_4096_features],
                              num_classes=num_classes,
                              is_training=is_training,
                              dropout_keep_prob=dropout_keep_prob,
                              spatial_squeeze=True,
                              scope='fusion_fc')
        
    # =====================================================================

    # ---------------------------------------------------------------------
    # Restore only the layers up to 'fc6' (included)
    # Calling function `hand_gesture_model_init_fn(sess)` will load all the pretrained weights.
    hand_gesture_model_variables = slim.get_variables(scope='hand_gesture_vgg_16')
    # Since the variable scope name of checkpoint file is 'vgg_16',
    # which is different from our new scope name 'hand_gesture_vgg_16',
    # we need to re-map variable names in order to make `Saver` know which new variable name to restore.
    # If we don't know the variable names of source checkpoint file, we can run the script `inspect_checkpoint.py`, 
    # For example:
    # $ python inspect_checkpoint.py --file_name=model/hand_gesture_vgg_16/hand_gesture_vgg_16_model
    # to inspect the variable names from our source checkpoint file.
    hand_gesture_model_init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
        PRETRAINED_HAND_GESTURE_MODEL_PATH,
        var_list={var.name.replace('hand_gesture_vgg_16', 'vgg_16').split(':')[0]: var for var in hand_gesture_model_variables}
    )
        
    # Same as we done to `hand_gesture_model`.
    hand_obj_model_variables = slim.get_variables(scope='hand_obj_vgg_16')
    hand_obj_model_init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
        PRETRAINED_HAND_OBJ_MODEL_PATH,
        var_list={var.name.replace('hand_obj_vgg_16', 'vgg_16').split(':')[0]: var for var in hand_obj_model_variables}
    )
    
    # ---------------------------------------------------------------------
    # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
    # We can then call the total loss easily
    tf.losses.sparse_softmax_cross_entropy(labels=batch_labels, logits=logits)
    loss = tf.losses.get_total_loss()
    
    # First we want to train only the reinitialized last layer fc8 for a few epochs.
    # We run minimize the loss only with respect to the `fusion_fc` variables (weight and bias).
    fusion_fc_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate1)
    fusion_fc_train_op = fusion_fc_optimizer.minimize(loss, var_list=slim.get_variables(scope='fusion_fc'))
    
    # Then we want to finetune the entire model for a few epochs.
    # We run minimize the loss only with respect to all the variables.
    full_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate2)
    full_train_op = full_optimizer.minimize(loss)
    
    # Evaluation metrics
    prediction = tf.to_int32(tf.argmax(logits, 1))
    correct_prediction = tf.equal(prediction, batch_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    
    
    #Initialize the variables (i.e. assign their default value)
    init_op = tf.global_variables_initializer()
    
    # 'Saver' op to save and restore all the variables
    saver = tf.train.Saver()
    
    

CPU times: user 2.11 s, sys: 20 ms, total: 2.13 s
Wall time: 2.13 s


### Start training

In [7]:
from tqdm import tqdm

In [8]:
def evaluate(sess, loss, correct_prediction, dataset_init_op, feed_dict):
    """
        Evaluation in training loop.
        Check the performance of the model on either train, val or test (depending on `dataset_init_op`)
        Note: The arguments are tensorflow operators defined in the graph.
    """
    
    # Initialize the correct dataset.
    sess.run(dataset_init_op, feed_dict=feed_dict)

    data_loss = 0
    num_correct = 0
    num_samples = 0
    
    # Evaluate on every batch.
    while True:
        try:
            # Disable `is_training` since we have `dropout` in VGG net.
            _loss, _correct_prediction = sess.run([loss, correct_prediction], feed_dict={is_training: False})

            data_loss += _loss
            num_correct += _correct_prediction.sum() # e.g: [True, False, True].sum() = 2
            num_samples += _correct_prediction.shape[0] # Batch size
            
        except tf.errors.OutOfRangeError:
            break

    data_loss = data_loss / num_samples
    acc = num_correct / num_samples

    return data_loss, acc

In [9]:
# --------------------------------------------------------------------------
# Now that we have built the graph and finalized it, we define the session.
# The session is the interface to *run* the computational graph.
# We can call our training operations with `sess.run(train_op)` for instance
sess = tf.Session(graph=graph)

### Initialize variables or restore variables from checkpoint

In [23]:
RESTORE = True
max_acc = 0.0

if RESTORE:
    print('Restore variables from checkpoint...')
    # If checkpoint exists, restore it to session.
    saver.restore(sess, MODEL_PATH)
    # Regain max validation accuracy from model
    print('Regaining max validation accuracy...')
    %time _, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op, \
                                feed_dict={image_paths: val_image_paths, \
                                           labels: val_labels, \
                                           is_training: False})
    max_acc = val_acc
    print('Max validation accuracy: {}'.format(max_acc))
else:
    print('Initialize variables from scratch...')
    # Initialize all variables
    sess.run(init_op)
    # Load the pretrained weights for 2-stream model
    hand_gesture_model_init_fn(sess) 
    hand_obj_model_init_fn(sess)

Restore variables from checkpoint...
INFO:tensorflow:Restoring parameters from model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
Regaining max validation accuracy...
CPU times: user 35.5 s, sys: 2.52 s, total: 38 s
Wall time: 37.3 s
Max validation accuracy: 0.5160142348754448


### Only train `fusion_fc` layers.

In [None]:
patience = 0

# Update only the last layer for a few epochs.
for epoch in tqdm(range(max_epochs1)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs1))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(fusion_fc_train_op, feed_dict={is_training: True})
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

  0%|          | 0/30 [00:00<?, ?it/s]

--------------------------------------------------------------------------------------------------------------
Starting epoch 1/30
[Train] loss: 0.0901912741891558 | accuracy: 0.7633396107972379
[Validation] loss: 0.14419236853453613 | accuracy: 0.45062277580071175


  3%|▎         | 1/30 [09:14<4:27:52, 554.22s/it]

Model updated and saved in file: model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 2/30


  7%|▋         | 2/30 [18:06<4:15:38, 547.79s/it]

[Train] loss: 0.0914820623487627 | accuracy: 0.7512554927809165
[Validation] loss: 0.14428714182877456 | accuracy: 0.44217081850533807
Model not improved at epoch 2/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 3/30


 10%|█         | 3/30 [26:57<4:04:10, 542.61s/it]

[Train] loss: 0.09052651907315587 | accuracy: 0.7557281858129316
[Validation] loss: 0.14454995493447653 | accuracy: 0.4412811387900356
Model not improved at epoch 3/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 4/30


 13%|█▎        | 4/30 [35:49<3:53:44, 539.40s/it]

[Train] loss: 0.0919695555490289 | accuracy: 0.7472536095417451
[Validation] loss: 0.1587530796638163 | accuracy: 0.4443950177935943
Model not improved at epoch 4/30. Patience: 3/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 5/30
[Train] loss: 0.08971963573085835 | accuracy: 0.76530131826742
[Validation] loss: 0.15214911421422855 | accuracy: 0.4639679715302491


 17%|█▋        | 5/30 [45:06<3:46:59, 544.78s/it]

Model updated and saved in file: model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 6/30


 20%|██        | 6/30 [54:00<3:36:38, 541.61s/it]

[Train] loss: 0.0870232390332207 | accuracy: 0.7659290646578782
[Validation] loss: 0.135964793666826 | accuracy: 0.4626334519572954
Model not improved at epoch 6/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 7/30


 23%|██▎       | 7/30 [1:02:56<3:26:55, 539.79s/it]

[Train] loss: 0.08448172391995049 | accuracy: 0.7700094161958568
[Validation] loss: 0.15515190956855585 | accuracy: 0.4190391459074733
Model not improved at epoch 7/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 8/30


 27%|██▋       | 8/30 [1:11:51<3:17:21, 538.26s/it]

[Train] loss: 0.08406663726727617 | accuracy: 0.7651443816698054
[Validation] loss: 0.13889422790011477 | accuracy: 0.44973309608540923
Model not improved at epoch 8/30. Patience: 3/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 9/30


 30%|███       | 9/30 [1:20:44<3:07:53, 536.86s/it]

[Train] loss: 0.08196605430484491 | accuracy: 0.7663214061519146
[Validation] loss: 0.14358577142830845 | accuracy: 0.452846975088968
Model not improved at epoch 9/30. Patience: 4/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 10/30


 33%|███▎      | 10/30 [1:29:30<2:57:51, 533.55s/it]

[Train] loss: 0.08102813108642372 | accuracy: 0.761142498430634
[Validation] loss: 0.1443828574491141 | accuracy: 0.4501779359430605
Model not improved at epoch 10/30. Patience: 5/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 11/30
[Train] loss: 0.07919231768082464 | accuracy: 0.7674199623352166
[Validation] loss: 0.13080420199238108 | accuracy: 0.4559608540925267
Model not improved at epoch 11/30. Patience: 6/5
Max patience exceeded. Early stopping.


### Train all layers

In [24]:
patience = 0

# Train the entire model for a few more epochs, continuing with the *same* weights.
for epoch in tqdm(range(max_epochs2)):
    # Run an epoch over the training data.
    print('-'*110)
    print('Starting epoch {}/{}'.format(epoch+1, max_epochs2))
    # Here we initialize the iterator with the training set.
    # This means that we can go through an entire epoch until the iterator becomes empty.
    sess.run(dataset_init_op, feed_dict={image_paths: train_image_paths,
                                         labels: train_labels,
                                         is_training: True})
    while True:
        try:
            _ = sess.run(full_train_op, feed_dict={is_training: True})    
        except tf.errors.OutOfRangeError:
            break

    # Check performance every epoch
    train_loss, train_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                     feed_dict={image_paths: train_image_paths,
                                                labels: train_labels,
                                                is_training: True})
    
    val_loss, val_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                                 feed_dict={image_paths: val_image_paths,
                                            labels: val_labels,
                                            is_training: False})
    
    print('[Train] loss: {} | accuracy: {}'.format(train_loss, train_acc))
    print('[Validation] loss: {} | accuracy: {}'.format(val_loss, val_acc))
    
    # Save checkpoint
    if val_acc > max_acc:
        patience = 0
        max_acc = val_acc
        save_path = saver.save(sess, MODEL_PATH)
        print("Model updated and saved in file: %s" % save_path)
    else:
        patience += 1
        print('Model not improved at epoch {}/{}. Patience: {}/{}'.format(epoch+1, max_epochs1, patience, max_patience))
    # Early stopping.
    if patience > max_patience:
        print('Max patience exceeded. Early stopping.')
        break

  0%|          | 0/30 [00:00<?, ?it/s]

--------------------------------------------------------------------------------------------------------------
Starting epoch 1/30


  3%|▎         | 1/30 [12:59<6:16:35, 779.16s/it]

[Train] loss: 0.11563884985574147 | accuracy: 0.8915568110483365
[Validation] loss: 0.29818385860673896 | accuracy: 0.5084519572953736
Model not improved at epoch 1/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 2/30
[Train] loss: 0.11589638696140414 | accuracy: 0.8873979912115505
[Validation] loss: 0.2688038814746612 | accuracy: 0.5249110320284698


  7%|▋         | 2/30 [26:24<6:07:17, 787.07s/it]

Model updated and saved in file: model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 3/30
[Train] loss: 0.11434853913195839 | accuracy: 0.8862994350282486
[Validation] loss: 0.2815796122007947 | accuracy: 0.5324733096085409


 10%|█         | 3/30 [39:54<5:57:13, 793.82s/it]

Model updated and saved in file: model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
--------------------------------------------------------------------------------------------------------------
Starting epoch 4/30


 13%|█▎        | 4/30 [52:55<5:42:21, 790.04s/it]

[Train] loss: 0.11247834495905429 | accuracy: 0.8983050847457628
[Validation] loss: 0.27902767200062706 | accuracy: 0.4919928825622776
Model not improved at epoch 4/30. Patience: 1/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 5/30


 17%|█▋        | 5/30 [1:05:58<5:28:17, 787.90s/it]

[Train] loss: 0.1136300910009353 | accuracy: 0.8884180790960452
[Validation] loss: 0.30352958792893486 | accuracy: 0.50355871886121
Model not improved at epoch 5/30. Patience: 2/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 6/30


 20%|██        | 6/30 [1:18:57<5:14:03, 785.14s/it]

[Train] loss: 0.11165027740386321 | accuracy: 0.8979127432517263
[Validation] loss: 0.3077259921730625 | accuracy: 0.48976868327402134
Model not improved at epoch 6/30. Patience: 3/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 7/30


 23%|██▎       | 7/30 [1:31:59<5:00:40, 784.37s/it]

[Train] loss: 0.11040766803782763 | accuracy: 0.899482109227872
[Validation] loss: 0.2736537470300002 | accuracy: 0.5106761565836299
Model not improved at epoch 7/30. Patience: 4/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 8/30


 27%|██▋       | 8/30 [1:44:57<4:46:51, 782.34s/it]

[Train] loss: 0.10882109295102088 | accuracy: 0.9079566854990584
[Validation] loss: 0.3053095893291392 | accuracy: 0.49777580071174377
Model not improved at epoch 8/30. Patience: 5/5
--------------------------------------------------------------------------------------------------------------
Starting epoch 9/30
[Train] loss: 0.10768211646530514 | accuracy: 0.9088983050847458
[Validation] loss: 0.3757829063727762 | accuracy: 0.48131672597864766
Model not improved at epoch 9/30. Patience: 6/5
Max patience exceeded. Early stopping.


### Testing

In [25]:
%%time

saver.restore(sess, MODEL_PATH)

test_loss, test_acc = evaluate(sess, loss, correct_prediction, dataset_init_op,
                               feed_dict={image_paths: test_image_paths,
                                          labels: test_labels,
                                          is_training: False})

print('[Test] loss: {} | accuracy: {}'.format(test_loss, test_acc))

INFO:tensorflow:Restoring parameters from model/two_stream_vgg_16_baseline/two_stream_vgg_16_baseline_model
[Test] loss: 0.20677262219922873 | accuracy: 0.6556825297432687
CPU times: user 3min 8s, sys: 20.7 s, total: 3min 29s
Wall time: 3min 22s
