In [2]:
"""
Uses tf.contrib.data module which is in release candidate 1.2.0rc0
Based on:
    - PyTorch example from Justin Johnson:
      https://gist.github.com/jcjohnson/6e41e8512c17eae5da50aebef3378a4c
      - https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c
Required packages: tensorflow (v1.2)
You can install the release candidate 1.2.0rc0 here:
https://www.tensorflow.org/versions/r1.2/install/

Download the weights trained on ImageNet for VGG:
```
wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
tar -xvf vgg_16_2016_08_28.tar.gz
rm vgg_16_2016_08_28.tar.gz
```
"""

%matplotlib inline

from matplotlib import pyplot as plt

import argparse
import os

import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets

from vggnet_utils import *

VGG_MEAN = [123.68, 116.78, 103.94]

print(tf.__version__)

1.2.0-rc1


In [10]:
experiments = [] # list with dict of params for each experiment

lr1s = [5e-5, 1e-3]
lr2s = [1e-6, 1e-7, 1e-5]
dropout_probs = [0.5, 0.4, 0.3]
weight_decays = [5e-4, 1e-4]

# create a dict of experiments
for lr1 in lr1s:
    for lr2 in lr2s:
        for dp in dropout_probs:
            for wd in weight_decays:
                experiments.append({'lr1': lr1, 'lr2': lr2, 'dp': dp, 'wd': wd})

print(experiments)

for params_dict in experiments:
    print(params_dict)
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_dir', default='data/train-jpg/')
    parser.add_argument('--model_path', default='vgg_16.ckpt', type=str)
    parser.add_argument('--batch_size', default=100, type=int) #32
    parser.add_argument('--num_workers', default=50, type=int) #4
    parser.add_argument('--num_epochs1', default=10, type=int) #10
    parser.add_argument('--num_epochs2', default=10, type=int) #10
    parser.add_argument('--learning_rate1', default = params_dict['lr1'], type=float) #1e-3
    parser.add_argument('--learning_rate2', default = params_dict['lr2'], type=float)
    parser.add_argument('--dropout_keep_prob', default = params_dict['dp'], type=float)
    parser.add_argument('--weight_decay', default = params_dict['wd'], type=float)
    
    # For accessing args in an ipython notebook
    import sys; sys.argv=['']; del sys
    args = parser.parse_args()
    
    # Get the list of filenames and corresponding list of labels for training et validation
    # train_filenames, train_labels = list_images(args.train_dir)
    # val_filenames, val_labels = list_images(args.val_dir)
    
    all_filenames, all_labels = list_images(args.train_dir)
    
    train_filenames, train_labels, val_filenames, val_labels = split_samples(all_filenames, all_labels)
    
    num_classes = 17


    # --------------------------------------------------------------------------
    # In TensorFlow, you first want to define the computation graph with all the
    # necessary operations: loss, training op, accuracy...
    # Any tensor created in the `graph.as_default()` scope will be part of `graph`
    graph = tf.Graph()
    with graph.as_default():
        # Standard preprocessing for VGG on ImageNet taken from here:
        # https://github.com/tensorflow/models/blob/master/slim/preprocessing/vgg_preprocessing.py
        # Also see the VGG paper for more details: https://arxiv.org/pdf/1409.1556.pdf
    
        # Preprocessing (for both training and validation):
        # (1) Decode the image from jpg format
        # (2) Resize the image so its smaller side is 256 pixels long
        def _parse_function(filename, label):
            image_string = tf.read_file(filename)
            image_decoded = tf.image.decode_jpeg(image_string, channels=3)          # (1)
            image = tf.cast(image_decoded, tf.float32)
    
            smallest_side = 256.0
            height, width = tf.shape(image)[0], tf.shape(image)[1]
            height = tf.to_float(height)
            width = tf.to_float(width)
    
            scale = tf.cond(tf.greater(height, width),
                            lambda: smallest_side / width,
                             lambda: smallest_side / height)
            new_height = tf.to_int32(height * scale)
            new_width = tf.to_int32(width * scale)
    
            resized_image = tf.image.resize_images(image, [new_height, new_width])  # (2)
            return resized_image, label

        # Preprocessing (for training)
        # (3) Take a random 224x224 crop to the scaled image
        # (4) Horizontally flip the image with probability 1/2
        # (5) Substract the per color mean `VGG_MEAN`
        # Note: we don't normalize the data here, as VGG was trained without normalization
        def training_preprocess(image, label):
            crop_image = tf.random_crop(image, [224, 224, 3])                       # (3)
            flip_image = tf.image.random_flip_left_right(crop_image)                # (4)
    
            means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
            centered_image = flip_image - means                                     # (5)
    
            return centered_image, label
    
        # Preprocessing (for validation)
        # (3) Take a central 224x224 crop to the scaled image
        # (4) Substract the per color mean `VGG_MEAN`
        # Note: we don't normalize the data here, as VGG was trained without normalization
        def val_preprocess(image, label):
            crop_image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)    # (3)
    
            means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
            centered_image = crop_image - means                                     # (4)
    
            return centered_image, label
    
            # ----------------------------------------------------------------------
            # DATASET CREATION using tf.contrib.data.Dataset
            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/data
        
            # The tf.contrib.data.Dataset framework uses queues in the background to feed in
            # data to the model.
            # We initialize the dataset with a list of filenames and labels, and then apply
        # the preprocessing functions described above.
        # Behind the scenes, queues will load the filenames, preprocess them with multiple
        # threads and apply the preprocessing in parallel, and then batch the data
    
        # Training dataset
        train_filenames = tf.constant(train_filenames)
        train_labels = tf.constant(train_labels)
        train_dataset = tf.contrib.data.Dataset.from_tensor_slices((train_filenames, train_labels))
        train_dataset = train_dataset.map(_parse_function,
           num_threads=args.num_workers, output_buffer_size=args.batch_size)
        train_dataset = train_dataset.map(training_preprocess,
           num_threads=args.num_workers, output_buffer_size=args.batch_size)
        train_dataset = train_dataset.shuffle(buffer_size=10000)  # don't forget to shuffle
        batched_train_dataset = train_dataset.batch(args.batch_size)
    
        # Validation dataset
        val_filenames = tf.constant(val_filenames)
        val_labels = tf.constant(val_labels)
        val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_filenames, val_labels))
        val_dataset = val_dataset.map(_parse_function,
        num_threads=args.num_workers, output_buffer_size=args.batch_size)
        val_dataset = val_dataset.map(val_preprocess,
        num_threads=args.num_workers, output_buffer_size=args.batch_size)
        batched_val_dataset = val_dataset.batch(args.batch_size)

        print("dataset created")
        # Now we define an iterator that can operator on either dataset.
        # The iterator can be reinitialized by calling:
        #     - sess.run(train_init_op) for 1 epoch on the training set
        #     - sess.run(val_init_op)   for 1 epoch on the valiation set
        # Once this is done, we don't need to feed any value for images and labels
        # as they are automatically pulled out from the iterator queues.
    
        # A reinitializable iterator is defined by its structure. We could use the
        # `output_types` and `output_shapes` properties of either `train_dataset`
        # or `validation_dataset` here, because they are compatible.
        iterator = tf.contrib.data.Iterator.from_structure(batched_train_dataset.output_types,
                                                           batched_train_dataset.output_shapes)
        images, labels = iterator.get_next()
        train_init_op = iterator.make_initializer(batched_train_dataset)
        val_init_op = iterator.make_initializer(batched_val_dataset)
    
        # Indicates whether we are in training or in test mode
        is_training = tf.placeholder(tf.bool)
    
        # ---------------------------------------------------------------------
        # Now that we have set up the data, it's time to set up the model.
        # For this example, we'll use VGG-16 pretrained on ImageNet. We will remove the
        # last fully connected layer (fc8) and replace it with our own, with an
        # output size num_classes=8
        # We will first train the last layer for a few epochs.
        # Then we will train the entire model on our dataset for a few epochs.
    
        # Get the pretrained model, specifying the num_classes argument to create a new
        # fully connected replacing the last one, called "vgg_16/fc8"
        # Each model has a different architecture, so "vgg_16/fc8" will change in another model.
        # Here, logits gives us directly the predicted scores we wanted from the images.
        # We pass a scope to initialize "vgg_16/fc8" weights with he_initializer
        vgg = tf.contrib.slim.nets.vgg
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=args.weight_decay)):
            logits, _ = vgg.vgg_16(images, num_classes=num_classes, is_training=is_training,
                                       dropout_keep_prob=args.dropout_keep_prob)
            # note: need to normalize differently during validation...
            # logits = tf.nn.l2_normalize(logits, dim=0) # tf.layers.batch_normalization(logits)  # 
            logits = tf.sigmoid(logits) # add a sigmoid layer to make scores be 0-1
    
        # Specify where the model checkpoint is (pretrained weights).
        model_path = args.model_path
        assert(os.path.isfile(model_path))
    
        # Restore only the layers up to fc7 (included)
        # Calling function `init_fn(sess)` will load all the pretrained weights.
        variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)
    
        # Initialization operation from scratch for the new "fc8" layers
        # `get_variables` will only return the variables whose name starts with the given pattern
        fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
        fc8_init = tf.variables_initializer(fc8_variables)
    
        # ---------------------------------------------------------------------
        # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
        # We can then call the total loss easily
        # tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 
        tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) # softmax cross entropy loss so can have labels with multiple classes
        loss = tf.losses.get_total_loss()  
    
        # First we want to train only the reinitialized last layer fc8 for a few epochs.
        # We run minimize the loss only with respect to the fc8 variables (weight and bias).
        fc8_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate1)
        fc8_train_op = fc8_optimizer.minimize(loss, var_list=fc8_variables)

        # Then we want to finetune the entire model for a few epochs.
        # We run minimize the loss only with respect to all the variables.
        full_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate2)
        full_train_op = full_optimizer.minimize(loss)
    
        # Evaluation metrics
        
        tf.get_default_graph().finalize()
    
    # --------------------------------------------------------------------------
    # Now that we have built the graph and finalized it, we define the session.
    # The session is the interface to *run* the computational graph.
    # We can call our training operations with `sess.run(train_op)` for instance
    with tf.Session(graph=graph) as sess:
        init_fn(sess)  # load the pretrained weights
        sess.run(fc8_init)  # initialize the new fc8 layer
    
        # Update only the last layer for a few epochs.
        for epoch in range(args.num_epochs1):
            # Run an epoch over the training data.
            print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs1))
            # Here we initialize the iterator with the training set.
            # This means that we can go through an entire epoch until the iterator becomes empty.
            sess.run(train_init_op)
            while True:
                try:
                    _, curr_loss, curr_logits, curr_labels = sess.run([fc8_train_op, loss, logits, labels], {is_training: True})
                    if curr_loss < 100:
                        print('Current loss: %f' % curr_loss)

                        # Check F1 score on logits
                        threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
                        F1s = []
                        for i in range(len(threshs)):
                            F1s.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
                        max_i = F1s.index(max(F1s))
                        print('thresh: %f, max F1: %f' %(threshs[max_i], F1s[max_i]))
                    else:
                        print("EXPLODING LOSS")
                        break
                except tf.errors.OutOfRangeError:
                    break
            
            # Print current loss
            print('Current epoch loss: %f' % curr_loss)
            
            # Check F1 score on logits
            threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
            F1s = []
            for i in range(len(threshs)):
                F1s.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
            max_i = F1s.index(max(F1s))
            print('Epoch thresh: %f, max F1: %f' %(threshs[max_i], F1s[max_i]))
            
            # Check accuracy on the train and val sets every epoch.
            # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
            # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
            # print('Train accuracy: %f' % train_acc)
            # print('Val accuracy: %f\n' % val_acc)
    
    
        # Train the entire model for a few more epochs, continuing with the *same* weights.
        for epoch in range(args.num_epochs2):
            print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs1))
            sess.run(train_init_op)
            while True:
                try:
                    _, curr_loss, curr_logits, curr_labels = sess.run([full_train_op, loss, logits, labels], {is_training: True})
                    if curr_loss < 100:
                        print('Current loss: %f' % curr_loss)

                        # Check F1 score on logits
                        threshs = [0.5, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
                        F1s = []
                        for i in range(len(threshs)):
                            F1s.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
                        max_i = F1s.index(max(F1s))
                        print('thresh: %f, max F1: %f' %(threshs[max_i], F1s[max_i]))
                    else:
                        print("EXPLODING LOSS")
                        break
                except tf.errors.OutOfRangeError:
                    break
    
            # Print current loss
            print('Entire model epoch loss: %f' % curr_loss)
    
            # Check F1 score on logits
            threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
            F1s = []
            for i in range(len(threshs)):
                F1s.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
            max_i = F1s.index(max(F1s))
            print('Entire model epoch thresh: %f, max F1: %f' %(threshs[max_i], F1s[max_i]))
            
            # Check accuracy on the train and val sets every epoch
            # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
            # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
            # print('Train accuracy: %f' % train_acc)
            # print('Val accuracy: %f\n' % val_acc)
    

100%|██████████| 40479/40479 [00:00<00:00, 412275.49it/s]

[{'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-06, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-06, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-06, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-06, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-06, 'dp': 0.3}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-06, 'dp': 0.3}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-07, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-07, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-07, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-07, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-07, 'dp': 0.3}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-07, 'dp': 0.3}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-05, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-05, 'dp': 0.5}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-05, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-05, 'dp': 0.4}, {'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-05, 'dp': 0.3}, {'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-05, 'dp': 0.3}, {'lr1': 0


INFO:tensorflow:Restoring parameters from vgg_16.ckpt


Starting epoch 1 / 10
Current loss: 9.287038
thresh: 0.050000, max F1: 0.479649
Current loss: 8.801819
thresh: 0.050000, max F1: 0.449315
Current loss: 8.790260
thresh: 0.050000, max F1: 0.466801
Current loss: 9.502462
thresh: 0.050000, max F1: 0.484676
Current loss: 8.599808
thresh: 0.050000, max F1: 0.458769
Current loss: 9.013104
thresh: 0.050000, max F1: 0.469613
Current loss: 9.512433
thresh: 0.050000, max F1: 0.479807
Current loss: 9.246086
thresh: 0.050000, max F1: 0.481469
Current loss: 8.470780
thresh: 0.050000, max F1: 0.454534
Current loss: 9.002193
thresh: 0.050000, max F1: 0.465095
Current loss: 8.778718
thresh: 0.050000, max F1: 0.457039
Current loss: 9.415797
thresh: 0.050000, max F1: 0.482372
Current loss: 9.022259
thresh: 0.050000, max F1: 0.464446
Current loss: 9.482093
thresh: 0.050000, max F1: 0.490504
Current loss: 9.217441
thresh: 0.050000, max F1: 0.473030
Current loss: 9.736200
thresh: 0.050000, max F1: 0.495845
Current loss: 8.978197
thresh: 0.050000, max F1: 0

  'precision', 'predicted', average, warn_for)
100%|██████████| 40479/40479 [00:00<00:00, 411401.37it/s]


thresh: 0.900000, max F1: 0.653014
Entire model epoch loss: 7.471708
Entire model epoch thresh: 0.900000, max F1: 0.653014
{'lr1': 5e-05, 'wd': 0.0001, 'lr2': 1e-06, 'dp': 0.5}
listed
dataset created
INFO:tensorflow:Restoring parameters from vgg_16.ckpt



INFO:tensorflow:Restoring parameters from vgg_16.ckpt


Starting epoch 1 / 10
Current loss: 8.442378
thresh: 0.050000, max F1: 0.488683
Current loss: 8.146644
thresh: 0.050000, max F1: 0.477025
Current loss: 8.585183
thresh: 0.050000, max F1: 0.497454
Current loss: 8.851315
thresh: 0.050000, max F1: 0.504689
Current loss: 8.139172
thresh: 0.100000, max F1: 0.493383
Current loss: 8.031407
thresh: 0.050000, max F1: 0.472489
Current loss: 8.249376
thresh: 0.100000, max F1: 0.485940
Current loss: 8.669520
thresh: 0.100000, max F1: 0.506093
Current loss: 8.198097
thresh: 0.050000, max F1: 0.481376
Current loss: 8.444810
thresh: 0.050000, max F1: 0.490483
Current loss: 8.665749
thresh: 0.050000, max F1: 0.496006
Current loss: 7.741804
thresh: 0.050000, max F1: 0.461833
Current loss: 8.114803
thresh: 0.050000, max F1: 0.465148
Current loss: 8.206464
thresh: 0.050000, max F1: 0.495526
Current loss: 8.807316
thresh: 0.100000, max F1: 0.518530
Current loss: 9.226366
thresh: 0.050000, max F1: 0.516154
Current loss: 8.031628
thresh: 0.050000, max F1: 0

100%|██████████| 40479/40479 [00:00<00:00, 156162.75it/s]


thresh: 0.950000, max F1: 0.668858
Entire model epoch loss: 7.294249
Entire model epoch thresh: 0.950000, max F1: 0.668858
{'lr1': 5e-05, 'wd': 0.0005, 'lr2': 1e-06, 'dp': 0.4}
listed
dataset created
INFO:tensorflow:Restoring parameters from vgg_16.ckpt



INFO:tensorflow:Restoring parameters from vgg_16.ckpt


Starting epoch 1 / 10
Current loss: 8.495043
thresh: 0.050000, max F1: 0.464938
Current loss: 9.251058
thresh: 0.050000, max F1: 0.494964
Current loss: 8.758061
thresh: 0.050000, max F1: 0.470365
Current loss: 9.291769
thresh: 0.050000, max F1: 0.479408
Current loss: 9.139052
thresh: 0.050000, max F1: 0.482012
Current loss: 8.836327
thresh: 0.050000, max F1: 0.474273
Current loss: 8.342544
thresh: 0.050000, max F1: 0.445026
Current loss: 8.890888
thresh: 0.050000, max F1: 0.477968
Current loss: 8.849143
thresh: 0.050000, max F1: 0.463897
Current loss: 8.550283
thresh: 0.050000, max F1: 0.464697
Current loss: 9.364493
thresh: 0.050000, max F1: 0.494370
Current loss: 9.006965
thresh: 0.050000, max F1: 0.484857
Current loss: 8.826713
thresh: 0.050000, max F1: 0.474625
Current loss: 9.012724
thresh: 0.050000, max F1: 0.489380
Current loss: 8.892563
thresh: 0.050000, max F1: 0.471240
Current loss: 8.928363
thresh: 0.050000, max F1: 0.465413
Current loss: 9.102937
thresh: 0.050000, max F1: 0

KeyboardInterrupt: 