In [1]:
"""
Uses tf.contrib.data module which is in release candidate 1.2.0rc0
Based on:
    - PyTorch example from Justin Johnson:
      https://gist.github.com/jcjohnson/6e41e8512c17eae5da50aebef3378a4c
      - https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c
Required packages: tensorflow (v1.2)
You can install the release candidate 1.2.0rc0 here:
https://www.tensorflow.org/versions/r1.2/install/

Download the weights trained on ImageNet for VGG:
```
wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
tar -xvf vgg_16_2016_08_28.tar.gz
rm vgg_16_2016_08_28.tar.gz
```
"""

%matplotlib inline

from matplotlib import pyplot as plt

import argparse
import os

import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets

from vggnet_utils import *

VGG_MEAN = [123.68, 116.78, 103.94]

print(tf.__version__)

1.2.0-rc1


In [11]:
experiments = [] # list with dict of params for each experiment

lr1s = [1e-3] # 5e-5, 
lr2s = [1e-5] # 1e-6, 1e-7, 
dropout_probs = [0.4] # 0.5, 0.3
weight_decays = [1e-4] # 5e-4, 

# create a dict of experiments
for lr1 in lr1s:
    for lr2 in lr2s:
        for dp in dropout_probs:
            for wd in weight_decays:
                experiments.append({'lr1': lr1, 'lr2': lr2, 'dp': dp, 'wd': wd})

print(experiments)

loss_log = []
f_log = []
thresh_log = []

for params_dict in experiments:
    print(params_dict)
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_dir', default='data/train-jpg/')
    parser.add_argument('--model_path', default='vgg_16.ckpt', type=str)
    parser.add_argument('--batch_size', default=100, type=int) #32
    parser.add_argument('--num_workers', default=50, type=int) #4
    parser.add_argument('--num_epochs1', default=10, type=int) #10
    parser.add_argument('--num_epochs2', default=1, type=int) #10
    parser.add_argument('--learning_rate1', default = params_dict['lr1'], type=float) #1e-3
    parser.add_argument('--learning_rate2', default = params_dict['lr2'], type=float)
    parser.add_argument('--dropout_keep_prob', default = params_dict['dp'], type=float)
    parser.add_argument('--weight_decay', default = params_dict['wd'], type=float)
    
    # For accessing args in an ipython notebook
    import sys; sys.argv=['']; del sys
    args = parser.parse_args()
    
    # Get the list of filenames and corresponding list of labels for training et validation
    # train_filenames, train_labels = list_images(args.train_dir)
    # val_filenames, val_labels = list_images(args.val_dir)
    
    all_filenames, all_labels = list_images(args.train_dir)
    
    train_filenames, train_labels, val_filenames, val_labels = split_samples(all_filenames, all_labels)
    
    num_classes = 17


    # --------------------------------------------------------------------------
    # In TensorFlow, you first want to define the computation graph with all the
    # necessary operations: loss, training op, accuracy...
    # Any tensor created in the `graph.as_default()` scope will be part of `graph`
    graph = tf.Graph()
    with graph.as_default():
        # Standard preprocessing for VGG on ImageNet taken from here:
        # https://github.com/tensorflow/models/blob/master/slim/preprocessing/vgg_preprocessing.py
        # Also see the VGG paper for more details: https://arxiv.org/pdf/1409.1556.pdf
    
        # Preprocessing (for both training and validation):
        # (1) Decode the image from jpg format
        # (2) Resize the image so its smaller side is 256 pixels long
        def _parse_function(filename, label):
            image_string = tf.read_file(filename)
            image_decoded = tf.image.decode_jpeg(image_string, channels=3)          # (1)
            image = tf.cast(image_decoded, tf.float32)
    
            smallest_side = 256.0
            height, width = tf.shape(image)[0], tf.shape(image)[1]
            height = tf.to_float(height)
            width = tf.to_float(width)
    
            scale = tf.cond(tf.greater(height, width),
                            lambda: smallest_side / width,
                             lambda: smallest_side / height)
            new_height = tf.to_int32(height * scale)
            new_width = tf.to_int32(width * scale)
    
            resized_image = tf.image.resize_images(image, [new_height, new_width])  # (2)
            return resized_image, label

        # Preprocessing (for training)
        # (3) Take a random 224x224 crop to the scaled image
        # (4) Horizontally flip the image with probability 1/2
        # (5) Substract the per color mean `VGG_MEAN`
        # Note: we don't normalize the data here, as VGG was trained without normalization
        def training_preprocess(image, label):
            crop_image = tf.random_crop(image, [224, 224, 3])                       # (3)
            flip_image = tf.image.random_flip_left_right(crop_image)                # (4)
    
            means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
            centered_image = flip_image - means                                     # (5)
    
            return centered_image, label
    
        # Preprocessing (for validation)
        # (3) Take a central 224x224 crop to the scaled image
        # (4) Substract the per color mean `VGG_MEAN`
        # Note: we don't normalize the data here, as VGG was trained without normalization
        def val_preprocess(image, label):
            crop_image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)    # (3)
    
            means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
            centered_image = crop_image - means                                     # (4)
    
            return centered_image, label
    
            # ----------------------------------------------------------------------
            # DATASET CREATION using tf.contrib.data.Dataset
            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/data
        
            # The tf.contrib.data.Dataset framework uses queues in the background to feed in
            # data to the model.
            # We initialize the dataset with a list of filenames and labels, and then apply
        # the preprocessing functions described above.
        # Behind the scenes, queues will load the filenames, preprocess them with multiple
        # threads and apply the preprocessing in parallel, and then batch the data
    
        # Training dataset
        train_filenames = tf.constant(train_filenames)
        train_labels = tf.constant(train_labels)
        train_dataset = tf.contrib.data.Dataset.from_tensor_slices((train_filenames, train_labels))
        train_dataset = train_dataset.map(_parse_function,
           num_threads=args.num_workers, output_buffer_size=args.batch_size)
        train_dataset = train_dataset.map(training_preprocess,
           num_threads=args.num_workers, output_buffer_size=args.batch_size)
        train_dataset = train_dataset.shuffle(buffer_size=10000)  # don't forget to shuffle
        batched_train_dataset = train_dataset.batch(args.batch_size)
    
        # Validation dataset
        val_filenames = tf.constant(val_filenames)
        val_labels = tf.constant(val_labels)
        val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_filenames, val_labels))
        val_dataset = val_dataset.map(_parse_function,
        num_threads=args.num_workers, output_buffer_size=args.batch_size)
        val_dataset = val_dataset.map(val_preprocess,
        num_threads=args.num_workers, output_buffer_size=args.batch_size)
        batched_val_dataset = val_dataset.batch(args.batch_size)

        print("dataset created")
        # Now we define an iterator that can operator on either dataset.
        # The iterator can be reinitialized by calling:
        #     - sess.run(train_init_op) for 1 epoch on the training set
        #     - sess.run(val_init_op)   for 1 epoch on the valiation set
        # Once this is done, we don't need to feed any value for images and labels
        # as they are automatically pulled out from the iterator queues.
    
        # A reinitializable iterator is defined by its structure. We could use the
        # `output_types` and `output_shapes` properties of either `train_dataset`
        # or `validation_dataset` here, because they are compatible.
        iterator = tf.contrib.data.Iterator.from_structure(batched_train_dataset.output_types,
                                                           batched_train_dataset.output_shapes)
        images, labels = iterator.get_next()
        train_init_op = iterator.make_initializer(batched_train_dataset)
        val_init_op = iterator.make_initializer(batched_val_dataset)
    
        # Indicates whether we are in training or in test mode
        is_training = tf.placeholder(tf.bool)
    
        # ---------------------------------------------------------------------
        # Now that we have set up the data, it's time to set up the model.
        # For this example, we'll use VGG-16 pretrained on ImageNet. We will remove the
        # last fully connected layer (fc8) and replace it with our own, with an
        # output size num_classes=8
        # We will first train the last layer for a few epochs.
        # Then we will train the entire model on our dataset for a few epochs.
    
        # Get the pretrained model, specifying the num_classes argument to create a new
        # fully connected replacing the last one, called "vgg_16/fc8"
        # Each model has a different architecture, so "vgg_16/fc8" will change in another model.
        # Here, logits gives us directly the predicted scores we wanted from the images.
        # We pass a scope to initialize "vgg_16/fc8" weights with he_initializer
        vgg = tf.contrib.slim.nets.vgg
        with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=args.weight_decay)):
            logits, _ = vgg.vgg_16(images, num_classes=num_classes, is_training=is_training,
                                       dropout_keep_prob=args.dropout_keep_prob)
            logits = tf.sigmoid(logits) # add a sigmoid layer to make scores be 0-1
    
        # Specify where the model checkpoint is (pretrained weights).
        model_path = args.model_path
        assert(os.path.isfile(model_path))
    
        # Restore only the layers up to fc6 (included)
        # Calling function `init_fn(sess)` will load all the pretrained weights.
        variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8', 'vgg_16/fc7', 'vgg_16/fc6'])
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)
    
        # Initialization operation from scratch for the new "fc6" layers
        # `get_variables` will only return the variables whose name starts with the given pattern
        fc6_variables = tf.contrib.framework.get_variables('vgg_16/fc6')
        fc6_init = tf.variables_initializer(fc6_variables)
        
        # Initialization operation from scratch for the new "fc7" layers
        # `get_variables` will only return the variables whose name starts with the given pattern
        fc7_variables = tf.contrib.framework.get_variables('vgg_16/fc7')
        fc7_init = tf.variables_initializer(fc7_variables)
        
        # Initialization operation from scratch for the new "fc8" layers
        # `get_variables` will only return the variables whose name starts with the given pattern
        fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
        fc8_init = tf.variables_initializer(fc8_variables)
        
        # Initialize additional fully connected layer
        fc9_variables = 
    
        # ---------------------------------------------------------------------
        # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
        # We can then call the total loss easily
        # tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 
        tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) # softmax cross entropy loss so can have labels with multiple classes
        loss = tf.losses.get_total_loss()  
    
        # First we want to train only the reinitialized last layer fc8 for a few epochs.
        # We run minimize the loss only with respect to the fc8 variables (weight and bias).
        fc876_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate1)
        fc876_train_op = fc876_optimizer.minimize(loss, var_list=[fc8_variables, fc7_variables, fc6_variables])

        # Then we want to finetune the entire model for a few epochs.
        # We run minimize the loss only with respect to all the variables.
        full_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate2)
        full_train_op = full_optimizer.minimize(loss)
    
        # Evaluation metrics
        
        tf.get_default_graph().finalize()
    
    # --------------------------------------------------------------------------
    # Now that we have built the graph and finalized it, we define the session.
    # The session is the interface to *run* the computational graph.
    # We can call our training operations with `sess.run(train_op)` for instance
    with tf.Session(graph=graph) as sess:
        init_fn(sess)  # load the pretrained weights
        sess.run(fc8_init)  # initialize the new fc8 layer
        sess.run(fc7_init) # initialize fc7
        sess.run(fc6_init) # initialize fc6

        # Update only the last layer for a few epochs.
        for epoch in range(args.num_epochs1):
            # Run an epoch over the training data.
            print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs1))
            # Here we initialize the iterator with the training set.
            # This means that we can go through an entire epoch until the iterator becomes empty.
            sess.run(train_init_op)
            while True:
                try:
                    _, curr_loss, curr_logits, curr_labels = sess.run([fc876_train_op, loss, logits, labels], {is_training: True})
                    loss_log.append(curr_loss)
                    if curr_loss < 100:
                        print('Current loss: %f' % curr_loss)

                        # Check F score on logits
                        threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
                        Fs = []
                        for i in range(len(threshs)):
                            Fs.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
                        max_i = Fs.index(max(Fs))
                        print('thresh: %f, max F: %f' %(threshs[max_i], Fs[max_i]))
                        thresh_log.append(threshs[max_i])
                        f_log.append(Fs[max_i])
                    else:
                        print("EXPLODING LOSS")
                        break
                except tf.errors.OutOfRangeError:
                    break
            
            # Print current loss
            print('Current epoch loss: %f' % curr_loss)
            
            # Check F score on logits
            threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
            Fs = []
            for i in range(len(threshs)):
                Fs.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
            max_i = Fs.index(max(Fs))
            print('Epoch thresh: %f, max F: %f' %(threshs[max_i], Fs[max_i]))
            
            # Check accuracy on the train and val sets every epoch.
            # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
            # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
            # print('Train accuracy: %f' % train_acc)
            # print('Val accuracy: %f\n' % val_acc)
    
    
        # Train the entire model for a few more epochs, continuing with the *same* weights.
        for epoch in range(args.num_epochs2):
            print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs2))
            sess.run(train_init_op)
            while True:
                try:
                    _, curr_loss, curr_logits, curr_labels = sess.run([full_train_op, loss, logits, labels], {is_training: True})
                    loss_log.append(curr_loss)

                    if curr_loss < 100:
                        print('Current loss: %f' % curr_loss)

                        # Check F score on logits
                        threshs = [0.5, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
                        Fs = []
                        for i in range(len(threshs)):
                            Fs.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
                        max_i = Fs.index(max(Fs))
                        print('thresh: %f, max F: %f' %(threshs[max_i], Fs[max_i]))
                        thresh_log.append(threshs[max_i])
                        f_log.append(Fs[max_i])
                    else:
                        print("EXPLODING LOSS")
                        break
                except tf.errors.OutOfRangeError:
                    break
    
            # Print current loss
            print('Entire model epoch loss: %f' % curr_loss)
    
            # Check F score on logits
            threshs = [0.05, 0.1, 0.25, 0.5, 0.75, 0.8, 0.9, 0.95]
            Fs = []
            for i in range(len(threshs)):
                Fs.append(fbeta_score(curr_labels, np.array(curr_logits) > threshs[i], beta=2, average='samples'))
            max_i = Fs.index(max(Fs))
            print('Entire model epoch thresh: %f, max F: %f' %(threshs[max_i], Fs[max_i]))
            
            # Check accuracy on the train and val sets every epoch
            # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
            # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
            # print('Train accuracy: %f' % train_acc)
            # print('Val accuracy: %f\n' % val_acc)
    

100%|██████████| 40479/40479 [00:00<00:00, 390299.89it/s]

[{'lr2': 1e-05, 'wd': 0.0001, 'dp': 0.4, 'lr1': 0.001}]
{'lr2': 1e-05, 'wd': 0.0001, 'dp': 0.4, 'lr1': 0.001}
listed
dataset created
INFO:tensorflow:Restoring parameters from vgg_16.ckpt



INFO:tensorflow:Restoring parameters from vgg_16.ckpt


Starting epoch 1 / 10
Current loss: 7.705766
thresh: 0.250000, max F: 0.473975
Current loss: 8.107130
thresh: 0.800000, max F: 0.630032
Current loss: 7.674615
thresh: 0.800000, max F: 0.658615
Current loss: 8.124624
thresh: 0.900000, max F: 0.684927
Current loss: 8.112829
thresh: 0.750000, max F: 0.642529
Current loss: 7.545458
thresh: 0.900000, max F: 0.649560
Current loss: 7.540549
thresh: 0.900000, max F: 0.675468
Current loss: 7.616459
thresh: 0.750000, max F: 0.627566
Current loss: 7.288611
thresh: 0.900000, max F: 0.658123
Current loss: 7.957115
thresh: 0.750000, max F: 0.644743
Current loss: 7.737102
thresh: 0.950000, max F: 0.665767
Current loss: 7.825218
thresh: 0.800000, max F: 0.646709
Current loss: 7.398033
thresh: 0.900000, max F: 0.661985
Current loss: 8.405693
thresh: 0.800000, max F: 0.688250
Current loss: 7.999835
thresh: 0.900000, max F: 0.669203
Current loss: 8.602598
thresh: 0.800000, max F: 0.682773
Current loss: 7.371038
thresh: 0.900000, max F: 0.688544
Current l

  'precision', 'predicted', average, warn_for)


KeyboardInterrupt: 

In [9]:
loss_log

[8.4865713,
 8.9777699,
 8.3237562,
 8.3305616,
 8.3337154,
 8.0930071,
 8.8884449,
 7.9526343,
 8.2214165,
 7.9075079,
 7.837399,
 7.8236284,
 8.1950054,
 8.2363329,
 7.5359364,
 7.8460698,
 8.1068268,
 8.0083027,
 7.6284266,
 7.6762023,
 7.8359299,
 8.247345,
 7.7166495,
 7.9537516,
 7.7317286,
 8.2916603,
 7.4644785,
 7.9027176,
 7.4496465,
 7.3289752,
 7.4514861,
 7.4237981,
 7.7085338,
 8.0786133,
 8.1121902,
 8.4150343,
 7.5011468,
 7.5444694,
 7.5887737,
 7.4624319,
 7.8495746,
 8.3187094,
 7.6243014,
 7.4161825,
 7.3927355,
 7.4336648,
 7.6829796,
 7.925106,
 7.0129528,
 6.9112325,
 7.5184927,
 7.7016068,
 7.9828238,
 6.8553009,
 7.3367553,
 7.4809208,
 7.1702213,
 7.702527,
 7.6048265,
 7.6174836,
 7.7716861,
 7.6493282,
 7.1154957,
 7.558239,
 7.0838046,
 7.6624026,
 7.420279,
 7.4077821,
 7.3571291,
 7.3151789,
 6.7448716,
 7.5804701,
 7.8100247,
 7.4837108,
 7.4721327,
 8.1705723,
 7.4240775,
 7.6582222,
 7.5887871,
 7.1351242,
 7.3274717,
 7.5731592,
 7.881537,
 7.1880417,

In [6]:
thresh_log

[0.05, 0.05, 0.05, 0.1, 0.1]

In [7]:
f_log

[0.46531563784031454,
 0.44781595830271975,
 0.48591363878800875,
 0.4691980017305285,
 0.48949742574814081]