In [53]:
"""
Uses tf.contrib.data module which is in release candidate 1.2.0rc0
Based on:
    - PyTorch example from Justin Johnson:
      https://gist.github.com/jcjohnson/6e41e8512c17eae5da50aebef3378a4c
      - https://gist.github.com/omoindrot/dedc857cdc0e680dfb1be99762990c9c
Required packages: tensorflow (v1.2)
You can install the release candidate 1.2.0rc0 here:
https://www.tensorflow.org/versions/r1.2/install/

Download the weights trained on ImageNet for VGG:
```
wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
tar -xvf vgg_16_2016_08_28.tar.gz
rm vgg_16_2016_08_28.tar.gz
```
"""

%matplotlib inline

from matplotlib import pyplot as plt

import argparse
import os

import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets


parser = argparse.ArgumentParser()
# parser.add_argument('--train_dir', default='coco-animals/train')
parser.add_argument('--train_dir', default='data/train-jpg/')
# parser.add_argument('--val_dir', default='coco-animals/val')
# parser.add_argument('--val_dir', default='data/')
parser.add_argument('--model_path', default='vgg_16.ckpt', type=str)
parser.add_argument('--batch_size', default=40, type=int) #32
parser.add_argument('--num_workers', default=20, type=int) #4
parser.add_argument('--num_epochs1', default=4, type=int) #10
parser.add_argument('--num_epochs2', default=4, type=int) #10
parser.add_argument('--learning_rate1', default=1e-3, type=float)
parser.add_argument('--learning_rate2', default=1e-5, type=float)
parser.add_argument('--dropout_keep_prob', default=0.5, type=float)
parser.add_argument('--weight_decay', default=5e-4, type=float)


VGG_MEAN = [123.68, 116.78, 103.94]

print(tf.__version__)

1.2.0-rc1


In [7]:
import pandas as pd
import cv2
import os
import numpy as np
from tqdm import tqdm

# read in the data files 
x = []
x_test = []
y = [] # one hot encoding of which of the 17 tags this image is (images can have multiple tags)

df_train = pd.read_csv('data/train_v2.csv') # column1: image_name, column2: tags (labels for image file)

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))
label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

# for each filename and targets, read in pixel values from file, save image (resized to 32 x 32) to x, labels vector to y
for f, tags in tqdm(df_train.values, miniters=1000):
    img = cv2.imread('data/train-jpg/{}.jpg'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x.append(cv2.resize(img, (32, 32)))
    y.append(targets)

  0%|          | 0/40479 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [None]:
y = np.array(y, np.uint8)
x = np.array(x, np.float16) / 255.

split = 35000
x_train, x_valid, y_train, y_valid = x[:split], x[split:], y[:split], y[split:]

In [None]:
# view some images

plt.imshow(x[0])
plt.show()
#plt.imshow(x[1])
#plt.show()
print(x[1].shape)
plt.imshow(x[10][:, :, 0])
plt.show()
plt.imshow(x[10][:, :, 1])
plt.show()
plt.imshow(x[10][:, :, 2])

plt.show()


print('raw_image')
img_ex = cv2.imread('data/train-jpg/{}.jpg'.format('train_0'))
plt.imshow(img_ex)
plt.show()


plt.imshow(x[1])
plt.show()

img_ex = cv2.imread('data/train-jpg/{}.jpg'.format('train_1'))
plt.imshow(img_ex)
plt.show()

print(y[0], y[1], y[0].shape)


In [2]:
import pandas as pd
filenames_targets = pd.read_csv('data/train_v2.csv') # column1: image_name, column2: tags (labels for image file)

In [54]:
# Helper functions
import pandas as pd
import random
from tqdm import tqdm
import numpy as np
from sklearn.metrics import fbeta_score

"""
# read in the data files 
x = []
x_test = []
y = [] # one hot encoding of which of the 17 tags this image is (images can have multiple tags)

df_train = pd.read_csv('data/train_v2.csv') # column1: image_name, column2: tags (labels for image file)

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))
label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

# for each filename and targets, read in pixel values from file, save image (resized to 32 x 32) to x, labels vector to y
for f, tags in tqdm(df_train.values, miniters=1000):
    img = cv2.imread('data/train-jpg/{}.jpg'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x.append(cv2.resize(img, (32, 32)))
    y.append(targets)
"""
    

def list_images(directory):
    """
    Get all the images and labels from data/train_v2.csv
    """
    filenames_targets = pd.read_csv('data/train_v2.csv') # column1: image_name, column2: tags (labels for image file)
    filenames = [directory + file + '.jpg' for file in filenames_targets['image_name'].tolist()]
    
    labels = filenames_targets['tags'].tolist()

    # for now just use integers instead of one hot encoding (with multiple ones possible)
    # unique_labels = list(set(labels))

    # label_to_int = {}
    # for i, label in enumerate(unique_labels):
    #     label_to_int[label] = i

    # labels = [label_to_int[l] for l in labels]
    
    # Convert to one-hot labels
    flatten = lambda l: [item for sublist in l for item in sublist]
    labels = list(set(flatten([l.split(' ') for l in filenames_targets['tags'].values])))
    label_map = {l: i for i, l in enumerate(labels)}
    inv_label_map = {i: l for l, i in label_map.items()}

    one_hot_labels = []
    
    for f, tags in tqdm(filenames_targets.values, miniters=1000):
        targets = [0]*17 #np.zeros(17)
        for t in tags.split(' '):
            targets[label_map[t]] = 1 
        one_hot_labels.append(targets)
    
    return filenames, one_hot_labels
    
    '''
    labels = os.listdir(directory)
    files_and_labels = []
    for label in labels:
        for f in os.listdir(os.path.join(directory, label)):
            files_and_labels.append((os.path.join(directory, label, f), label))

    filenames, labels = zip(*files_and_labels)
    filenames = list(filenames)
    labels = list(labels)
    unique_labels = list(set(labels))

    label_to_int = {}
    for i, label in enumerate(unique_labels):
        label_to_int[label] = i

    labels = [label_to_int[l] for l in labels]
    '''

# change to F score
def check_accuracy(sess, correct_prediction, is_training, dataset_init_op):
    """
    Check the accuracy of the model on either train or val (depending on dataset_init_op).
    """
    # Initialize the correct dataset
    sess.run(dataset_init_op)
    num_correct, num_samples = 0, 0
    while True:
        try:
            correct_pred = sess.run(correct_prediction, {is_training: False})
            num_correct += correct_pred.sum()
            num_samples += correct_pred.shape[0]
        except tf.errors.OutOfRangeError:
            break

    # Return the fraction of datapoints that were correctly classified
    acc = float(num_correct) / num_samples
    return acc


def split_samples(all_filenames, all_labels):
    """
    Split all filenames and labels into training and test sets, return both
    """
    n = len(all_filenames)
    order = random.sample(range(n), n)
    all_filenames_random = [all_filenames[i] for i in order]
    all_labels_random = [all_labels[i] for i in order]
    
    third = int(n/3)
    val_filenames = all_filenames_random[:third]
    val_labels = all_labels_random[:third]
    train_filenames = all_filenames_random[third:]
    train_labels = all_labels_random[third:]
    
    return train_filenames, train_labels, val_filenames, val_labels 
    

In [55]:
import sys; sys.argv=['']; del sys
args = parser.parse_args()

In [None]:
# Get the list of filenames and corresponding list of labels for training et validation
# train_filenames, train_labels = list_images(args.train_dir)
# val_filenames, val_labels = list_images(args.val_dir)


all_filenames, all_labels = list_images(args.train_dir)

train_filenames, train_labels, val_filenames, val_labels = split_samples(all_filenames, all_labels)

num_classes = 17 #len(set(all_labels))


# --------------------------------------------------------------------------
# In TensorFlow, you first want to define the computation graph with all the
# necessary operations: loss, training op, accuracy...
# Any tensor created in the `graph.as_default()` scope will be part of `graph`
graph = tf.Graph()
with graph.as_default():
    # Standard preprocessing for VGG on ImageNet taken from here:
    # https://github.com/tensorflow/models/blob/master/slim/preprocessing/vgg_preprocessing.py
    # Also see the VGG paper for more details: https://arxiv.org/pdf/1409.1556.pdf

    # Preprocessing (for both training and validation):
    # (1) Decode the image from jpg format
    # (2) Resize the image so its smaller side is 256 pixels long
    def _parse_function(filename, label):
        image_string = tf.read_file(filename)
        image_decoded = tf.image.decode_jpeg(image_string, channels=3)          # (1)
        image = tf.cast(image_decoded, tf.float32)

        smallest_side = 256.0
        height, width = tf.shape(image)[0], tf.shape(image)[1]
        height = tf.to_float(height)
        width = tf.to_float(width)

        scale = tf.cond(tf.greater(height, width),
                        lambda: smallest_side / width,
                         lambda: smallest_side / height)
        new_height = tf.to_int32(height * scale)
        new_width = tf.to_int32(width * scale)

        resized_image = tf.image.resize_images(image, [new_height, new_width])  # (2)
        return resized_image, label

    # Preprocessing (for training)
    # (3) Take a random 224x224 crop to the scaled image
    # (4) Horizontally flip the image with probability 1/2
    # (5) Substract the per color mean `VGG_MEAN`
    # Note: we don't normalize the data here, as VGG was trained without normalization
    def training_preprocess(image, label):
        crop_image = tf.random_crop(image, [224, 224, 3])                       # (3)
        flip_image = tf.image.random_flip_left_right(crop_image)                # (4)

        means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
        centered_image = flip_image - means                                     # (5)

        return centered_image, label

    # Preprocessing (for validation)
    # (3) Take a central 224x224 crop to the scaled image
    # (4) Substract the per color mean `VGG_MEAN`
    # Note: we don't normalize the data here, as VGG was trained without normalization
    def val_preprocess(image, label):
        crop_image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)    # (3)

        means = tf.reshape(tf.constant(VGG_MEAN), [1, 1, 3])
        centered_image = crop_image - means                                     # (4)

        return centered_image, label

    # ----------------------------------------------------------------------
    # DATASET CREATION using tf.contrib.data.Dataset
    # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/data

    # The tf.contrib.data.Dataset framework uses queues in the background to feed in
    # data to the model.
    # We initialize the dataset with a list of filenames and labels, and then apply
    # the preprocessing functions described above.
    # Behind the scenes, queues will load the filenames, preprocess them with multiple
    # threads and apply the preprocessing in parallel, and then batch the data

    # Training dataset
    train_filenames = tf.constant(train_filenames)
    train_labels = tf.constant(train_labels)
    train_dataset = tf.contrib.data.Dataset.from_tensor_slices((train_filenames, train_labels))
    train_dataset = train_dataset.map(_parse_function,
       num_threads=args.num_workers, output_buffer_size=args.batch_size)
    train_dataset = train_dataset.map(training_preprocess,
       num_threads=args.num_workers, output_buffer_size=args.batch_size)
    train_dataset = train_dataset.shuffle(buffer_size=10000)  # don't forget to shuffle
    batched_train_dataset = train_dataset.batch(args.batch_size)

    # Validation dataset
    val_filenames = tf.constant(val_filenames)
    val_labels = tf.constant(val_labels)
    val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_filenames, val_labels))
    val_dataset = val_dataset.map(_parse_function,
    num_threads=args.num_workers, output_buffer_size=args.batch_size)
    val_dataset = val_dataset.map(val_preprocess,
    num_threads=args.num_workers, output_buffer_size=args.batch_size)
    batched_val_dataset = val_dataset.batch(args.batch_size)

    print("dataset created")
    # Now we define an iterator that can operator on either dataset.
    # The iterator can be reinitialized by calling:
    #     - sess.run(train_init_op) for 1 epoch on the training set
    #     - sess.run(val_init_op)   for 1 epoch on the valiation set
    # Once this is done, we don't need to feed any value for images and labels
    # as they are automatically pulled out from the iterator queues.

    # A reinitializable iterator is defined by its structure. We could use the
    # `output_types` and `output_shapes` properties of either `train_dataset`
    # or `validation_dataset` here, because they are compatible.
    iterator = tf.contrib.data.Iterator.from_structure(batched_train_dataset.output_types,
                                                       batched_train_dataset.output_shapes)
    images, labels = iterator.get_next()
    
    print('got next batch')
    train_init_op = iterator.make_initializer(batched_train_dataset)
    val_init_op = iterator.make_initializer(batched_val_dataset)

    # Indicates whether we are in training or in test mode
    is_training = tf.placeholder(tf.bool)

    # ---------------------------------------------------------------------
    # Now that we have set up the data, it's time to set up the model.
    # For this example, we'll use VGG-16 pretrained on ImageNet. We will remove the
    # last fully connected layer (fc8) and replace it with our own, with an
    # output size num_classes=8
    # We will first train the last layer for a few epochs.
    # Then we will train the entire model on our dataset for a few epochs.

    # Get the pretrained model, specifying the num_classes argument to create a new
    # fully connected replacing the last one, called "vgg_16/fc8"
    # Each model has a different architecture, so "vgg_16/fc8" will change in another model.
    # Here, logits gives us directly the predicted scores we wanted from the images.
    # We pass a scope to initialize "vgg_16/fc8" weights with he_initializer
    vgg = tf.contrib.slim.nets.vgg
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=args.weight_decay)):
        logits, _ = vgg.vgg_16(images, num_classes=num_classes, is_training=is_training,
                                   dropout_keep_prob=args.dropout_keep_prob)

    # Specify where the model checkpoint is (pretrained weights).
    model_path = args.model_path
    assert(os.path.isfile(model_path))

    # Restore only the layers up to fc7 (included)
    # Calling function `init_fn(sess)` will load all the pretrained weights.
    variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
    init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)

    # Initialization operation from scratch for the new "fc8" layers
    # `get_variables` will only return the variables whose name starts with the given pattern
    fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
    fc8_init = tf.variables_initializer(fc8_variables)

    # ---------------------------------------------------------------------
    # Using tf.losses, any loss is added to the tf.GraphKeys.LOSSES collection
    # We can then call the total loss easily
    # tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 
    tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) # softmax cross entropy loss so can have labels with multiple classes
    loss = tf.losses.get_total_loss()  

    # First we want to train only the reinitialized last layer fc8 for a few epochs.
    # We run minimize the loss only with respect to the fc8 variables (weight and bias).
    fc8_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate1)
    fc8_train_op = fc8_optimizer.minimize(loss, var_list=fc8_variables)

    # Then we want to finetune the entire model for a few epochs.
    # We run minimize the loss only with respect to all the variables.
    full_optimizer = tf.train.GradientDescentOptimizer(args.learning_rate2)
    full_train_op = full_optimizer.minimize(loss)

    # Evaluation metrics
       
    # prediction = tf.to_int32(tf.argmax(logits, 1))
    # correct_prediction = tf.equal(prediction, labels)
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    ########################
    # best_F1_train = -1
    # for t in np.arange(.1, .3, .01):
    #    F1 = fbeta_score(y_train, np.array(p_train) > t, beta=2, average='samples')
    #    if F1 > best_F1_train:
    #        thresh = t
    #        best_F1_train = F1

    #p_valid = model.predict(x_valid, batch_size=128)
    # thresh = 0.5
    # F1 = fbeta_score(labels, np.array(logits) > thresh, beta=2, average='samples')
    # print("F1")
    # print(F1)
    
    # print("Current results", "F1: " , F1, "threshold: ", thresh , " Dropout: ", dProb, " Filter Size: ", fSize)
    # if F1 > best_F1:
    #    best_f = fSize
    #    best_d = dProb
    #    best_thresh = thresh
    #    best_F1 = F1
    #    print("New best F1 found with", "F1: " , best_F1, "threshold: ", best_thresh , " Dropout: ", best_d, " Filter Size: ", best_f)

    
    tf.get_default_graph().finalize()

# --------------------------------------------------------------------------
# Now that we have built the graph and finalized it, we define the session.
# The session is the interface to *run* the computational graph.
# We can call our training operations with `sess.run(train_op)` for instance
with tf.Session(graph=graph) as sess:
    init_fn(sess)  # load the pretrained weights
    sess.run(fc8_init)  # initialize the new fc8 layer

    # Update only the last layer for a few epochs.
    for epoch in range(args.num_epochs1):
        # Run an epoch over the training data.
        print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs1))
        # Here we initialize the iterator with the training set.
        # This means that we can go through an entire epoch until the iterator becomes empty.
        sess.run(train_init_op)
        while True:
            try:
                _, curr_loss, curr_logits, curr_labels = sess.run([fc8_train_op, loss, logits, labels], {is_training: True})
                print('Current loss: %f' % curr_loss)
            except tf.errors.OutOfRangeError:
                break
                
        # Check F1 score on logits
        thresh = 0.5
        F1 = fbeta_score(curr_labels, np.array(curr_logits) > thresh, beta=2, average='samples')
        print('F1: %f' % F1)
        
        # Check accuracy on the train and val sets every epoch.
        # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
        # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
        # print('Train accuracy: %f' % train_acc)
        # print('Val accuracy: %f\n' % val_acc)


    # Train the entire model for a few more epochs, continuing with the *same* weights.
    for epoch in range(args.num_epochs2):
        print('Starting epoch %d / %d' % (epoch + 1, args.num_epochs1))
        sess.run(train_init_op)
        while True:
            try:
                _, curr_loss, curr_logits, curr_labels = sess.run([full_train_op, loss, logits, labels], {is_training: True})
                print('Current loss: %f' % curr_loss)
            except tf.errors.OutOfRangeError:
                break

        # Check F1 score on logits
        thresh = 0.5
        F1 = fbeta_score(curr_labels, np.array(curr_logits) > thresh, beta=2, average='samples')
        print('F1: %f' % F1)
        
        # Check accuracy on the train and val sets every epoch
        # train_acc = check_accuracy(sess, correct_prediction, is_training, train_init_op)
        # val_acc = check_accuracy(sess, correct_prediction, is_training, val_init_op)
        # print('Train accuracy: %f' % train_acc)
        # print('Val accuracy: %f\n' % val_acc)


100%|██████████| 40479/40479 [00:00<00:00, 421483.73it/s]

dataset created
got next batch
INFO:tensorflow:Restoring parameters from vgg_16.ckpt



INFO:tensorflow:Restoring parameters from vgg_16.ckpt


Starting epoch 1 / 4
Current loss: 13.020557
Current loss: 9.494416
Current loss: 9.266887
Current loss: 7.931184
Current loss: 8.027351
Current loss: 8.142326
Current loss: 9.702850
Current loss: 8.159016
Current loss: 9.700662
Current loss: 9.093849
Current loss: 10.039870
Current loss: 10.191294
Current loss: 10.603869
Current loss: 10.660870
Current loss: 12.616015
Current loss: 13.469410
Current loss: 11.645139
Current loss: 9.253268
Current loss: 15.476110
Current loss: 12.592234
Current loss: 10.178679
Current loss: 16.079582
Current loss: 13.703094
Current loss: 13.503307
Current loss: 15.821052
Current loss: 14.554851
Current loss: 14.258167
Current loss: 16.114143
Current loss: 17.408394
Current loss: 18.609873
Current loss: 14.635252
Current loss: 20.348776
Current loss: 18.531437
Current loss: 17.410519
Current loss: 21.604156
Current loss: 17.998486
Current loss: 22.909023
Current loss: 19.567867
Current loss: 17.626442
Current loss: 18.477270
Current loss: 18.823019
Curre