# Tiny ImageNet Inception Transfer Learning

## Goal
Use a Inception v3 model pretrained on the full ImageNet in order to achieve >60% accuracy on the Tiny ImageNet validation set.

## Approach
1. Download a pretrained Inception v3 model
2. Load the Tiny ImageNet training and validation datasets
3. Cache the transfer values (second-last layer activations) for the whole Tiny ImageNet training and validation set
4. Build the graph to take the feature extractions, apply one dense layer and one softmax classification layer
5. Design a training algorithm and conduct hyperparameter search to push the accuracy

## 1. Download the Inception v3 Model

In [6]:
from model import inception
inception.maybe_download()
model = inception.Inception()

Downloading Inception v3 Model ...
- Download progress: 100.0%
Download finished. Extracting files.
Done.


## 2. Load Tiny ImageNet 

In [2]:
import data.tiny_imagenet as data
import numpy as np
import cv2

# Helper Functions to load all the images, not create a `tf.train.input_producer`-queue
def load_image(filename):
    img = cv2.imread(filename)
    return img

def load_tiny_image_net(mode, limit=None):
    filenames_labels = data.load_filenames_labels(mode)
    # import: don't shuffle here, otherwise the cached transfer values will be useless!
    if limit:
        filenames_labels = filenames_labels[:limit]
    images = np.array([load_image(img) for img, _ in filenames_labels])
    labels = np.array([label for _, label in filenames_labels])
    labels = labels.astype(np.uint8)
    return images, labels

In [3]:
# this takes about 1-2min (use the limit parameter if you're testing something and don't need to load everything)
limit = 100 # None
train_images, train_labels = load_tiny_image_net('train', limit=limit)
val_images, val_labels = load_tiny_image_net('val', limit=limit)

In [4]:
# check the shapes (should be 100,000 training and 10,000 validation images with dimensions 64x64x3)
print("Training Set: images: {}, labels: {}".format(train_images.shape, train_labels.shape))
print("Validation Set: images: {}, labels: {}".format(val_images.shape, val_labels.shape))

Training Set: images: (100, 64, 64, 3), labels: (100,)
Validation Set: images: (100, 64, 64, 3), labels: (100,)


## 3. Generate and Cache the Transfer Values

In [5]:
import os

cache_dir = "cache/tiny-imagenet/"
cache_path_train = os.path.join(cache_dir, "inception_tiny_imagenet_train.pkl")
cache_path_val = os.path.join(cache_dir, "inception_tiny_imagenet_val.pkl")

if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)

In [6]:
# this should be done on a fast, GPU-powered machine. TODO: process in batches!
print("Training transfer values:")
transfer_values_train = inception.transfer_values_cache(cache_path=cache_path_train,
                                                        images=train_images,
                                                        model=model)
print("Validation transfer values:")
transfer_values_val = inception.transfer_values_cache(cache_path=cache_path_val,
                                                      images=val_images,
                                                      model=model)

Training transfer values:
- Data loaded from cache-file: cache/tiny-imagenet/inception_tiny_imagenet_train.pkl
Validation transfer values:
- Data loaded from cache-file: cache/tiny-imagenet/inception_tiny_imagenet_val.pkl


In [7]:
# check the shapes of the transfer values (should have 2048 transfer values (second channel))
print("Training Set transfer values: {}".format(transfer_values_train.shape))
print("Validation Set transfer values: {}".format(transfer_values_val.shape))

_, transfer_len = transfer_values_train.shape # 2048

Training Set transfer values: (100, 2048)
Validation Set transfer values: (100, 2048)


## 4. Build the Classification Graph

In [8]:
import tensorflow as tf

In [9]:
# TODO weight decay (?)
def model(x):
    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        # simple 2-layer graph
        fc1 = tf.layers.dense(x, units=1024, name="classifier/fc1")
        fc1 = tf.nn.relu(fc1)

        logits = tf.layers.dense(fc1, units=data.NUM_CLASSES, name="classifier/logits")
        softmax = tf.nn.softmax(logits, axis=1, name='softmax')
    return logits, softmax

In [10]:
def loss(labels, logits):  # TODO weight decay
    labels_one_hot = tf.one_hot(labels, depth=data.NUM_CLASSES)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_one_hot, logits=logits)
    loss = tf.reduce_mean(cross_entropy, name="classifier/cross_entropy_loss")
    return loss

In [11]:
def accuracy(labels, softmax):
    correct = tf.cast(tf.equal(tf.argmax(softmax, axis=1), tf.cast(labels, tf.int64)), dtype=tf.float32)
    return tf.reduce_mean(correct, name="accuracy")

## 5. Training Algorithm

In [12]:
LEARNING_RATE = 0.002
NUM_EPOCHS = 1000
TRAIN_BATCH_SIZE = 64
VALIDATION_BATCH_SIZE = 64
STEPS_PER_EPOCH = min(data.NUM_TRAIN_SAMPLES // TRAIN_BATCH_SIZE, data.NUM_TRAIN_SAMPLES)

In [13]:
def get_optimization_op(loss):
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    return optimizer.minimize(loss)

In [14]:
def random_batch():
    num_images = len(transfer_values_train)
    # random index
    idx = np.random.choice(num_images, size=TRAIN_BATCH_SIZE, replace=False)
    x_batch = transfer_values_train[idx]
    y_batch = train_labels[idx]

    return x_batch, y_batch

In [15]:
def evaluate(batch_size, x, y):
    vals = []
    total_count = len(y)
    for i in range(min(total_count // batch_size, total_count)):
        from_idx = i*batch_size
        to_idx = (i+1)*batch_size
        x_batch = x[from_idx:to_idx]
        y_batch = y[from_idx:to_idx]
        vals.append(sess.run([accuracy_val, loss_val], feed_dict={
            features: x_batch,
            labels: y_batch
        }))
    acc_mean, loss_mean = np.mean(vals, axis=0)
    return acc_mean, loss_mean

In [16]:
graph = tf.Graph()
with graph.as_default():
    # placeholders
    features = tf.placeholder(tf.float32, shape=[None, transfer_len], name="transfer_features")
    labels = tf.placeholder(tf.uint8, shape=[None], name="labels")
    
    # graph
    logits, softmax = model(features)
    loss_val = loss(labels, logits)
    accuracy_val = accuracy(labels, softmax)
    
    optimizer = get_optimization_op(loss_val)
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

In [17]:
sess = tf.Session(graph=graph)
with sess.as_default():
    sess.run(init)
    for i in range(NUM_EPOCHS):
        print("Starting epoch #%d" % (i + 1))
        # show validation accuracy
        val_acc, val_loss = evaluate(batch_size=VALIDATION_BATCH_SIZE, 
                                     x=transfer_values_val, y=val_labels)
        print("Validation before: accuracy {}, loss {}".format(val_acc, val_loss))
        
        vals = []
        for _ in range(STEPS_PER_EPOCH):
            x_batch, y_true_batch = random_batch()
            _, t_acc, t_loss = sess.run([optimizer, accuracy_val, loss_val], feed_dict={
                features: x_batch,
                labels: y_true_batch
            })
            vals.append((t_acc, t_loss))
        train_acc, train_loss = np.mean(vals, axis=0)
        print("Training: accuracy {}, loss {}".format(train_acc, train_loss))

Starting epoch #1
Validation before: accuracy 0.0, loss 5.495240688323975
Training: accuracy 0.9977793097496033, loss 0.013392413966357708
Starting epoch #2
Validation before: accuracy 0.0, loss 19.239974975585938


KeyboardInterrupt: 