In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import time
from datetime import timedelta
import math
import os

## Load Data

In [2]:
import cifar10
from cifar10 import img_size, num_channels, num_classes

In [3]:
class_names = cifar10.load_class_names()
class_names

Loading data: data/CIFAR-10/cifar-10-batches-py/batches.meta


['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

Load the training-set. This returns the images, the class-numbers as integers, and the class-numbers as One-Hot encoded arrays called labels. <br>
Load the test-set.

In [4]:
images_train, cls_train, labels_train = cifar10.load_training_data()
images_test, cls_test, labels_test = cifar10.load_test_data()

Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_1
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_2
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_3
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_4
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_5
Loading data: data/CIFAR-10/cifar-10-batches-py/test_batch


## Image preprocessing

In [5]:
img_size_cropped = 24

In [6]:
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size, num_channels], name='x')
y = tf.placeholder(tf.float32, shape=[None, num_classes], name='y')

In [7]:
def pre_process_image(image, training):
    # This function takes a single image as input,
    # and a boolean whether to build the training or testing graph.
    
    if training:
        # For training, add the following to the TensorFlow graph.

        # Randomly crop the input image.
        image = tf.random_crop(image, size=[img_size_cropped, img_size_cropped, num_channels])

        # Randomly flip the image horizontally.
        image = tf.image.random_flip_left_right(image)
        
        # Randomly adjust hue, contrast and saturation.
        image = tf.image.random_hue(image, max_delta=0.05)
        image = tf.image.random_contrast(image, lower=0.3, upper=1.0)
        image = tf.image.random_brightness(image, max_delta=0.2)
        image = tf.image.random_saturation(image, lower=0.0, upper=2.0)

        # Some of these functions may overflow and result in pixel
        # values beyond the [0, 1] range. It is unclear from the
        # documentation of TensorFlow whether this is
        # intended. A simple solution is to limit the range.

        # Limit the image pixels between [0, 1] in case of overflow.
        image = tf.minimum(image, 1.0)
        image = tf.maximum(image, 0.0)
    else:
        # For training, add the following to the TensorFlow graph.

        # Crop the input image around the centre so it is the same
        # size as images that are randomly cropped during training.
        image = tf.image.resize_image_with_crop_or_pad(image,
                                                       target_height=img_size_cropped,
                                                       target_width=img_size_cropped)

    return image

In [8]:
def pre_process(images, training):
    return tf.map_fn(lambda image: pre_process_image(image, training), images)

## AlexNet
Original paper: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

### Helper functions

In [9]:
def weight_variable(shape, name):
    return tf.get_variable(name, initializer=tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape, name):
    return tf.get_variable(name, initializer=tf.constant(0.3, shape=shape))

def conv(x, *, sz, depth, out, groups=1, stride=1, padding="SAME", name):
    with tf.variable_scope(name) as scope:
        w = weight_variable((sz, sz, depth // groups, out), name="weight")
        b = bias_variable((out,), name="bias")
            
        convolution = lambda x, w: tf.nn.conv2d(x, w, strides=[1, stride, stride, 1], padding=padding)

        if groups == 1:
            conv_result = convolution(x, w)
        else:
            x_groups = tf.split(x, num_or_size_splits=groups, axis=3)
            w_groups = tf.split(w, num_or_size_splits=groups, axis=3)
            conv_result = tf.concat([convolution(x_g, w_g) for x_g, w_g in zip(x_groups, w_groups)], axis=3)
            
        return tf.nn.relu(tf.nn.bias_add(conv_result, b), name=scope.name)
    
def max_pool(x, *, sz, stride=1, padding="VALID", name):
    return tf.nn.max_pool(x,
                          ksize=[1, sz, sz, 1], 
                          strides=[1, stride, stride, 1],
                          padding=padding, 
                          name=name)

def lrn(x, *, depth_radius=2, bias=2, alpha=1e-4, beta=0.75, name):
    return tf.nn.lrn(x, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta, name=name)

def full(x, *, out, use_relu=True, name):
    with tf.variable_scope(name) as scope:
        sz = int(np.prod(x.shape[1:]))
        flattened = tf.reshape(x, [-1, sz])

        w = weight_variable((sz, out), name="weight")
        b = bias_variable((out,), name="bias")
        
        value = tf.nn.xw_plus_b(flattened, w, b)
        
        return tf.nn.relu(value) if use_relu else value    
    
def dropout(x, *, keep_prob=0.5, name):
    return tf.nn.dropout(x, keep_prob=keep_prob, name=name)

### Creating network

In [10]:
class AlexNet:
    def __init__(self, x):
        self.x = x
        self._build_layers()
    
    def _build_layers(self):
        # (24, 24, 3) -> (12, 12, 24)x2
        conv1 = conv(self.x, sz=4, depth=3, out=48, stride=2, name="conv1")
        lrn1 = lrn(conv1, name="lrn1")
        # (12, 12, 24)x2-> (11, 11, 24)x2
        maxpool1 = max_pool(lrn1, sz=2, name="maxpool1")
        
        # (11, 11, 24)x2 -> (5, 5, 64)x2
        conv2 = conv(maxpool1, sz=3, stride=2, depth=48, out=128, groups=2, name="conv2")
        lrn2 = lrn(conv2, name="lrn2")
        # (5, 5, 64)x2 -> (4, 4, 64)x2
        maxpool2 = max_pool(lrn2, sz=2, name="maxpool2")
        
        # (4, 4, 64)x2 -> (4, 4, 96)x2
        conv3 = conv(maxpool2, sz=3, depth=128, out=192, name="conv3")
        
        # (4, 4, 96)x2 -> (4, 4, 96)x2
        conv4 = conv(conv3, sz=3, depth=192, out=192, groups=2, name="conv4")
        
        # (4, 4, 96)x2 -> (4, 4, 64)x2
        conv5 = conv(conv4, sz=3, depth=192, out=128, groups=2, name="conv5")
        # (4, 4, 64)x2 -> (3, 3, 64)x2
        maxpool5 = max_pool(conv5, sz=3, name="maxpool5")
        
        # (3, 3, 128)x2 -> (512)
        fc6 = full(maxpool5, out=512, name="fc6")
        dropout6 = dropout(fc6, name="dropout6")
        
        # (512) -> (512)
        fc7 = full(dropout6, out=512, name="fc7")
        dropout7 = dropout(fc7, name="dropout7")
        
        # (512) -> (10)
        self.score = full(dropout7, out=num_classes, use_relu=False, name="score")

## Initialize network

In [11]:
def create_network(training):
    # Wrap the neural network in the scope named 'network'.
    # Create new variables during training, and re-use during testing.
    with tf.variable_scope("network", reuse=not training):
        images = pre_process(images=x, training=training)
        net = AlexNet(images)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net.score, labels=y))

        return net.score, loss

In [12]:
global_step = tf.Variable(initial_value=0,
                          name="global_step",
                          trainable=False)

In [13]:
score_train, loss_train = create_network(training=True)
optimizer = tf.train.AdamOptimizer(learning_rate=7e-4).minimize(loss_train, global_step=global_step)

In [14]:
score_test, loss_test = create_network(training=False)

In [15]:
y_pred_cls_train = tf.argmax(score_train, 1)
correct_prediction_train = tf.equal(y_pred_cls_train, tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction_train, tf.float32))

In [16]:
y_pred_cls_test = tf.argmax(score_test, 1)

## Testing

### Restore or initialize variables

In [17]:
session = tf.Session()
writer = tf.summary.FileWriter('./graphs', session.graph)

saver = tf.train.Saver()

In [18]:
save_dir = 'alexnet_checkpoints/'

if not os.path.exists(save_dir):
    os.makedirs(save_dir)
    
save_path = os.path.join(save_dir, 'cifar10_cnn')

try:
    print("Trying to restore last checkpoint ...")

    # Use TensorFlow to find the latest checkpoint - if any.
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)

    # Try and load the data in the checkpoint.
    saver.restore(session, save_path=last_chk_path)

    # If we get to this point, the checkpoint was successfully loaded.
    print("Restored checkpoint from:", last_chk_path)
except:
    # If the above failed for some reason, simply
    # initialize all the variables for the TensorFlow graph.
    print("Failed to restore checkpoint. Initializing variables instead.")
    session.run(tf.global_variables_initializer())

Trying to restore last checkpoint ...
Failed to restore checkpoint. Initializing variables instead.


### Optimization

In [19]:
train_batch_size = 64

def random_batch():
    # Number of images in the training-set.
    num_images = len(images_train)

    # Create a random index.
    idx = np.random.choice(num_images,
                           size=train_batch_size,
                           replace=False)

    # Use the random index to select random images and labels.
    x_batch = images_train[idx, :, :, :]
    y_batch = labels_train[idx, :]

    return x_batch, y_batch

In [20]:
def optimize(num_iterations):
    start_time = time.time()

    for i in range(num_iterations):
        x_batch, y_true_batch = random_batch()
        
        feed_dict_train = {x: x_batch,
                           y: y_true_batch}

        i_global, _ = session.run([global_step, optimizer], 
                                  feed_dict=feed_dict_train)
        
        # Print status to screen every 100 iterations (and last).
        if (i_global % 100 == 0) or (i == num_iterations - 1):
            # Calculate the accuracy on the training-batch.
            batch_acc, batch_loss = session.run([accuracy, loss_train],
                                                feed_dict=feed_dict_train)

            # Print status.
            msg = "Global Step: {0:>6}, Training Batch Accuracy: {1:>6.1%}, Training Batch Loss: {2:>2.4}"
            print(msg.format(i_global, batch_acc, batch_loss))
        
        if (i_global % 1000 == 0) or (i == num_iterations - 1):
            # Save all variables of the TensorFlow graph to a
            # checkpoint. Append the global_step counter
            # to the filename so we save the last several checkpoints.
            saver.save(session,
                       save_path=save_path,
                       global_step=global_step)

            print("Saved checkpoint.")


    # Ending time.
    end_time = time.time()

    # Difference between start and end-times.
    time_dif = end_time - start_time

    # Print the time-usage.
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

### Calculating classifications

In [21]:
# Split the data-set in batches of this size to limit RAM usage.
batch_size = 256

def predict_cls(images, labels, cls_true):
    # Number of images.
    num_images = len(images)

    # Allocate an array for the predicted classes which
    # will be calculated in batches and filled into this array.
    cls_pred = np.zeros(shape=num_images, dtype=np.int)

    # Now calculate the predicted classes for the batches.
    # We will just iterate through all the batches.
    # There might be a more clever and Pythonic way of doing this.

    # The starting index for the next batch is denoted i.
    i = 0

    while i < num_images:
        # The ending index for the next batch is denoted j.
        j = min(i + batch_size, num_images)

        # Create a feed-dict with the images and labels
        # between index i and j.
        feed_dict = {x: images[i:j, :],
                     y: labels[i:j, :]}

        # Calculate the predicted class using TensorFlow.
        cls_pred[i:j] = session.run(y_pred_cls_test, feed_dict=feed_dict)

        # Set the start-index for the next batch to the
        # end-index of the current batch.
        i = j

    # Create a boolean array whether each image is correctly classified.
    correct = (cls_true == cls_pred)

    return correct, cls_pred

In [22]:
def classification_accuracy(correct):
    return correct.mean(), correct.sum()

In [23]:
def print_test_accuracy():
    # For all the images in the test-set,
    # calculate the predicted classes and whether they are correct.
    correct, cls_pred = predict_cls(images=images_test,
                                    labels=labels_test,
                                    cls_true=cls_test)
    
    # Classification accuracy and the number of correct classifications.
    acc, num_correct = classification_accuracy(correct)
    
    # Number of images being classified.
    num_images = len(correct)

    # Print the accuracy.
    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2})"
    print(msg.format(acc, num_correct, num_images))

## Perform optimization

In [None]:
optimize(num_iterations=50000)

Global Step:  50100, Training Batch Accuracy:  57.8%, Training Batch Loss: 1.163
Global Step:  50200, Training Batch Accuracy:  78.1%, Training Batch Loss: 0.9655
Global Step:  50300, Training Batch Accuracy:  64.1%, Training Batch Loss: 0.8521
Global Step:  50400, Training Batch Accuracy:  71.9%, Training Batch Loss: 1.06
Global Step:  50500, Training Batch Accuracy:  75.0%, Training Batch Loss: 0.7677
Global Step:  50600, Training Batch Accuracy:  65.6%, Training Batch Loss: 0.9519
Global Step:  50700, Training Batch Accuracy:  68.8%, Training Batch Loss: 1.029
Global Step:  50800, Training Batch Accuracy:  76.6%, Training Batch Loss: 0.7907
Global Step:  50900, Training Batch Accuracy:  67.2%, Training Batch Loss: 0.8055
Global Step:  51000, Training Batch Accuracy:  67.2%, Training Batch Loss: 1.035
Saved checkpoint.
Global Step:  51100, Training Batch Accuracy:  56.2%, Training Batch Loss: 1.122
Global Step:  51200, Training Batch Accuracy:  78.1%, Training Batch Loss: 0.6318
Glob

Global Step:  60000, Training Batch Accuracy:  62.5%, Training Batch Loss: 1.055
Saved checkpoint.
Global Step:  60100, Training Batch Accuracy:  79.7%, Training Batch Loss: 0.6027
Global Step:  60200, Training Batch Accuracy:  79.7%, Training Batch Loss: 0.6241
Global Step:  60300, Training Batch Accuracy:  59.4%, Training Batch Loss: 1.016
Global Step:  60400, Training Batch Accuracy:  70.3%, Training Batch Loss: 0.8626
Global Step:  89200, Training Batch Accuracy:  73.4%, Training Batch Loss: 0.6825
Global Step:  89300, Training Batch Accuracy:  67.2%, Training Batch Loss: 1.134
Global Step:  89400, Training Batch Accuracy:  71.9%, Training Batch Loss: 0.8902
Global Step:  89500, Training Batch Accuracy:  60.9%, Training Batch Loss: 1.038
Global Step:  89600, Training Batch Accuracy:  64.1%, Training Batch Loss: 1.238
Global Step:  89700, Training Batch Accuracy:  78.1%, Training Batch Loss: 0.5822
Global Step:  89800, Training Batch Accuracy:  73.4%, Training Batch Loss: 0.7159
Glo

## Results

In [39]:
print_test_accuracy()

Accuracy on Test-Set: 75.4% (7541 / 10000)
