In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import os

## Load Data

In [7]:
import cifar10

The `cifar10` module determines where the data-set is stored on your computer (the loading messages below show `data/CIFAR-10/`).

The CIFAR-10 data-set is about 163 MB and will be downloaded automatically if it is not located in the given path.

In [8]:
# Download and unpack the CIFAR-10 data-set unless it is already present (see the message printed below).
cifar10.maybe_download_and_extract()

Data has apparently already been downloaded and unpacked.


Load the class-names.

In [9]:
# Load the class-names; the bare name on the last line displays them as the cell output.
class_names = cifar10.load_class_names()
class_names

Loading data: data/CIFAR-10/cifar-10-batches-py/batches.meta


['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

Load the training-set. This returns the images, the class-numbers as integers, and the class-numbers as One-Hot encoded arrays called labels.

In [10]:
# images_train: the images, cls_train: integer class-numbers, labels_train: One-Hot encoded labels.
images_train, cls_train, labels_train = cifar10.load_training_data()

Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_1
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_2
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_3
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_4
Loading data: data/CIFAR-10/cifar-10-batches-py/data_batch_5


Load the test-set.

In [11]:
# Same three-part return as for the training-set: images, integer class-numbers, One-Hot labels.
images_test, cls_test, labels_test = cifar10.load_test_data()

Loading data: data/CIFAR-10/cifar-10-batches-py/test_batch


The CIFAR-10 data-set has now been loaded and consists of 60,000 images and associated labels (i.e. classifications of the images). The data-set is split into 2 mutually exclusive sub-sets, the training-set and the test-set.

In [12]:
# Report how many images each sub-set contains.
train_count, test_count = len(images_train), len(images_test)

print("Size of:")
print("- Training-set:\t\t{}".format(train_count))
print("- Test-set:\t\t{}".format(test_count))

Size of:
- Training-set:		50000
- Test-set:		10000


The data dimensions are used in several places in the source-code below. They have already been defined in the cifar10 module, so we just need to import them.

In [13]:
from cifar10 import img_size, num_channels, num_classes

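The images are 32 x 32 pixels. The pre-processing below crops them to 24 x 24 pixels when building the testing graph; the random crop for the training graph is currently commented out, so training images stay 32 x 32.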

In [14]:
# Height and width (in pixels) of the cropped images produced by pre_process_image().
img_size_cropped = 24

In [15]:
def pre_process_image(image, training):
    """Add the pre-processing ops for a single image to the TensorFlow graph.

    Args:
        image: a single image tensor.
        training: Python boolean selecting which graph variant to build.

    Returns:
        For training: the image after a random horizontal flip and random
        hue, contrast, brightness and saturation changes, limited to the
        [0, 1] range.
        For testing: the image cropped (or padded) around its centre to
        img_size_cropped x img_size_cropped pixels.
    """
    if not training:
        # Testing graph: crop around the centre so the image has the
        # size that randomly cropped training images would have.
        return tf.image.resize_image_with_crop_or_pad(image,
                                                      target_height=img_size_cropped,
                                                      target_width=img_size_cropped)

    # Training graph.
    # NOTE: the random crop (tf.random_crop to
    # [img_size_cropped, img_size_cropped, num_channels]) is currently
    # disabled, so the training image keeps its input size.

    # Random horizontal flip.
    flipped = tf.image.random_flip_left_right(image)

    # Random colour distortions: hue, contrast, brightness, saturation.
    hue_shifted = tf.image.random_hue(flipped, max_delta=0.05)
    contrasted = tf.image.random_contrast(hue_shifted, lower=0.3, upper=1.0)
    brightened = tf.image.random_brightness(contrasted, max_delta=0.2)
    saturated = tf.image.random_saturation(brightened, lower=0.0, upper=2.0)

    # The distortions above may push pixel values outside [0, 1],
    # so cap them from above and then from below.
    return tf.maximum(tf.minimum(saturated, 1.0), 0.0)

In [16]:
def pre_process(images, training):
    """Apply pre_process_image() to every image in a batch.

    Args:
        images: tensor holding the batch of input images.
        training: Python boolean forwarded unchanged to pre_process_image().

    Returns:
        The result of tf.map_fn over the batch.
    """
    def process_single(single_image):
        # Bind the training flag so map_fn only has to supply the image.
        return pre_process_image(single_image, training)

    return tf.map_fn(process_single, images)

## AlexNet

In [17]:
def weight_variable(shape, name):
    """Create a trainable variable with truncated-normal initial values.

    Args:
        shape: shape of the variable.
        name: name given to the variable.

    Returns:
        A trainable tf.Variable whose initial value is drawn from a
        truncated normal distribution with a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1),
                       name=name,
                       trainable=True)

In [18]:
def conv2d(input_, out_channels, filter_height, filter_width, in_channels, strides):
    """Convolution layer with 'SAME' padding, an added bias and a ReLU.

    Args:
        input_: input tensor of the layer.
        out_channels: number of filters (output channels).
        filter_height: height of each filter.
        filter_width: width of each filter.
        in_channels: number of channels of input_.
        strides: strides passed on to tf.nn.conv2d.

    Returns:
        The ReLU activation of the biased convolution.
    """
    kernel_shape = [filter_height, filter_width, in_channels, out_channels]
    kernel = weight_variable(shape=kernel_shape, name="weight")
    # The bias is created by the same helper as the filter weights.
    bias = weight_variable(shape=[out_channels], name="bias")

    convolved = tf.nn.conv2d(input_, kernel, strides, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, bias))

In [19]:
def max_pool(input_, size, strides, name=None):
    """Max-pooling with 'SAME' padding.

    Args:
        input_: tensor to pool.
        size: pooling window, passed as ksize to tf.nn.max_pool.
        strides: strides of the pooling window.
        name: optional name for the operation.

    Returns:
        The pooled tensor.
    """
    pool_args = dict(ksize=size, strides=strides, padding='SAME', name=name)
    return tf.nn.max_pool(input_, **pool_args)

In [20]:
def fc(input_, input_size, output_size, keep_prob, name=None):
    """Fully-connected layer: dropout on the input, then matmul + bias + ReLU.

    Args:
        input_: 2-dim input tensor of the layer.
        input_size: number of input features (rows of the weight matrix).
        output_size: number of output features.
        keep_prob: keep-probability handed to tf.nn.dropout.
        name: accepted but not used; the ops get fixed names instead.

    Returns:
        The ReLU activation of the layer.
    """
    # Dropout is applied to the layer input, before the weights.
    dropped = tf.nn.dropout(input_, keep_prob, name="h_fc1_drop")

    weights = weight_variable([input_size, output_size], "weight")
    biases = weight_variable([output_size], "bias")

    pre_activation = tf.nn.bias_add(tf.matmul(dropped, weights), biases, name='y_conv')
    return tf.nn.relu(pre_activation)

In [21]:
def flatten(input_, batch_size):
    """Reshape a tensor to 2 dimensions: [batch_size, everything else].

    Args:
        input_: tensor to reshape.
        batch_size: size of the first dimension of the result; the
            second dimension is inferred by tf.reshape.

    Returns:
        The reshaped tensor.
    """
    target_shape = [batch_size, -1]
    return tf.reshape(input_, shape=target_shape)

In [22]:
def lrn(x, radius, alpha, beta, name, bias=1.0):
    """Local response normalization of x.

    Thin wrapper around tf.nn.local_response_normalization; radius is
    passed on as depth_radius, the other arguments keep their names.
    """
    lrn_args = dict(depth_radius=radius,
                    alpha=alpha,
                    beta=beta,
                    bias=bias,
                    name=name)
    return tf.nn.local_response_normalization(x, **lrn_args)

In [23]:
class AlexNet:
    """AlexNet-style two-tower convolutional network (TensorFlow 1.x graph API).

    Typical use: construct the object, call build_graph() once and then
    call train_model().

    Attributes set by __init__:
        class_count: number of classes, i.e. the width of the logits.
        save_dir: directory intended for checkpoints.
        save_path: checkpoint file-prefix.
        train_batch_size: number of images per training batch. The graph
            is built for exactly this batch size because it is passed to
            flatten() as a fixed dimension.
    """

    def __init__(self, class_count, save_dir, save_path, batch_size):
        self.class_count = class_count
        self.save_dir = save_dir
        self.save_path = save_path
        self.train_batch_size = batch_size

    def _create_placeholders(self):
        """Create the input placeholders and the global-step counter."""
        with tf.name_scope("data"):
            # Raw input images; this is the tensor that must be fed.
            self.ims = tf.placeholder(shape=[None, 32, 32, 3], dtype=tf.float32, name="x")
            # One-Hot encoded true labels.
            self.y_true = tf.placeholder(shape=[None, self.class_count], dtype=tf.float32, name="y")
            # Incremented by the optimizer once per training step.
            self.global_step = tf.Variable(initial_value=0,
                                           name='global_step', trainable=False)

    def _create_layers(self):
        """Build the two parallel towers and the output layer.

        Reads self.x (the pre-processed images) and sets self.logits.
        """
        # NOTE(review): fc() is always called with keep_prob=0.5, so
        # dropout is also active whenever the accuracy is evaluated.

        # Layer 1: 11x11 convolution + ReLU, normalization, max-pooling.
        with tf.name_scope("layer1"):
            layer1_1_conv = conv2d(self.x, 48, 11, 11, 3, [1, 4, 4, 1])
            layer1_2_conv = conv2d(self.x, 48, 11, 11, 3, [1, 4, 4, 1])
            norm1_1 = lrn(layer1_1_conv, 2, 2e-05, 0.75, name='norm1')
            norm1_2 = lrn(layer1_2_conv, 2, 2e-05, 0.75, name='norm1')
            # Pool the normalized activations; pooling the raw
            # convolution output would leave the normalization unused.
            layer1_1_pooling = max_pool(norm1_1, [1, 3, 3, 1], [1, 2, 2, 1])
            layer1_2_pooling = max_pool(norm1_2, [1, 3, 3, 1], [1, 2, 2, 1])

        # Layer 2: 5x5 convolution + ReLU, normalization, max-pooling.
        with tf.name_scope("layer2"):
            layer2_1_conv = conv2d(layer1_1_pooling, 128, 5, 5, 48, [1, 1, 1, 1])
            layer2_2_conv = conv2d(layer1_2_pooling, 128, 5, 5, 48, [1, 1, 1, 1])
            norm2_1 = lrn(layer2_1_conv, 2, 2e-05, 0.75, name='norm2')
            norm2_2 = lrn(layer2_2_conv, 2, 2e-05, 0.75, name='norm2')
            layer2_1_pooling = max_pool(norm2_1, [1, 3, 3, 1], [1, 2, 2, 1])
            layer2_2_pooling = max_pool(norm2_2, [1, 3, 3, 1], [1, 2, 2, 1])

        # Layer 3: the towers are summed, then split again.
        with tf.name_scope("layer3"):
            layer3_input = tf.nn.relu(tf.add(layer2_1_pooling, layer2_2_pooling))
            layer3_1_conv = conv2d(layer3_input, 192, 3, 3, 128, [1, 1, 1, 1])
            layer3_2_conv = conv2d(layer3_input, 192, 3, 3, 128, [1, 1, 1, 1])

        with tf.name_scope("layer4"):
            layer4_1_conv = conv2d(layer3_1_conv, 192, 3, 3, 192, [1, 1, 1, 1])
            layer4_2_conv = conv2d(layer3_2_conv, 192, 3, 3, 192, [1, 1, 1, 1])

        with tf.name_scope("layer5"):
            layer5_1_conv = conv2d(layer4_1_conv, 128, 3, 3, 192, [1, 1, 1, 1])
            layer5_2_conv = conv2d(layer4_2_conv, 128, 3, 3, 192, [1, 1, 1, 1])
            layer5_1_pooling = max_pool(layer5_1_conv, [1, 3, 3, 1], [1, 2, 2, 1])
            layer5_2_pooling = max_pool(layer5_2_conv, [1, 3, 3, 1], [1, 2, 2, 1])

        # Layers 6 and 7: fully-connected layers with dropout.
        with tf.name_scope("layer6"):
            layer6_1_flatten = flatten(layer5_1_pooling, self.train_batch_size)
            layer6_2_flatten = flatten(layer5_2_pooling, self.train_batch_size)
            layer6_1_fc = fc(layer6_1_flatten, 128, 1024, 0.5)
            layer6_2_fc = fc(layer6_2_flatten, 128, 1024, 0.5)

        with tf.name_scope("layer7"):
            layer7_1_fc = fc(layer6_1_fc, 1024, 1024, 0.5)
            layer7_2_fc = fc(layer6_2_fc, 1024, 1024, 0.5)

        # Layer 8: the towers are summed and mapped to the class logits.
        with tf.name_scope("layer8"):
            w_out = weight_variable([1024, self.class_count], "weight_out")
            b_out = weight_variable([self.class_count], "bias_out")
            self.logits = tf.add(tf.matmul(tf.nn.relu(layer7_1_fc + layer7_2_fc), w_out), b_out)

    def _create_loss(self):
        """Set self.cross_entropy to the mean softmax cross-entropy of the batch."""
        with tf.name_scope("loss"):
            self.cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.y_true, logits=self.logits))

    def _create_optimizer(self):
        """Set self.train_step to an Adam step that also increments global_step."""
        with tf.name_scope("optimizer"):
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.cross_entropy, global_step=self.global_step)

    def _create_accuracy(self):
        """Set self.accuracy to the fraction of correctly classified images.

        Built once here rather than inside train_model(), so repeated
        training calls do not keep adding ops to the graph.
        """
        with tf.name_scope("accuracy"):
            y_pred = tf.nn.softmax(self.logits)
            y_pred_cls = tf.argmax(y_pred, axis=1)
            correct_prediction = tf.equal(y_pred_cls, tf.argmax(self.y_true, axis=1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    def build_graph(self):
        """Build the complete training graph; call once before train_model()."""
        self._create_placeholders()
        # The network consumes the pre-processed version of the placeholder.
        self.x = pre_process(self.ims, training=True)
        self._create_layers()
        self._create_loss()
        self._create_optimizer()
        self._create_accuracy()
        self.saver = tf.train.Saver()

    def random_batch(self, images_train, labels_train):
        """Draw self.train_batch_size distinct random training examples.

        Args:
            images_train: 4-dim array of images, indexed by image first.
            labels_train: 2-dim array of labels, indexed by image first.

        Returns:
            Tuple (x_batch, y_batch) of the selected images and labels.

        Raises:
            ValueError: from np.random.choice when the batch size exceeds
                the number of images (sampling is without replacement).
        """
        # Number of images in the training-set.
        num_images = len(images_train)

        # Create a random index without repeated entries.
        idx = np.random.choice(num_images,
                               size=self.train_batch_size,
                               replace=False)

        # Use the random index to select random images and labels.
        x_batch = images_train[idx, :, :, :]
        y_batch = labels_train[idx, :]

        return x_batch, y_batch

    def restore_checkpoints(self, session):
        """Initialize all variables of the graph in the given session.

        Restoring from a checkpoint is currently disabled: no restore is
        attempted and the variables are always freshly initialized. To
        re-enable it, look up tf.train.latest_checkpoint(self.save_dir)
        and pass the result to self.saver.restore() before falling back
        to the initializer.
        """
        print("Failed to restore checkpoint. Initializing variables instead.")
        session.run(tf.global_variables_initializer())

    def train_model(self, images_train, labels_train, num_iterations):
        """Run num_iterations optimization steps on random training batches.

        Requires build_graph() to have been called. Prints the loss after
        every step, the batch accuracy every 100 global steps (and on the
        last iteration), and the total time used.

        Args:
            images_train: array of training images.
            labels_train: array of One-Hot encoded training labels.
            num_iterations: number of optimization steps to run.
        """
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

        # Everything needed per iteration is fetched in a single run, so
        # the loss and accuracy describe the very batch (and the very
        # random augmentation) that the optimizer step was computed on.
        fetches = [self.global_step, self.train_step,
                   self.cross_entropy, self.accuracy]

        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as session:

            self.restore_checkpoints(session)

            # Start-time used for printing time-usage below.
            start_time = time.time()

            for i in range(num_iterations):
                # x_batch holds a batch of images and y_true_batch
                # the true labels for those images.
                x_batch, y_true_batch = self.random_batch(images_train, labels_train)

                # Feed the raw-image placeholder. Feeding self.x instead
                # would override the pre-processed tensor and silently
                # skip the whole pre-processing part of the graph.
                feed_dict_train = {self.ims: x_batch,
                                   self.y_true: y_true_batch}

                i_global, _, cross_entropy, batch_acc = session.run(
                    fetches, feed_dict=feed_dict_train)

                print(i_global, [cross_entropy])

                # Print status to screen every 100 iterations (and last).
                if (i_global % 100 == 0) or (i == num_iterations - 1):
                    msg = "Global Step: {0:>6}, Training Batch Accuracy: {1:>6.1%}"
                    print(msg.format(i_global, batch_acc))

                # Checkpoint saving is currently disabled. To enable it,
                # call self.saver.save(session, save_path=self.save_path,
                # global_step=self.global_step) here at the desired interval.

            # Ending time.
            end_time = time.time()

            # Difference between start and end-times.
            time_dif = end_time - start_time

            # Print the time-usage.
            print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))

In [24]:
# Directory and file-prefix for checkpoints.
save_dir = 'checkpoints_alexNet/'
# exist_ok=True replaces the separate os.path.exists() check and so
# avoids the race between checking for and creating the directory.
os.makedirs(save_dir, exist_ok=True)
save_path = os.path.join(save_dir, 'cifar10_cnn')

# Number of images per training batch.
train_batch_size = 30

# Build the network with one output per class-name and create its graph.
alexNet = AlexNet(len(class_names), save_dir, save_path, train_batch_size)
alexNet.build_graph()

In [None]:
# Run 10000 optimization iterations on random batches of the training-set.
alexNet.train_model(images_train, labels_train, 10000)

Failed to restore checkpoint. Initializing variables instead.
