# Training CNN on DLVM (GPU)
In this notebook we will go through the steps of stacking layers together and seeing how it affects performance. In a previous section we went through how the various types of layers and their properties affect the dimensions of the data passing through them. In this notebook we will look at the effect on performance so that we get an idea of how stacking these layers can give us better performance. We will be basing this CNN on the VGG architecture.

In [None]:
import numpy as np
import os
import sys
import tarfile
import pickle
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf

from urllib.request import urlretrieve

In [None]:
# Hyper-parameters shared by every model trained in this notebook
EPOCHS, BATCHSIZE = 10, 64  # passes over the training set / samples per minibatch
LR, MOMENTUM = 0.01, 0.9    # default settings for the momentum optimizer
N_CLASSES = 10              # CIFAR10 has ten target classes

In [None]:
# Layout string passed to every conv/pool layer below
data_format = "channels_first"

In [None]:
def read_pickle(src):
    """Deserialise and return the object stored in the pickle file at ``src``.

    The file is opened in binary mode and unpickled with
    ``encoding='latin1'``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while loading,
    so ``src`` should only ever point at trusted files.
    """
    with open(src, 'rb') as handle:
        return pickle.load(handle, encoding='latin1')

In [None]:
def process_cifar(data_dir='./cifar-10-batches-py'):
    """Read the extracted CIFAR10 python batches from disk.

    Args:
        data_dir: Directory containing ``data_batch_1`` .. ``data_batch_5``
            and ``test_batch``. The default is the path this notebook has
            always read from, relative to the current working directory.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. The training arrays are
        the concatenation of the five training batches; ``y_test`` is the
        test labels converted to a numpy array.

    Raises:
        FileNotFoundError: propagated from opening a batch file that does
            not exist (``load_cifar`` uses this to trigger a download).
    """

    print('Preparing train set...')
    train_list = [
        read_pickle(os.path.join(data_dir, 'data_batch_{0}'.format(i)))
        for i in range(1, 6)
    ]
    x_train = np.concatenate([t['data'] for t in train_list])
    y_train = np.concatenate([t['labels'] for t in train_list])

    print('Preparing test set...')
    tst = read_pickle(os.path.join(data_dir, 'test_batch'))
    x_test = tst['data']
    y_test = np.asarray(tst['labels'])

    return x_train, y_train, x_test, y_test

In [None]:
def load_cifar(src="http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"):
    """Load the CIFAR10 dataset, downloading and extracting it if needed.

    The data is first read from disk via ``process_cifar()``. If that raises
    ``FileNotFoundError`` the archive at ``src`` is downloaded to
    ``./delete.me``, extracted into the current working directory, and the
    read is retried.

    Args:
        src: URL of the CIFAR10 python archive.

    Returns:
        Whatever ``process_cifar()`` returns:
        ``(x_train, y_train, x_test, y_test)``.
    """
    try:
        return process_cifar()
    except FileNotFoundError:
        print('Data does not exist. Downloading ' + src)

    fname, _ = urlretrieve(src, './delete.me')
    try:
        print('Extracting files...')
        # NOTE(review): extractall() trusts the member paths stored in the
        # archive; only use this with a source you trust.
        with tarfile.open(fname) as tar:
            tar.extractall()
    finally:
        # Remove the temporary archive even when extraction fails, so a
        # broken download is not left behind.
        os.remove(fname)
    return process_cifar()

In [None]:
def prepare_cifar(x_train, y_train, x_test, y_test):
    """Convert raw CIFAR10 arrays into the form fed to the network.

    Pixel rows are divided by 255.0, reshaped to ``(-1, 3, 32, 32)`` and cast
    to ``float32``; label arrays are cast to ``int32``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of converted arrays.
    """

    def _as_images(flat_pixels):
        # Scale pixel intensity, then restore the (N, 3, 32, 32) image layout
        scaled = flat_pixels / 255.0
        return scaled.reshape(-1, 3, 32, 32).astype(np.float32)

    train_images = _as_images(x_train)
    test_images = _as_images(x_test)
    train_labels = y_train.astype(np.int32)
    test_labels = y_test.astype(np.int32)
    return train_images, train_labels, test_images, test_labels

In [None]:
def minibatch_from(X, y, batchsize=BATCHSIZE, shuffle=False):
    """Yield successive ``(X_batch, y_batch)`` slices of the data.

    Args:
        X: Indexable collection of samples.
        y: Indexable collection of labels, the same length as ``X``.
        batchsize: Maximum number of samples per batch; the last batch is
            shorter when ``len(X)`` is not a multiple of ``batchsize``.
        shuffle: When true, ``X`` and ``y`` are reordered together with
            ``shuffle_data`` before batching.

    Yields:
        Tuples ``(X[i:i + batchsize], y[i:i + batchsize])``.

    Raises:
        ValueError: if ``X`` and ``y`` differ in length. Because this is a
            generator, the check runs on the first iteration rather than at
            call time.
    """
    if len(X) != len(y):
        # ValueError subclasses Exception, so existing handlers still work
        raise ValueError("The length of X {} and y {} don't match".format(len(X), len(y)))

    if shuffle:
        X, y = shuffle_data(X, y)

    for start in range(0, len(X), batchsize):
        yield X[start:start + batchsize], y[start:start + batchsize]

In [None]:
def shuffle_data(X, y):
    """Return ``X`` and ``y`` reordered by one shared random permutation.

    The permutation is drawn from numpy's global random state, so samples
    and labels stay aligned with each other.
    """
    order = np.arange(len(X))
    np.random.shuffle(order)  # shuffles the index vector in place
    shuffled_X = X[order]
    shuffled_y = y[order]
    return shuffled_X, shuffled_y

In [None]:
def init_model_training(m, labels, learning_rate=LR, momentum=MOMENTUM):
    """Build the training op for a classifier.

    Args:
        m: Tensor of raw logits produced by the model.
        labels: Tensor of integer class labels for the same batch.
        learning_rate: Learning rate passed to the momentum optimizer.
        momentum: Momentum passed to the momentum optimizer.

    Returns:
        The op returned by ``optimizer.minimize`` for the mean sparse
        softmax cross-entropy loss.
    """
    # Use the `labels` argument; the previous code silently read the global
    # placeholder `y` and ignored what the caller passed in.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=m, labels=labels)
    loss = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum)
    return optimizer.minimize(loss)

In [None]:
%%time
# Load CIFAR10 (downloading it when missing) and convert it for the models
x_train, y_train, x_test, y_test = prepare_cifar(*load_cifar())
# Sanity check: first the shape, then the dtype, of every split
print(*[split.shape for split in (x_train, x_test, y_train, y_test)])
print(*[split.dtype for split in (x_train, x_test, y_train, y_test)])

## Model 1
Our first model will have 2 convolution layers and a max pooling layer. The classification layer will use softmax as we want it to only output a 1 for our specified class and 0 everywhere else.

In [None]:
%%time
# Graph inputs for Model 1
X = tf.placeholder(dtype=tf.float32, shape=(None, 3, 32, 32))  # image batch
y = tf.placeholder(dtype=tf.int32, shape=(None,))  # one integer label per image
training = tf.placeholder(dtype=tf.bool)  # Indicator for dropout layer

In [None]:
# Block 1
# Two 3x3 'same'-padded convolutions with 64 filters and ReLU, followed by a
# 2x2 max pool with stride 2.
conv1_1 = tf.layers.conv2d(X, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv1_2 = tf.layers.conv2d(conv1_1, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
# Pooling halves height and width (32x32 -> 16x16); the flatten below relies on it
pool1_1 = tf.layers.max_pooling2d(conv1_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)
# NOTE(review): pool1_1 is a max over ReLU outputs, so it is already
# non-negative and this extra ReLU looks redundant.
relu2 = tf.nn.relu(pool1_1)
# Collapse the 64 x 16 x 16 feature maps into one vector per image
flatten = tf.reshape(relu2, shape=[-1, 64*16*16])
# Classifier head: two 4096-unit ReLU layers, then one raw logit per class
fc1 = tf.layers.dense(flatten, 4096, activation=tf.nn.relu)
fc2 = tf.layers.dense(fc1, 4096, activation=tf.nn.relu)
# No activation here: softmax is applied inside the loss built by init_model_training
model = tf.layers.dense(fc2, N_CLASSES, name='output')

In [None]:
%%time
# Build the training op first, then create the initializer, so that any
# variables added while building the optimizer are initialised as well.
train_model = init_model_training(model, y)
init = tf.global_variables_initializer()
# NOTE(review): this session is never closed in the notebook.
sess = tf.Session()
sess.run(init)

# Accuracy logging
# A sample counts as correct when its label is the top-1 prediction;
# accuracy is the mean of those hits over the batch that is fed in.
correct = tf.nn.in_top_k(model, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
# Display the shape of the prepared training images as the cell output
x_train.shape

In [None]:
%%time
# Train model
for j in range(EPOCHS):
    # One pass over the reshuffled training set, one optimizer step per minibatch
    for data, label in minibatch_from(x_train, y_train, shuffle=True):
        sess.run(train_model, feed_dict={X: data, y: label})
    # Log
    # NOTE(review): `data`/`label` still hold the final minibatch of the epoch,
    # so this is the accuracy on that single batch, not on the whole training set.
    acc_train = sess.run(accuracy, feed_dict={X: data, y: label})
    print(j, "Train accuracy:", acc_train)

In [None]:
%%time
# Build the prediction op once; creating it inside the loop would add a new
# argmax node to the graph for every minibatch.
pred = tf.argmax(model, 1)
y_guess = list()
# Labels are not needed for prediction; they are compared after the loop
for data, label in minibatch_from(x_test, y_test):
    output = sess.run(pred, feed_dict={X: data})
    y_guess.append(output)
# Fraction of test samples whose predicted class equals the true label
print("Accuracy: ", np.mean(np.concatenate(y_guess) == y_test))

Our model gets an accuracy of around 68% on the test set after 5 epochs. We can also see that it achieves 100% on the training set a few epochs before we stop training. It would usually be prudent to stop training earlier, and most frameworks provide callbacks that can be used to do this. We are not using them here in order to keep things simple.

## Model 2 
With the second model we will add a second convolution block. In keeping with the VGG architecture we will add two convolution layers, each with 128 filters, as well as a max pooling layer. This time we will train it for 30 epochs.

In [None]:
%%time
# Start Model 2 from an empty default graph
tf.reset_default_graph()
# Graph inputs for Model 2
X = tf.placeholder(dtype=tf.float32, shape=(None, 3, 32, 32))  # image batch
y = tf.placeholder(dtype=tf.int32, shape=(None,))  # one integer label per image

In [None]:
# Block 1
# Two 3x3 'same'-padded convolutions with 64 filters and ReLU, followed by a
# 2x2 max pool with stride 2 (32x32 -> 16x16).
conv1_1 = tf.layers.conv2d(X, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv1_2 = tf.layers.conv2d(conv1_1, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool1_1 = tf.layers.max_pooling2d(conv1_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)
# Block 2
# Same pattern with 128 filters; the pool halves the maps again (16x16 -> 8x8).
conv2_1 = tf.layers.conv2d(pool1_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv2_2 = tf.layers.conv2d(conv2_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool2_1 = tf.layers.max_pooling2d(conv2_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)

# NOTE(review): pool2_1 is a max over ReLU outputs, so it is already
# non-negative and this extra ReLU looks redundant.
relu2 = tf.nn.relu(pool2_1)
# Collapse the 128 x 8 x 8 feature maps into one vector per image
flatten = tf.reshape(relu2, shape=[-1, 128*8*8])
# Classifier head: two 4096-unit ReLU layers, then one raw logit per class
fc1 = tf.layers.dense(flatten, 4096, activation=tf.nn.relu)
fc2 = tf.layers.dense(fc1, 4096, activation=tf.nn.relu)
# No activation here: softmax is applied inside the loss built by init_model_training
model = tf.layers.dense(fc2, N_CLASSES, name='output')

In [None]:
%%time
# Build the training op first, then create the initializer, so that any
# variables added while building the optimizer are initialised as well.
train_model = init_model_training(model, y)
init = tf.global_variables_initializer()
# NOTE(review): a new session is opened here without closing the one bound
# to `sess` by the previous model's cells.
sess = tf.Session()
sess.run(init)

# Accuracy logging
# A sample counts as correct when its label is the top-1 prediction;
# accuracy is the mean of those hits over the batch that is fed in.
correct = tf.nn.in_top_k(model, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
%%time
# Train model
for j in range(EPOCHS):
    # One pass over the reshuffled training set, one optimizer step per minibatch
    for data, label in minibatch_from(x_train, y_train, shuffle=True):
        sess.run(train_model, feed_dict={X: data, y: label})
    # Log
    # NOTE(review): `data`/`label` still hold the final minibatch of the epoch,
    # so this is the accuracy on that single batch, not on the whole training set.
    acc_train = sess.run(accuracy, feed_dict={X: data, y: label})
    print(j, "Train accuracy:", acc_train)

In [None]:
%%time
# Build the prediction op once; creating it inside the loop would add a new
# argmax node to the graph for every minibatch.
pred = tf.argmax(model, 1)
y_guess = list()
# Labels are not needed for prediction; they are compared after the loop
for data, label in minibatch_from(x_test, y_test):
    output = sess.run(pred, feed_dict={X: data})
    y_guess.append(output)
# Fraction of test samples whose predicted class equals the true label
print("Accuracy: ", np.mean(np.concatenate(y_guess) == y_test))

The model does slightly better, with an accuracy of 75.5%.

## Model 3 
For our third model we will add a third convolution block. This will be made up of three convolution layers, each with 256 filters. Again we will have a max pooling layer at the end of the block.

In [None]:
%%time
# Start Model 3 from an empty default graph
tf.reset_default_graph()
# Graph inputs for Model 3
X = tf.placeholder(dtype=tf.float32, shape=(None, 3, 32, 32))  # image batch
y = tf.placeholder(dtype=tf.int32, shape=(None,))  # one integer label per image

In [None]:
# Block 1
# Two 3x3 'same'-padded convolutions with 64 filters and ReLU, followed by a
# 2x2 max pool with stride 2 (32x32 -> 16x16).
conv1_1 = tf.layers.conv2d(X, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv1_2 = tf.layers.conv2d(conv1_1, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool1_1 = tf.layers.max_pooling2d(conv1_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)
# Block 2
# Same pattern with 128 filters; the pool halves the maps again (16x16 -> 8x8).
conv2_1 = tf.layers.conv2d(pool1_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv2_2 = tf.layers.conv2d(conv2_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool2_1 = tf.layers.max_pooling2d(conv2_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)

# Block 3
# Three convolutions with 256 filters, then a pool (8x8 -> 4x4).
conv3_1 = tf.layers.conv2d(pool2_1, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv3_2 = tf.layers.conv2d(conv3_1, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv3_3 = tf.layers.conv2d(conv3_2, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool3_1 = tf.layers.max_pooling2d(conv3_3, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)

# NOTE(review): pool3_1 is a max over ReLU outputs, so it is already
# non-negative and this extra ReLU looks redundant.
relu2 = tf.nn.relu(pool3_1)
# Collapse the 256 x 4 x 4 feature maps into one vector per image
flatten = tf.reshape(relu2, shape=[-1, 256*4*4])
# Classifier head: two 4096-unit ReLU layers, then one raw logit per class
fc1 = tf.layers.dense(flatten, 4096, activation=tf.nn.relu)
fc2 = tf.layers.dense(fc1, 4096, activation=tf.nn.relu)
# No activation here: softmax is applied inside the loss built by init_model_training
model = tf.layers.dense(fc2, N_CLASSES, name='output')

In [None]:
%%time
# Build the training op first, then create the initializer, so that any
# variables added while building the optimizer are initialised as well.
train_model = init_model_training(model, y)
init = tf.global_variables_initializer()
# NOTE(review): a new session is opened here without closing the one bound
# to `sess` by the previous model's cells.
sess = tf.Session()
sess.run(init)

# Accuracy logging
# A sample counts as correct when its label is the top-1 prediction;
# accuracy is the mean of those hits over the batch that is fed in.
correct = tf.nn.in_top_k(model, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
%%time
# Train model
for j in range(EPOCHS):
    # One pass over the reshuffled training set, one optimizer step per minibatch
    for data, label in minibatch_from(x_train, y_train, shuffle=True):
        sess.run(train_model, feed_dict={X: data, y: label})
    # Log
    # NOTE(review): `data`/`label` still hold the final minibatch of the epoch,
    # so this is the accuracy on that single batch, not on the whole training set.
    acc_train = sess.run(accuracy, feed_dict={X: data, y: label})
    print(j, "Train accuracy:", acc_train)

In [None]:
%%time
# Build the prediction op once; creating it inside the loop would add a new
# argmax node to the graph for every minibatch.
pred = tf.argmax(model, 1)
y_guess = list()
# Labels are not needed for prediction; they are compared after the loop
for data, label in minibatch_from(x_test, y_test):
    output = sess.run(pred, feed_dict={X: data})
    y_guess.append(output)
# Fraction of test samples whose predicted class equals the true label
print("Accuracy: ", np.mean(np.concatenate(y_guess) == y_test))

The model reaches an accuracy of 76%. As you can see, with each additional block we get better results, but the returns diminish with each successive block.

## Model 4
Due to the large number of free parameters, CNNs can benefit from regularisation. One way to regularise is to use a dropout layer, which we talked about earlier. During the forward pass this layer randomly zeroes a certain proportion of its outputs. This was also employed by the authors of the VGG architecture.

In [None]:
%%time
# Start Model 4 from an empty default graph
tf.reset_default_graph()
# Graph inputs for Model 4
X = tf.placeholder(dtype=tf.float32, shape=(None, 3, 32, 32))  # image batch
y = tf.placeholder(dtype=tf.int32, shape=(None,))  # one integer label per image
training = tf.placeholder(dtype=tf.bool)  # Indicator for dropout layer

In [None]:
# Block 1
# Two 3x3 'same'-padded convolutions with 64 filters and ReLU, followed by a
# 2x2 max pool with stride 2 (32x32 -> 16x16).
conv1_1 = tf.layers.conv2d(X, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv1_2 = tf.layers.conv2d(conv1_1, 
                           filters=64, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool1_1 = tf.layers.max_pooling2d(conv1_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)
# Block 2
# Same pattern with 128 filters; the pool halves the maps again (16x16 -> 8x8).
conv2_1 = tf.layers.conv2d(pool1_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv2_2 = tf.layers.conv2d(conv2_1, 
                           filters=128, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool2_1 = tf.layers.max_pooling2d(conv2_2, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)

# Block 3
# Three convolutions with 256 filters, then a pool (8x8 -> 4x4).
conv3_1 = tf.layers.conv2d(pool2_1, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv3_2 = tf.layers.conv2d(conv3_1, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
conv3_3 = tf.layers.conv2d(conv3_2, 
                           filters=256, 
                           kernel_size=(3, 3), 
                           padding='same', 
                           data_format=data_format,
                           activation=tf.nn.relu)
pool3_1 = tf.layers.max_pooling2d(conv3_3, 
                                  pool_size=(2, 2), 
                                  strides=(2, 2), 
                                  padding='valid', 
                                  data_format=data_format)

# NOTE(review): pool3_1 is a max over ReLU outputs, so it is already
# non-negative and this extra ReLU looks redundant.
relu2 = tf.nn.relu(pool3_1)
# Collapse the 256 x 4 x 4 feature maps into one vector per image
flatten = tf.reshape(relu2, shape=[-1, 256*4*4])
# Classifier head: each 4096-unit ReLU layer is followed by dropout with
# rate 0.5, switched on or off through the `training` placeholder.
fc1 = tf.layers.dense(flatten, 4096, activation=tf.nn.relu)
drop1 = tf.layers.dropout(fc1, 0.5, training=training)
fc2 = tf.layers.dense(drop1, 4096, activation=tf.nn.relu)
drop2 = tf.layers.dropout(fc2, 0.5, training=training)
# No activation here: softmax is applied inside the loss built by init_model_training
model = tf.layers.dense(drop2, N_CLASSES, name='output')

In [None]:
%%time
# Build the training op first, then create the initializer, so that any
# variables added while building the optimizer are initialised as well.
train_model = init_model_training(model, y)
init = tf.global_variables_initializer()
# NOTE(review): a new session is opened here without closing the one bound
# to `sess` by the previous model's cells.
sess = tf.Session()
sess.run(init)

# Accuracy logging
# A sample counts as correct when its label is the top-1 prediction;
# accuracy is the mean of those hits over the batch that is fed in.
correct = tf.nn.in_top_k(model, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [None]:
%%time
# Train model
for j in range(EPOCHS):
    # One pass over the reshuffled training set with dropout switched on
    for data, label in minibatch_from(x_train, y_train, shuffle=True):
        sess.run(train_model, feed_dict={X: data, y: label, training: True})
    # Log
    # Dropout must be off when measuring accuracy; feeding training=True here
    # would randomly zero activations and under-report the result.
    # NOTE(review): `data`/`label` still hold the final minibatch of the epoch,
    # so this is the accuracy on that single batch, not on the whole training set.
    acc_train = sess.run(accuracy, feed_dict={X: data, y: label, training: False})
    print(j, "Train accuracy:", acc_train)

In [None]:
%%time
# Build the prediction op once; creating it inside the loop would add a new
# argmax node to the graph for every minibatch.
pred = tf.argmax(model, 1)
y_guess = list()
# Labels are not needed for prediction; they are compared after the loop
for data, label in minibatch_from(x_test, y_test):
    # Dropout is disabled for inference
    output = sess.run(pred, feed_dict={X: data, training: False})
    y_guess.append(output)
# Fraction of test samples whose predicted class equals the true label
print("Accuracy: ", np.mean(np.concatenate(y_guess) == y_test))

We can see that our accuracy has increased further to 80%. The VGG architecture actually has even more layers than our final model but it was designed to tackle the ImageNet dataset which contains a lot more data than the CIFAR10 dataset. Adding further layers with the limited data available would quickly prove untenable. 