# Building an MNIST classifier using a convolutional NN in TensorFlow

Importing some libraries. The MNIST dataset comes preprocessed with the TensorFlow tutorial libraries.

In [1]:
# MNIST loader shipped with the TensorFlow tutorial examples.
from tensorflow.examples.tutorials.mnist import input_data
# Read the dataset from the 'MNIST_data' directory with one-hot encoded labels.
# `mnist` is used as a module-level global by the training code further down.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

import tensorflow as tf
# `time()` is used to measure the wall-clock duration of a training run.
from time import time

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz


 Defining some functions we will be using later on.

In [2]:
# predefined functions to add noise to initial values of weights and biases
def weight_variable(shape):
    """Create a weight variable of the requested shape.

    The starting values come from ``tf.truncated_normal`` with a standard
    deviation of 0.1, so the weights begin as small random numbers.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a bias variable of the requested shape, every entry starting at 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# predefined conv and pool layers with stride and padding set
def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, filter=W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


def validation_accuracy(loaded_mnist, accuracy, x, y_, batch_size=64):
    """Return the mean batch accuracy on the validation split.

    The previous implementation read ``loaded_mnist.test`` although the
    function (and the label printed by its caller) reports *validation*
    accuracy; using the test split while comparing hyperparameters leaks test
    data into model selection. The validation split is used instead.

    Args:
        loaded_mnist: dataset object exposing a ``validation`` split that has
            ``num_examples`` and ``next_batch(batch_size)``.
        accuracy: object whose ``eval(feed_dict=...)`` returns the accuracy of
            one batch (a default session must be active when this is a
            TensorFlow tensor).
        x: placeholder (feed key) for the batch inputs.
        y_: placeholder (feed key) for the batch labels.
        batch_size: number of examples per evaluated batch. It is clamped to
            the size of the split so small splits are still evaluated.

    Returns:
        The average of the per-batch accuracies. Only full batches are
        evaluated, so up to ``batch_size - 1`` examples are left out.

    Raises:
        ValueError: if the split is empty or ``batch_size`` is not positive.
    """
    split = loaded_mnist.validation
    num_examples = split.num_examples
    if num_examples <= 0:
        raise ValueError('the validation split contains no examples')
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    # Without the clamp a split smaller than batch_size yields zero batches
    # and the final division fails.
    batch_size = min(batch_size, num_examples)
    batch_num = num_examples // batch_size

    accuracy_sum = 0.0
    for _ in range(batch_num):
        inputs, labels = split.next_batch(batch_size)
        accuracy_sum += accuracy.eval(feed_dict={x: inputs, y_: labels})

    return accuracy_sum / batch_num


def countParameters():
    """Return the number of trainable parameters in the current graph (used for task 3).

    Each variable reported by ``tf.trainable_variables()`` contributes the
    product of its shape dimensions to the total.
    """
    def _size_of(variable):
        # Multiply all dimensions of one variable; an empty shape gives 1.
        size = 1
        for dim in variable.get_shape():
            size *= dim.value
        return size

    return sum(_size_of(variable) for variable in tf.trainable_variables())

Let's now build the actual convolutional net in a way that enables us to quickly change and test different hyperparameters.

In [3]:
def convNN(x, dev, num_filters=16):
    """Build the convolutional network and return its output logits.

    Layers, in order:
        reshape to [-1, 28, 28, 1]
        -> 3x3 convolution (num_filters filters, stride 1) + bias + ReLU
        -> 2x2 max pooling
        -> 3x3 convolution (num_filters filters, stride 1) + bias + ReLU
        -> 2x2 max pooling
        -> fully connected layer with 128 units + ReLU
        -> fully connected readout layer with 10 units.

    No softmax or loss is applied here; the returned tensor holds the raw
    readout values, and the loss is expected to be built by the caller.

    Args:
        x: input tensor; it is reshaped to [-1, 28, 28, 1].
        dev: device string handed to ``tf.device`` for every op built here.
        num_filters: number of filters of each convolutional layer (a plain
            Python int; it is used directly in the variable shapes).

    Returns:
        The output of the readout layer (``matmul(h_fc1, W_fc2) + b_fc2``).
    """
    # A single device scope covers the whole network instead of repeating
    # tf.device(dev) on every layer.
    with tf.device(dev):
        # reshape images
        with tf.name_scope('reshape'):
            x_image = tf.reshape(x, [-1, 28, 28, 1])

        # first convolution, bias and activation
        with tf.name_scope('conv1'):
            # The plain int is used for the shapes of weights and biases
            # alike; wrapping it in a tf.constant is unnecessary.
            W_conv1 = weight_variable([3, 3, 1, num_filters])
            b_conv1 = bias_variable([num_filters])
            h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

        # max pool layer 1 (this scope was previously named 'pool2' as well,
        # which made TensorFlow rename the real second pooling scope)
        with tf.name_scope('pool1'):
            h_pool1 = max_pool_2x2(h_conv1)

        # second convolution, bias and activation
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([3, 3, num_filters, num_filters])
            b_conv2 = bias_variable([num_filters])
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        # max pool layer 2
        with tf.name_scope('pool2'):
            h_pool2 = max_pool_2x2(h_conv2)

        # flatten input and feed forward
        with tf.name_scope('fcl1'):
            # Two stride-2 poolings shrink each 28x28 image to 7x7.
            flat_size = 7 * 7 * num_filters
            W_fc1 = weight_variable([flat_size, 128])
            b_fc1 = bias_variable([128])
            h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
            h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

        # readout layer, fully connected
        with tf.name_scope('fcl2'):
            W_fc2 = weight_variable([128, 10])
            b_fc2 = bias_variable([10])
            y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2

    return y_conv

With all these tools, we can now build an easy way to run tests and check nets with different parameters with just one function call.

In [4]:
def testArchitecture(learning_rate=0.1, num_filters=16, device='gpu',
                     num_steps=2000, batch_size=50, eval_every=500,
                     log_dir='C:/Users/Kai/Desktop/Logs/'):
    """Build a conv NN with the given parameters, train it and report on it.

    Prints the number of trainable parameters, the accuracy returned by
    ``validation_accuracy`` every ``eval_every`` steps, and the wall-clock
    time of the training loop. Cross-entropy and accuracy summaries are
    written for every step to ``<log_dir>/<learning_rate>/<num_filters>/<device>``.

    Args:
        learning_rate: step size of the gradient-descent optimizer.
        num_filters: number of filters per convolutional layer.
        device: ``'gpu'`` selects ``'/GPU:0'``; any other value selects
            ``'/cpu:0'``. The string is also used in the log path.
        num_steps: number of training steps (mini-batches) to run.
        batch_size: number of training examples per step.
        eval_every: evaluate and print the accuracy every this many steps;
            a falsy value (0 or None) disables the periodic evaluation.
        log_dir: root directory for the summary files. The default keeps the
            location that was previously hard-coded; pass a path of your own
            to write somewhere else.

    Returns:
        None.
    """
    import os  # local import: only needed to assemble the log path

    dev = '/GPU:0' if device == 'gpu' else '/cpu:0'
    print('Starting to train a convolutional NN with {} filters, learning rate {}, using the {}.'.format(num_filters, learning_rate, device))

    run_log_dir = os.path.join(log_dir, str(learning_rate), str(num_filters), device)

    # Build everything in a fresh graph so that repeated calls do not pile up
    # variables and summaries from earlier runs (which would inflate the
    # parameter count and make merge_all pick up stale summaries).
    with tf.Graph().as_default():
        with tf.device(dev):
            # Placeholder for input
            x = tf.placeholder(tf.float32, [None, 784])

            # Placeholder for the expected (one-hot) output
            y_ = tf.placeholder(tf.float32, [None, 10])

        # build a net with the given parameters
        y = convNN(x, dev, num_filters)

        with tf.device(dev), tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
            cross_entropy = tf.reduce_mean(cross_entropy)
        tf.summary.scalar('cross_entropy', cross_entropy)

        with tf.device(dev), tf.name_scope("SGD"):
            train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)

        with tf.device(dev), tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
            accuracy = tf.reduce_mean(correct_prediction)

        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()

        with tf.device(dev), tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter(run_log_dir, graph=sess.graph)
            try:
                start_time = time()
                print("trainable parameters: " + str(countParameters()))
                for step in range(num_steps):
                    batch = mnist.train.next_batch(batch_size)
                    feed = {x: batch[0], y_: batch[1]}

                    if eval_every and step % eval_every == 0:
                        val_accuracy = validation_accuracy(mnist, accuracy, x, y_)
                        print('step {}, validation accuracy {:.2f}%'.format(step, val_accuracy*100))

                    # No tf.device scope is needed here: device scopes only
                    # affect ops while they are created, not while they run.
                    train_step.run(feed_dict=feed)

                    # collect data for summary (evaluated after the update,
                    # on the same batch)
                    summary_str = sess.run(merged_summary_op, feed_dict=feed)
                    writer.add_summary(summary_str, step)
                print("time needed: " + str(time() - start_time))
            finally:
                # Flush pending events and release the file handle even if
                # training is interrupted.
                writer.close()

That's it, let's check out some different parameters!

In [5]:
# Every experiment as (learning rate, number of filters, device), in run order:
# first the learning-rate sweep on the GPU, then the filter-count sweep on the
# GPU, and finally a smaller filter-count sweep on the CPU.
experiments = (
    [(rate, 16, 'gpu') for rate in [0.1, 0.01, 0.001, 0.0001]]
    + [(0.1, count, 'gpu') for count in [8, 16, 32, 64, 128, 256]]
    + [(0.1, count, 'cpu') for count in [8, 16, 32, 64]]
)

for testing_learning_rate, number_of_filters, device_name in experiments:
    testArchitecture(learning_rate=testing_learning_rate,
                     num_filters=number_of_filters,
                     device=device_name)
    # start the next experiment from an empty default graph
    tf.reset_default_graph()

Starting to train a convolutional NN with 16 filters, learning rate 0.1, using the gpu.
trainable parameters: 104250
step 0, validation accuracy 8.18%
step 500, validation accuracy 96.04%
step 1000, validation accuracy 97.40%
step 1500, validation accuracy 98.05%
time needed: 35.28899097442627
Starting to train a convolutional NN with 16 filters, learning rate 0.01, using the gpu.
trainable parameters: 104250
step 0, validation accuracy 10.09%
step 500, validation accuracy 86.67%
step 1000, validation accuracy 91.49%
step 1500, validation accuracy 93.18%
time needed: 32.38352966308594
Starting to train a convolutional NN with 16 filters, learning rate 0.001, using the gpu.
trainable parameters: 104250
step 0, validation accuracy 11.87%
step 500, validation accuracy 18.30%
step 1000, validation accuracy 36.12%
step 1500, validation accuracy 49.51%
time needed: 33.02908277511597
Starting to train a convolutional NN with 16 filters, learning rate 0.0001, using the gpu.
trainable parameter