# The aim of this notebook is to implement a CNN in TensorFlow

First let's create a simple CNN layer as shown below:
![](figures/figure7.jpg)
Even though we won't run it on real data, this will help us understand the concept of a CNN.

In [2]:
# Library imports
import tensorflow as tf

In [4]:
# Hyper-parameters of the toy convolution layer
image_height, image_width, input_channels = 32, 32, 3
kernel_height, kernel_width, k_kernels = 3, 3, 20
vertical_stride, horizontal_stride = 2, 2

# Shapes and strides spelled out once, in the order tf.nn.conv2d expects them
input_shape = [None, image_height, image_width, input_channels]  # None = any batch size
kernel_shape = [kernel_height, kernel_width, input_channels, k_kernels]
layer_strides = [1, vertical_stride, horizontal_stride, 1]

x = tf.placeholder(tf.float32, shape=input_shape)
weight = tf.Variable(tf.truncated_normal(kernel_shape))
bias = tf.Variable(tf.zeros(k_kernels))

# convolution -> bias addition -> RELU activation function
conv_layer = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(x, filter=weight, strides=layer_strides, padding='SAME'),
        bias,
    )
)

![](figures/figure8.jpg)

In [5]:
# Apply a 2x2 max-pool layer (stride 2) on top of the convolution layer above.
# The result is bound to its own name instead of overwriting `conv_layer`, so
# re-running this cell does not stack one more pooling op on an already pooled tensor.
pooled_layer = tf.nn.max_pool(conv_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

![](figures/figure9.jpg)

# Implementing a LeNet CNN to solve MNIST

We will use a LeNet architecture to predict the digits in the MNIST dataset. The LeNet network has the following architecture:

![](figures/figure10.jpg)



In [3]:
# Import libraries
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

In [2]:
# Load MNIST with one-hot labels; reshape=False keeps each image as a 3-D array
# instead of flattening it
mnist_data = input_data.read_data_sets('/datasets/ud730/mnist', one_hot=True, reshape=False)

# Pull images and labels out of each split as float32 arrays
train_split, test_split = mnist_data.train, mnist_data.test
train_data, train_labels = train_split.images.astype(np.float32), train_split.labels.astype(np.float32)
test_data, test_labels = test_split.images.astype(np.float32), test_split.labels.astype(np.float32)

Extracting /datasets/ud730/mnist\train-images-idx3-ubyte.gz
Extracting /datasets/ud730/mnist\train-labels-idx1-ubyte.gz
Extracting /datasets/ud730/mnist\t10k-images-idx3-ubyte.gz
Extracting /datasets/ud730/mnist\t10k-labels-idx1-ubyte.gz


In [4]:
# Report the shape of every array loaded above.
# NOTE: the "trainig" spelling is kept on purpose so the printed text stays
# identical to the recorded cell output.
shape_report = (
    ("training", train_data),
    ("testing", test_data),
    ("labels trainig", train_labels),
    ("labels testing", test_labels),
)
for label, array in shape_report:
    print(f"{label} dataset size: {array.shape}")

training dataset size: (55000, 28, 28, 1)
testing dataset size: (10000, 28, 28, 1)
labels trainig dataset size: (55000, 10)
labels testing dataset size: (10000, 10)


In [5]:
def conv_layer(input_layer, weights, bias, stride=[1,1,1,1], pad='SAME', name=None):
    """
        Build a 2-D convolution followed by a bias addition and a RELU activation.

        @input input_layer: input feature maps or pixels
        @input weights: filter tensor used to perform the convolution
        @input bias: bias added to the convolution output
        @input stride: strides in the order [batch, vertical_stride, horizontal_stride,
                                             channels or depth stride]
        @input pad: padding scheme, either 'SAME' or 'VALID'
        @input name: optional name given to the convolution op only (the bias and
                     RELU ops keep their automatic names); None lets TensorFlow choose

        @output convLayer: relu(conv2d(input_layer, weights) + bias)
    """
    # tf.nn.conv2d itself defaults to name=None, so `name` is forwarded as-is;
    # there is no need for a separate branch when no name was given.
    # `stride` is only read, never mutated, so the shared list default is safe here.
    convLayer = tf.nn.conv2d(input_layer, filter=weights, strides=stride, padding=pad, name=name)
    convLayer = tf.nn.bias_add(convLayer, bias)
    return tf.nn.relu(convLayer)  # RELU activation function

In [6]:
def pooling_layer(input_layer, filter_size = [1, 2, 2, 1], stride = [1, 2, 2, 1], pad = 'SAME', name=None):
    """
        Apply max pooling to the input layer based on the parameters below.

        @input input_layer: input feature maps or pixels
        @input filter_size: dimensions of the pooling window [batch, filter_height, filter_width,
                                                              filter_depth]
        @input stride: strides in the order [batch, vertical_stride, horizontal_stride,
                                             channels or depth stride]
        @input pad: padding scheme, either 'SAME' or 'VALID'
        @input name: optional name for the pooling op; None lets TensorFlow choose

        @output pool_layer: tensor produced by tf.nn.max_pool
    """
    # tf.nn.max_pool itself defaults to name=None, so `name` is forwarded as-is;
    # there is no need for a separate branch when no name was given.
    # The list defaults are only read, never mutated, so sharing them is safe here.
    return tf.nn.max_pool(input_layer, ksize=filter_size, strides=stride, padding=pad, name=name)

### Building the architecture

In [26]:
## Parameters
learning_rate = 0.001
training_epochs = 10
batch_size = 128  # Decrease batch size if you don't have enough memory
display_step = 1

n_input = train_data.shape[1:4]  # MNIST data input (img shape: 28x28x1)
n_classes = train_labels.shape[1]  # MNIST total classes (0-9 digits)

## architecture
# input layer
x = tf.placeholder(tf.float32, shape=[None] + list(n_input))
# labels
y = tf.placeholder("float", [None, n_classes])
# weights for conv layer 1 and 2 (each variable carries the name of its own layer)
weights = {'convLayer 1': tf.Variable(tf.truncated_normal([5,5,1,6], stddev=0.1),name="convLayer1_weights"),
           'convLayer 2': tf.Variable(tf.truncated_normal([5,5,6,16], stddev=0.1),name="convLayer2_weights")}
bias = {'convLayer 1': tf.Variable(tf.zeros([6])),
           'convLayer 2': tf.Variable(tf.zeros([16]))}
# 28x28x1 -> conv 5x5, SAME padding -> 28x28x6 -> 2x2 max-pool -> 14x14x6
conv1 = conv_layer(x, weights['convLayer 1'], bias=bias['convLayer 1'])
pool1 = pooling_layer(conv1)
# 14x14x6 -> conv 5x5, VALID padding -> 10x10x16 -> 2x2 max-pool -> 5x5x16
conv2 = conv_layer(pool1, weights['convLayer 2'], bias['convLayer 2'], pad='VALID')
pool2 = pooling_layer(conv2)
# 5x5x16 feature maps flattened into 400 features per image
flatten_layer = tf.contrib.layers.flatten(pool2)
# fully connected layer 1: 400 -> 120
FC1 = tf.add(tf.matmul(flatten_layer, tf.Variable(tf.truncated_normal([400, 120], stddev=0.1))),
             tf.Variable(tf.zeros([120])))
FC1 = tf.nn.relu(FC1)
# fully connected layer 2: 120 -> 84
FC2 = tf.add(tf.matmul(FC1, tf.Variable(tf.truncated_normal([120, 84], stddev=0.1))),
             tf.Variable(tf.zeros([84])))
FC2 = tf.nn.relu(FC2)
# output layer: 84 -> n_classes (sized from the labels instead of a hard-coded 10);
# no activation here because the loss below applies the softmax itself
logits = tf.add(tf.matmul(FC2, tf.Variable(tf.truncated_normal([84, n_classes], stddev=0.1))),
                tf.Variable(tf.zeros([n_classes])))

# Loss function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
# Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

## ******************************************************************
## Test model
# A prediction is correct when the arg-max of the logits matches the one-hot label
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

In [27]:
# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Number of full mini-batches per epoch; it does not change between epochs
    total_batch = mnist_data.train.num_examples // batch_size
    # Training cycle
    for epoch in range(training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist_data.train.next_batch(batch_size)
            # Run optimization op (backprop)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        # Display loss per epoch step
        if epoch % display_step == 0:
            # Both values are measured on the LAST mini-batch of the epoch only,
            # fetched in a single run since they share the same feed
            c, train_score = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            print(f"Epoch: {epoch+1} - cost= {c}, training_accuracy: {train_score}")
    print("Optimization Finished!")

    # Test model: reuse the `accuracy` op built with the graph. The result goes into
    # its own variable so the `accuracy` tensor is not overwritten by a float, which
    # would make a second run of this cell fail.
    # Evaluate on a slice of the test set if you don't have enough memory
    test_accuracy = sess.run(accuracy, feed_dict={x: mnist_data.test.images, y: mnist_data.test.labels})
    print(f"Test Accuracy:{test_accuracy}" )

Epoch: 1 - cost= 0.12936906516551971, training_accuracy: 0.953125
Epoch: 2 - cost= 0.0936274304986, training_accuracy: 0.9765625
Epoch: 3 - cost= 0.052664242684841156, training_accuracy: 0.9765625
Epoch: 4 - cost= 0.04286818578839302, training_accuracy: 0.9921875
Epoch: 5 - cost= 0.02058977633714676, training_accuracy: 0.9921875
Epoch: 6 - cost= 0.008332573808729649, training_accuracy: 1.0
Epoch: 7 - cost= 0.0058052497915923595, training_accuracy: 1.0
Epoch: 8 - cost= 0.006394656375050545, training_accuracy: 1.0
Epoch: 9 - cost= 0.013510813936591148, training_accuracy: 0.9921875
Epoch: 10 - cost= 0.010665135458111763, training_accuracy: 1.0
Optimization Finished!
Test Accuracy:0.988099992275238


**Conclusion:** As you can see, the CNN reaches about 98.8% accuracy on the test set, performing considerably better than a traditional fully connected network.