# Convolutional Neural Networks :

> CNNs are structured slightly differently from standard neural nets: they use a special layer, called a convolution layer, which detects edges and other features in an image. Because of this layer, CNNs generally perform better than MLPs on image-related tasks.
> CNNs also usually train faster than a comparable MLP, because a convolution layer shares its weights across the whole image and therefore has far fewer parameters to learn.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import tensorflow as tf
from tensorflow.python.client import device_lib 
from tqdm import tqdm # Progress bar; used to show the time taken by each epoch.
# List the devices that TensorFlow can see on this machine.
print(device_lib.list_local_devices())

  from ._conv import register_converters as _register_converters


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7637396090542304645
]


In [None]:
# Fail fast if TensorFlow does not report the first GPU: the rest of the
# notebook is meant to be run with GPU acceleration.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')

## LeNet architecture CNN :

> LeNet was one of the first CNN architectures, introduced by Yann LeCun and colleagues in the late 20th century (1989–1998). It is one of the simplest architectures that still uses all the layer types commonly found in a CNN.

<img src="LeNet.jpg">

In [2]:
# Load the MNIST data on which we will train the LeNet.
from tensorflow.examples.tutorials.mnist import input_data
import os

# Directory in which the MNIST archives are stored and extracted from.
path = "FFNN/"
# exist_ok=True creates the directory only when it is missing, without the
# race-prone "check, then create" sequence.
os.makedirs(path, exist_ok=True)

# one_hot=True returns each label as a one-hot vector instead of a digit.
mnist = input_data.read_data_sets(path, one_hot=True)

# Show the shapes of the images and labels, first for train, then for test.
for split in (mnist.train, mnist.test):
    print(split.images.shape)
    print(split.labels.shape)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting FFNN/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting FFNN/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting FFNN/t10k-images-idx3-ubyte.gz
Extracting FFNN/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
(55000, 784)
(55000, 10)
(10000, 784)
(10000, 10)


In [23]:
# Parameters describing the data and the training of the LeNet model.

# Each MNIST image is img_size x img_size pixels (28x28).
img_size = 28
# Classification task over the digits 0-9.
num_classes = 10

# Number of training samples fed to the network in one mini-batch.
batch_size = 3000
# Number of test samples used for each accuracy evaluation.
test_size = 3000

In [4]:
# Input placeholder of shape [None, img_size, img_size, 1]:
#   - None leaves the number of images in a batch open,
#   - img_size appears twice for the height and width of each image,
#   - 1 is the number of channels. The images are black and white, so there
#     is a single channel; RGB images would have 3.
# A tensor with n channels has depth n, i.e. its size is
# img_size x img_size x n.
X = tf.placeholder(dtype=tf.float32, shape=[None, img_size, img_size, 1])
# One row of num_classes label values per image.
Y_true = tf.placeholder(dtype=tf.float32, shape=[None, num_classes])

In [5]:
# Short names for the train (tr*) and test (te*) images (X) and labels (Y).
trX, trY = mnist.train.images, mnist.train.labels
teX, teY = mnist.test.images, mnist.test.labels

In [6]:
# The images are loaded as flat vectors; reshape them to
# img_size x img_size x 1 so they match the input placeholder.
# -1 lets NumPy infer the number of images.
trX = np.reshape(trX, (-1, img_size, img_size, 1))
teX = np.reshape(teX, (-1, img_size, img_size, 1))

#### Defining network weights :

In [7]:
def init_weights(shape, stddev=0.01):
    """Create a trainable weight variable filled with small random values.

    Args:
        shape: Shape of the variable to create, e.g. ``[3, 3, 1, 32]``.
        stddev: Standard deviation of the normal distribution the initial
            values are drawn from. Defaults to 0.01, the value that was
            previously hard-coded.

    Returns:
        A ``tf.Variable`` of the requested shape.
    """
    # We keep a simple initialization.
    # Xavier/Glorot or He-normal initialization could be used here too.
    initial_values = tf.random_normal(shape, stddev=stddev)
    return tf.Variable(initial_values)

In [8]:
# Following the architecture image, the first convolution uses 3x3 kernels.
# The input image has depth 1, so each kernel (the matrix that extracts
# features from the image) has size 3x3x1.
# The layer has 32 such kernels (filters). We could declare 32 separate 3x3x1
# tensors, but keeping them together in one 3x3x1x32 tensor lets them be
# applied in parallel.
w1 = init_weights(shape=[3, 3, 1, 32])  # [height, width, in_depth, out_depth]

# Note: this layer outputs 32 2-D feature maps per image, i.e. a 3-D tensor of
# depth 32. Without padding each map would be 28-3+1 = 26 wide and high; the
# network defined below convolves with padding='SAME', so the maps stay 28x28.

In [9]:
# Pooling is added later, when the graph is defined; for now we only declare
# the weights. The input depth is 32 because the first conv layer produces 32
# feature maps; this layer produces 64.
w2 = init_weights(shape=[3, 3, 32, 64])

In [10]:
# Third convolution: 3x3 kernels taking the 64 feature maps of the second
# conv layer and producing 128.
w3 = init_weights(shape=[3, 3, 64, 128])

In [11]:
# The fourth layer is fully connected: it takes the flattened output of the
# last conv stage and maps it to 625 features.
# The input length is 128 * 4 * 4: 128 feature maps of size 4x4, because the
# three 2x2 / stride-2 'SAME' poolings in the graph shrink 28 -> 14 -> 7 -> 4.
w4 = init_weights(shape=[128 * 4 * 4, 625])

In [12]:
# The output layer takes the 625 features of the fully connected layer and
# produces one score per class (num_classes = 10 outputs).
w_o = init_weights(shape=[625, num_classes])

In [13]:
# DropOut Values
p_keep_conv = tf.placeholder("float") 
p_keep_hidden = tf.placeholder("float")

In [18]:
# Defining a LeNet Copy function as we are trying to mimic LeNet architecture..
# Defining a LeNetCopy function, as we are trying to mimic the LeNet architecture.
def LeNetCopy(X, w, w2, w3, w4, w_o, p_keep_conv, p_keep_hidden):
    """Build a LeNet-style CNN graph and return its unscaled class scores.

    Three conv -> ReLU -> 2x2 max-pool stages are followed by one fully
    connected ReLU layer and a linear output layer. No bias terms are used.

    Args:
        X: Batch of input images fed to the first convolution
            (in this notebook: [batch, 28, 28, 1]).
        w: Kernel tensor of the first convolution.
        w2: Kernel tensor of the second convolution.
        w3: Kernel tensor of the third convolution.
        w4: Weight matrix of the fully connected layer. Its first dimension
            is the length the pooled feature maps are flattened to.
        w_o: Weight matrix of the output layer.
        p_keep_conv: Dropout keep probability used after the first two conv
            stages and on the flattened features.
        p_keep_hidden: Dropout keep probability used after the fully
            connected layer.

    Returns:
        The result of the final matrix multiplication (logits); no softmax
        is applied here.
    """
    # tf.nn.conv2d adds a convolution layer. It expects the images to convolve
    # and the kernel tensor. Use the `w` argument here, not a global, so the
    # weights passed by the caller are the ones actually applied.
    # Strides tell how far the kernel moves after each application.
    # padding='SAME' pads the image so that, with stride 1, the output keeps
    # the input's height and width.
    conv1 = tf.nn.conv2d(X, w, strides=[1, 1, 1, 1], padding='SAME')
    # The output depth equals the number of kernels in `w`.
    # With p rows/columns of padding on each side the output size is
    # n + 2p - k + 1. For n = 28 and k = 3 to give 28 again, p must be 1,
    # i.e. one row/column of padding is added on every side.

    # After convolving we pass the output through a ReLU.
    conv1 = tf.nn.relu(conv1)

    # According to the architecture the feature maps are now pooled.
    # With 'SAME' padding and stride s = 2 the output size is ceil(n / s),
    # so a 28x28 map becomes 14x14.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, p_keep_conv)

    # The second stage is built the same way (14x14 -> 7x7 for a 28x28 input).
    conv2 = tf.nn.conv2d(conv1, w2, strides=[1, 1, 1, 1], padding='SAME')
    conv2 = tf.nn.relu(conv2)
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, p_keep_conv)

    # Third convolution; its max-pool is applied below, just before flattening.
    conv3 = tf.nn.conv2d(conv2, w3, strides=[1, 1, 1, 1], padding='SAME')
    conv3 = tf.nn.relu(conv3)

    # Pool the third stage (7x7 -> 4x4 for a 28x28 input) ...
    FC_layer = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    # ... and flatten each sample into a vector whose length is the first
    # dimension of w4, so it can be multiplied with w4.
    FC_layer = tf.reshape(FC_layer, [-1, w4.get_shape().as_list()[0]])
    # Apply dropout to the flattened features.
    FC_layer = tf.nn.dropout(FC_layer, p_keep_conv)

    # Fully connected layer, as in an MLP (no bias included here).
    output_layer = tf.nn.relu(tf.matmul(FC_layer, w4))
    output_layer = tf.nn.dropout(output_layer, p_keep_hidden)

    # The result is obtained after the final weight matrix multiplication.
    result = tf.matmul(output_layer, w_o)
    return result

In [19]:
# Build the network graph; py_x holds the unscaled class scores (logits).
py_x = LeNetCopy(X, w1, w2, w3, w4, w_o,
                 p_keep_conv=p_keep_conv, p_keep_hidden=p_keep_hidden)
# Softmax cross-entropy between the logits and the true labels.
Y_ = tf.nn.softmax_cross_entropy_with_logits_v2(logits=py_x, labels=Y_true)

In [20]:
# The training cost is the mean of the cross-entropy values in Y_.
cost = tf.reduce_mean(Y_)

In [21]:
# One optimizer step: RMSProp minimizing the cost.
optimizer = tf.train.RMSPropOptimizer(
    learning_rate=0.001,
    decay=0.9,
).minimize(cost)

In [22]:
# Predicted class = index of the largest score along axis 1 of py_x.
predict_op = tf.argmax(py_x, axis=1)

In [27]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    num_train = len(trX)
    for i in tqdm(range(100)):
        # Walk through the training set in consecutive mini-batches.
        # Slicing clamps at the end of the array, so the final (smaller)
        # batch is trained on too instead of being silently dropped.
        for start in range(0, num_train, batch_size):
            end = start + batch_size
            sess.run(optimizer, feed_dict={X: trX[start:end],
                                           Y_true: trY[start:end],
                                           p_keep_conv: 0.8,
                                           p_keep_hidden: 0.5})

        # Evaluate on a fresh random subset of test_size test images.
        test_indices = np.random.permutation(len(teX))[:test_size]

        # Keep probabilities of 1.0 disable dropout during evaluation.
        predictions = sess.run(predict_op, feed_dict={X: teX[test_indices],
                                                      p_keep_conv: 1.0,
                                                      p_keep_hidden: 1.0})
        # Accuracy = fraction of samples whose predicted digit matches the label.
        true_digits = np.argmax(teY[test_indices], axis=1)
        print(i, np.mean(true_digits == predictions))


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 