In [1]:
import tensorflow as tf #tensorflow version 1.15

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
# Load MNIST from the local "MNIST_data/" directory with labels one-hot encoded.
mnist = input_data.read_data_sets(train_dir = "MNIST_data/", one_hot = True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [18]:
# Architecture hyper-parameters are declared here as constants instead of being hard-coded in the
# model code, so that different values can be tried by editing this one cell.

# Each input image arrives as a flat vector and is reshaped to height x width x channels
# (28 x 28 x 1) before the first convolution.
input_width = 28
input_height = 28
input_channels = 1
input_pixels = input_width * input_height * input_channels  # 784 for the values above

n_conv1 = 32  # number of filters (output channels) in the 1st convolutional layer
n_conv2 = 64  # number of filters (output channels) in the 2nd convolutional layer
stride_conv1 = 1
stride_conv2 = 1
k_conv1 = 5  # 5x5 filter in the 1st convolutional layer
k_conv2 = 5  # 5x5 filter in the 2nd convolutional layer
max_pool1_k = 2  # pool size (and pool stride) of the 1st pooling layer
max_pool2_k = 2  # pool size (and pool stride) of the 2nd pooling layer


def same_padding_size(size, *steps):
    """Return the spatial size left after a chain of "SAME"-padded strided layers.

    With padding="SAME" a convolution or pooling layer of stride s maps a dimension of
    length n to ceil(n / s). `steps` lists the stride of every layer in order.
    """
    for step in steps:
        size = -(-size // step)  # ceiling division without importing math
    return size


# Spatial size of the feature maps after conv1 -> pool1 -> conv2 -> pool2. The conv strides are
# included and ceiling division is used, so the value stays correct when any constant above changes.
hidden_height = same_padding_size(input_height, stride_conv1, max_pool1_k, stride_conv2, max_pool2_k)
hidden_width = same_padding_size(input_width, stride_conv1, max_pool1_k, stride_conv2, max_pool2_k)

input_size_to_hidden = hidden_height * hidden_width * n_conv2  # flattened size fed to the dense layer
n_hidden = 1024  # number of units in the dense layer
n_out = 10  # number of units in the output layer

In [5]:
# Trainable parameters. Every tensor is drawn from tf.random_normal with its default arguments.
# NOTE(review): the default standard deviation may be large for the dense layers; consider a
# smaller stddev if training is unstable.

# (name, shape) pairs are kept in tuples so the variables are created in a fixed order.
weight_shapes = (
    ("wc1", [k_conv1, k_conv1, input_channels, n_conv1]),  # conv1 filters
    ("wc2", [k_conv2, k_conv2, n_conv1, n_conv2]),         # conv2 filters
    ("wh1", [input_size_to_hidden, n_hidden]),             # dense layer
    ("wo", [n_hidden, n_out]),                             # output layer
)
weights = {name : tf.Variable(tf.random_normal(shape)) for name, shape in weight_shapes}

# One bias per output channel / unit of the corresponding layer.
bias_shapes = (
    ("bc1", [n_conv1]),
    ("bc2", [n_conv2]),
    ("bh1", [n_hidden]),
    ("bo", [n_out]),
)
biases = {name : tf.Variable(tf.random_normal(shape)) for name, shape in bias_shapes}

In [19]:
def conv(x, weights, bias, strides = 1):
    """Apply one convolutional layer: SAME-padded 2-D convolution, bias add, then ReLU.

    Args:
        x: 4-D input, laid out as batch x height x width x channels.
        weights: filter tensor passed to tf.nn.conv2d.
        bias: bias tensor added to the convolution output with tf.nn.bias_add.
        strides: step of the filter along both height and width.

    Returns:
        The activated feature maps.
    """
    # tf.nn.conv2d takes one stride per dimension of x. The batch and channel strides stay 1 so
    # that no image is skipped and the filter is not moved along the depth; only the two spatial
    # strides are configurable.
    stride_spec = [1, strides, strides, 1]
    feature_maps = tf.nn.conv2d(x, weights, strides = stride_spec, padding = "SAME")
    # conv2d does not add a bias itself, so it is added separately before the activation.
    with_bias = tf.nn.bias_add(feature_maps, bias)
    return tf.nn.relu(with_bias)

def maxpooling(x, k = 2):
    """Down-sample the output of a convolutional layer with SAME-padded k x k max pooling.

    Args:
        x: 4-D input, laid out as batch x height x width x channels.
        k: pool size; also used as the pooling stride.

    Returns:
        The pooled tensor.
    """
    # Window size and stride are both [1, k, k, 1]: pooling covers a single image and a single
    # channel at a time and moves k pixels at a time along height and width. The pooling stride is
    # independent of the convolution stride and could be chosen differently.
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize = window, strides = window, padding = "SAME")
    
    
#Note: if the padding in conv() and maxpooling() is changed to "VALID", the spatial size reaching the hidden layer
#changes, so both the shape of the hidden-layer weights and the reshape before the hidden layer must be updated too.

#Example 1: tf.nn.max_pool(x, padding="SAME", ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1]) applied to a 28X28X1 input
#gives a 28X28X1 output, because with padding="SAME" and a stride of 1 the spatial size is unchanged.
#Example 2: tf.nn.max_pool(x, padding="VALID", ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1]) applied to the same input
#gives a 9X9X1 output: with "VALID" padding each spatial size is floor((28 - 3)/3) + 1 = 9, i.e. a 3X3 window moved
#3 pixels at a time fits 9 times into 28 pixels and the last pixel is dropped.

In [20]:
def cnn(x, weights, biases, keep_prob):
    """Forward propagation: two conv + max-pool stages, a dense ReLU layer with dropout, and a linear output.

    Args:
        x: batch of flattened images; it is reshaped to
            [-1, input_height, input_width, input_channels].
        weights: dict with keys 'wc1', 'wc2', 'wh1' and 'wo'.
        biases: dict with keys 'bc1', 'bc2', 'bh1' and 'bo'.
        keep_prob: probability of keeping a unit in the dropout layer.

    Returns:
        The output-layer values before any activation (logits).
    """
    # Restore the 2-D image layout; -1 lets TensorFlow infer the batch size.
    images = tf.reshape(x, shape = [-1, input_height, input_width, input_channels])

    # Two convolution + max-pooling stages.
    stage1 = maxpooling(conv(images, weights['wc1'], biases['bc1'], stride_conv1), max_pool1_k)
    stage2 = maxpooling(conv(stage1, weights['wc2'], biases['bc2'], stride_conv2), max_pool2_k)

    # Flatten every example to one row; input_size_to_hidden already holds the product of the
    # three remaining dimensions.
    flat = tf.reshape(stage2, shape = [-1, input_size_to_hidden])

    # Dense layer: weighted sum plus bias (WX + B), ReLU, then dropout.
    dense_linear = tf.add(tf.matmul(flat, weights['wh1']), biases['bh1'])
    dense_active = tf.nn.relu(dense_linear)
    dense_dropped = tf.nn.dropout(dense_active, keep_prob)

    # Output layer: weighted sum plus bias only, with no activation function.
    return tf.add(tf.matmul(dense_dropped, weights['wo']), biases['bo'])

In [21]:
# Graph inputs, followed by the network's predictions built from the forward pass.
x = tf.placeholder(tf.float32, shape = [None, input_pixels])  # flattened input images
y = tf.placeholder(tf.int32, shape = [None, n_out])           # one-hot labels
keep_prob = tf.placeholder(tf.float32)                         # dropout keep probability
pred = cnn(x, weights, biases, keep_prob)

In [22]:
# Cost: mean softmax cross-entropy over the batch. The _v2 op is used to avoid the deprecation
# warning raised by the older one.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels = y, logits = pred)
cost = tf.reduce_mean(per_example_loss)

In [24]:
# Optimization step: `optimizer` is the op returned by minimize(), i.e. running it performs one
# Adam update of the variables with respect to `cost`.
optimizer = tf.train.AdamOptimizer(learning_rate = 0.01).minimize(cost)

In [25]:
# Open a session and initialize all variables in it.
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

In [None]:
# Training: run the optimization op once per mini-batch, for a fixed number of passes over the
# training set, and print the summed batch cost of every pass.
# NOTE: this takes a long time to run.
batch_size = 100
n_epochs = 25
num_batches = mnist.train.num_examples // batch_size  # same for every epoch, so computed once
for i in range(n_epochs):
    total_cost = 0
    for j in range(num_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # The training op is named `optimizer`; the previous `optimize` was undefined and raised
        # a NameError. keep_prob 0.8 enables dropout during training.
        c, _ = sess.run([cost, optimizer], feed_dict = {x : batch_x, y : batch_y, keep_prob : 0.8})
        total_cost += c
    print(total_cost)

In [None]:
# Evaluation on the test set: the last expression is the number of correctly classified images.
# keep_prob must be 1.0 here so that dropout is switched off (use 1.0 in training as well if no
# dropout is wanted there).
test_feed = {x : mnist.test.images, y : mnist.test.labels, keep_prob : 1.0}

predictions = tf.argmax(pred, axis = 1)      # predicted class per image
correct_labels = tf.argmax(y, axis = 1)      # true class per image
correct_predictions = tf.equal(predictions, correct_labels)

# After this call `predictions` holds the evaluated array rather than the tensor.
predictions, correct_preds = sess.run([predictions, correct_predictions], feed_dict = test_feed)
correct_preds.sum()