# Fetching the Dataset:

In [48]:
import tensorflow as tf

In [63]:
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from the MNIST_data/ directory with one_hot=True
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [79]:
# Distinct values that occur in the first training image
first_image = mnist.train.images[0]
set(first_image)

{0.0,
 0.015686275,
 0.019607844,
 0.050980397,
 0.07058824,
 0.08235294,
 0.09019608,
 0.098039225,
 0.121568635,
 0.13333334,
 0.14509805,
 0.14901961,
 0.16078432,
 0.18823531,
 0.20000002,
 0.22352943,
 0.2392157,
 0.24313727,
 0.26666668,
 0.27058825,
 0.29411766,
 0.3019608,
 0.32156864,
 0.32941177,
 0.3372549,
 0.34901962,
 0.3529412,
 0.37647063,
 0.3803922,
 0.4156863,
 0.4431373,
 0.45098042,
 0.45882356,
 0.46274513,
 0.4666667,
 0.48627454,
 0.5019608,
 0.5411765,
 0.54509807,
 0.54901963,
 0.5568628,
 0.6156863,
 0.6509804,
 0.65882355,
 0.6627451,
 0.6901961,
 0.73333335,
 0.7411765,
 0.74509805,
 0.7803922,
 0.7843138,
 0.8078432,
 0.81568635,
 0.8235295,
 0.8352942,
 0.8431373,
 0.8588236,
 0.86274517,
 0.8705883,
 0.8745099,
 0.8862746,
 0.89019614,
 0.8941177,
 0.9058824,
 0.9176471,
 0.9215687,
 0.9333334,
 0.93725497,
 0.94117653,
 0.9450981,
 0.9490197,
 0.95294124,
 0.9607844,
 0.96470594,
 0.9725491,
 0.9803922,
 0.9843138,
 0.9921569,
 0.9960785}

# Initializing Weights and Biases

In [50]:
# Initializing all the constant parameters
input_width = 28
input_height = 28
input_channels = 1                          # input image is 28*28*1
# length of one flattened image; derived so it cannot drift from the sizes above
input_pixels = input_width * input_height * input_channels

n_conv1 = 32                                # no. of filters (output channels) in conv layer 1
n_conv2 = 64                                # no. of filters (output channels) in conv layer 2

conv1_k = 5                                 # filter size for conv layer 1
conv2_k = 5                                 # filter size for conv layer 2

conv1_stride = 1                            # stride for conv layer 1
conv2_stride = 1                            # stride for conv layer 2

pool1_k = 2                                 # pool size for pooling layer 1
pool2_k = 2                                 # pool size for pooling layer 2

n_hidden = 1024                             # no. of units in hidden layer
n_output = 10                               # no. of units in output layer (there are 10 classes)


def _same_padding_size(size, *strides):
    """Return the length of a spatial dimension after successive SAME-padded ops.

    With SAME padding an op of stride s maps a dimension of length n to
    ceil(n / s); the strides are applied in the order given.
    """
    for stride in strides:
        size = -(-size // stride)           # integer ceil division
    return size


# Strides seen by the image in order: conv1, pool1, conv2, pool2 (pooling uses
# its window size as its stride).
_layer_strides = (conv1_stride, pool1_k, conv2_stride, pool2_k)

# This represents the input size going to the hidden layer (from pooling layer 2).
# For the 28*28 input with stride-1 convolutions this is 7*7*n_conv2: height and
# width are halved twice (once per pooling layer). Computing it from every
# stride keeps it correct if a conv stride or the image size is changed.
input_size_to_hiddenlayer = (
    _same_padding_size(input_height, *_layer_strides)
    * _same_padding_size(input_width, *_layer_strides)
    * n_conv2
)

In [51]:
# Trainable parameters, each initialised from tf.random_normal.
# Keys: "c1"/"c2" -> conv layers 1 and 2, "h" -> hidden layer, "o" -> output layer.
weights = dict(
    c1=tf.Variable(tf.random_normal([conv1_k, conv1_k, input_channels, n_conv1])),
    c2=tf.Variable(tf.random_normal([conv2_k, conv2_k, n_conv1, n_conv2])),
    h=tf.Variable(tf.random_normal([input_size_to_hiddenlayer, n_hidden])),
    o=tf.Variable(tf.random_normal([n_hidden, n_output])),
)
bias = dict(
    c1=tf.Variable(tf.random_normal([n_conv1])),
    c2=tf.Variable(tf.random_normal([n_conv2])),
    h=tf.Variable(tf.random_normal([n_hidden])),
    o=tf.Variable(tf.random_normal([n_output])),
)
# Every layer gets a bias except the pooling layers.

# Forward Propagation:

In [52]:
def convolution(x, weights, bias, stride=1):
    """Run one convolution layer: SAME-padded conv2d, bias add, then ReLU.

    x       : input tensor handed to tf.nn.conv2d
    weights : filter tensor for the convolution
    bias    : bias tensor added with tf.nn.bias_add
    stride  : stride applied along both height and width (default 1)

    Returns the activated output tensor.
    """
    # strides is [images, height, width, channels]: the first and last entries
    # stay 1 so that no image and no channel is skipped.
    window_strides = [1, stride, stride, 1]

    activated = tf.nn.relu(
        tf.nn.bias_add(
            tf.nn.conv2d(x, weights, strides=window_strides, padding="SAME"),
            bias
        )
    )
    return activated

In [53]:
def pooling(x, k=2):
    """Max-pool x with a k*k window moved k steps at a time (SAME padding).

    k is the pool size; it is used for both the window size and the stride.
    """
    window = [1, k, k, 1]        # same layout as the conv2d strides argument
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

In [54]:
def cnn(x, weights, biases, keep_prob):
    """Build the forward pass: conv -> pool -> conv -> pool -> hidden -> dropout -> output.

    x         : batch of flattened images (one row per image)
    weights   : dict of weight tensors with keys "c1", "c2", "h", "o"
    biases    : dict of bias tensors with the same keys
    keep_prob : keep probability handed to tf.nn.dropout

    Returns the output-layer tensor with no activation applied (logits).
    """
    # Images arrive flattened; restore height*width*channels.
    # -1 lets TensorFlow infer the number of images.
    images = tf.reshape(x, shape=[-1, input_height, input_width, input_channels])

    conv1 = convolution(images, weights["c1"], biases["c1"], conv1_stride)   # after conv layer 1
    pool1 = pooling(conv1, pool1_k)                                          # after pooling layer 1
    conv2 = convolution(pool1, weights["c2"], biases["c2"], conv2_stride)    # after conv layer 2
    pool2 = pooling(conv2, pool2_k)                                          # after pooling layer 2

    # Flatten each image to a 1-D row for the fully connected hidden layer.
    flat = tf.reshape(pool2, shape=[-1, input_size_to_hiddenlayer])

    # Hidden layer with ReLU activation, followed by the dropout layer.
    hidden = tf.nn.relu(tf.add(tf.matmul(flat, weights["h"]), biases["h"]))
    hidden_after_dropout = tf.nn.dropout(hidden, keep_prob)

    # Output layer uses the identity activation.
    output = tf.add(tf.matmul(hidden_after_dropout, weights["o"]), biases["o"])
    return output

# Optimizing:

In [55]:
ses = tf.Session()
# NOTE: ses.run(tf.global_variables_initializer()) is deliberately NOT called
# here. Running it at this point would only initialize the variables created
# before this cell and would result in an error later on, because
# AdamOptimizer has its own variables which also need to be initialized.

In [56]:
# Placeholders, so that any desired input (train, test or validation data)
# can be fed in later on.
x = tf.placeholder(tf.float32, shape=[None, input_pixels])
y = tf.placeholder(tf.int32, shape=[None, n_output])

# keep_prob is a placeholder because dropout is only wanted during training:
# feed a value such as 0.8 while training and 1.0 while testing (so that all
# the testing data passes through). This also allows the training keep_prob
# to be changed later on in the code.
keep_prob = tf.placeholder(tf.float32)

pred = cnn(x, weights, bias, keep_prob)

In [57]:
# Mean softmax cross-entropy between the network output (logits) and the labels.
# The _v2 op is used because the un-suffixed op raises a deprecation warning
# (remove _v2 to see it).
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y)
)

In [58]:
# Adam optimizer with learning rate 0.01; `optimize` is the op that minimizes `cost`
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
optimize = optimizer.minimize(loss=cost)

In [60]:
# Initialize all global variables now that the optimizer has been created
init_op = tf.global_variables_initializer()
ses.run(init_op)

In [61]:
# Optimizing using mini-batch gradient descent: every step feeds batch_size
# training images to the optimizer.
# This will take some hours as there are many weights.
batch_size = 100
# The number of batches per epoch does not change, so compute it once.
num_batches = mnist.train.images.shape[0] // batch_size
for epoch in range(100):        # named `epoch` so the built-in iter() is not shadowed
    total_cost = 0
    for step in range(num_batches):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # keep_prob=0.8: dropout is active while training
        c, _ = ses.run([cost, optimize],
                       feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
        total_cost += c
    print(total_cost)           # sum of the batch costs for this epoch

760527.8340978622
24023.986181051732
14825.647005804683
10198.983971779413
10155.817009843886
10458.878803368973
7581.631994838268
7802.71980526032
7925.24695124953
7315.492102293252
5292.536300963599
5828.928874166671
4165.717502967664
3468.582054795027
4388.864816717729
3071.01848409013
2361.3068031206667
3928.6884337277324
2873.775004928157
2021.5280072032153
3314.2890032692185
2824.3441081284814
2643.750886018076
1760.6566221107475
2039.6279076071355
2036.9504055351686
2582.3366525556144
1885.7656999756719
1731.7163763058952
2010.0390537490184
1742.7571859448872
1511.603920751379
1993.1389727053386
1639.5083270730424
2354.685860394624
1583.1375458155603
1731.2513395837304
2144.006169395725
1602.1907064692305
1723.8053895546857
1556.2196072489023
1897.1282285518387
1570.9283383473753
1692.176312802927
2579.4086716920137
1776.7032322727364
1215.4996296328306
1902.2291058024753
1696.9001075863443
1820.1481824219227
1675.3447245144844
1541.4318955496208
1595.4300154982984
2169.61753951

In [62]:
# Class index with the largest score for the network output and for the labels
predictions = tf.argmax(pred, axis=1)
true = tf.argmax(y, axis=1)
correct_pred = tf.equal(predictions, true)

# Evaluate on the test data; keep_prob is 1.0 so that no dropout is applied.
y_pred, y_test, correct_predictions = ses.run(
    [predictions, true, correct_pred],
    feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0}
)

correct_predictions.sum()

# see that 9894/10000 predictions are correct

9894