In [None]:
import tensorflow as tf

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST data sets from the "MNIST_data/" directory.
# one_hot=True asks for the labels in one-hot encoded form, which is what the
# [None, n_out] label placeholder used later in this notebook expects.
mnist = input_data.read_data_sets("MNIST_data/",one_hot=True)

In [None]:
# image dims
input_width = 28
input_height = 28

# image channels
input_channels = 1
# total values in one flattened image; derived so it cannot drift from the dims above
input_pixels = input_width * input_height * input_channels

# number of filters (output channels) in each convolutional layer
n_conv1 = 32
n_conv2 = 64

# filter size -> filters are square (k x k)
conv1_k = 5
conv2_k = 5
# stride of each convolution
stride1 = 1
stride2 = 1
# pooling window size (also used as the pooling stride)
max_pool1_k = 2
max_pool2_k = 2
# dense layer size
n_hidden = 1024
# number of classes
n_out = 10


def _size_after_same_ops(size, steps):
    """Return the spatial length left after a chain of 'SAME'-padded ops.

    With 'SAME' padding, an op that moves in steps of `s` maps a length `n`
    to ceil(n / s). `steps` lists the step of every conv/pool op in order.
    """
    for step in steps:
        size = -(-size // step)  # ceiling division without needing `math`
    return size


# Every op that can shrink the image, in the order the network applies them.
_downsampling_steps = (stride1, max_pool1_k, stride2, max_pool2_k)

# Size of the flattened tensor fed to the dense layer: the final width and
# height after all convs/pools, times the channel count of the last conv layer.
input_size_to_hidden = (
    _size_after_same_ops(input_width, _downsampling_steps)
    * _size_after_same_ops(input_height, _downsampling_steps)
    * n_conv2
)

In [None]:
def _random_normal_variable(shape):
    """Create a trainable variable of the given shape, initialised from a normal distribution."""
    return tf.Variable(tf.random_normal(shape))


# Convolution kernels are shaped [filter_height, filter_width, in_channels, out_channels];
# dense matrices are shaped [inputs, outputs].
weights = dict(
    wcl1=_random_normal_variable([conv1_k, conv1_k, input_channels, n_conv1]),
    # the second conv layer's input channels are the first layer's filters
    wcl2=_random_normal_variable([conv2_k, conv2_k, n_conv1, n_conv2]),
    wh1=_random_normal_variable([input_size_to_hidden, n_hidden]),
    wo=_random_normal_variable([n_hidden, n_out]),
)

# One bias per output channel / unit of the matching layer.
biases = dict(
    bcl1=_random_normal_variable([n_conv1]),
    bcl2=_random_normal_variable([n_conv2]),
    bh1=_random_normal_variable([n_hidden]),
    bo=_random_normal_variable([n_out]),
)

In [None]:
# Convolutional layer helper. A layer does three things:
#   1) apply the convolution
#   2) add the biases
#   3) apply the activation function

def conv(x, weights,bias, strides = 1):
    """Apply one convolutional layer: conv2d -> bias add -> ReLU.

    Args:
        x: input tensor passed to tf.nn.conv2d.
        weights: convolution kernel passed to tf.nn.conv2d.
        bias: bias tensor added to the convolution output.
        strides: how many pixels the filter shifts between applications,
            used for both spatial dimensions.

    Returns:
        The activated output tensor of the layer.
    """
    # Stride list has one entry per input dimension. The first (batch) and
    # last (channel) entries are 1 so no image and no channel is skipped;
    # the two middle entries are the spatial shift.
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, weights, strides=stride_spec, padding="SAME")
    return tf.nn.relu(tf.nn.bias_add(convolved, bias))

# Max-pooling helper.
def maxpooling(x , k = 2):
    """Max-pool `x` with a k x k window that moves k pixels at a time.

    The same [1, k, k, 1] list is used for the window size and the stride,
    so consecutive pooling windows do not overlap.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")


In [None]:
# Build the model: two conv + max-pool stages, one dense hidden layer, one output layer.
def cnn(x, weights , biases):
    """Build the CNN graph and return the output-layer logits.

    Args:
        x: batch of flattened images; reshaped here to a 4-D image tensor.
        weights: dict with the keys 'wcl1', 'wcl2', 'wh1' and 'wo'.
        biases: dict with the keys 'bcl1', 'bcl2', 'bh1' and 'bo'.

    Returns:
        The output layer's pre-activation values (no softmax is applied here).
    """
    # Restore the image layout [batch, height, width, channels]; -1 lets
    # TensorFlow infer the batch size. Using input_channels (rather than a
    # literal 1) keeps this in step with the shape of weights['wcl1'].
    x = tf.reshape(x, shape=[-1, input_height, input_width, input_channels])

    # convolutional layer 1 followed by max pooling 1
    conv1 = conv(x, weights['wcl1'], biases['bcl1'], stride1)
    conv1_pool = maxpooling(conv1, max_pool1_k)

    # convolutional layer 2 followed by max pooling 2
    conv2 = conv(conv1_pool, weights['wcl2'], biases['bcl2'], stride2)
    conv2_pool = maxpooling(conv2, max_pool2_k)

    # Flatten each example to a vector so it can be fed to the dense layer.
    hidden_input = tf.reshape(conv2_pool, shape=[-1, input_size_to_hidden])

    # dense hidden layer with ReLU activation
    hidden_output_before_activation = tf.add(tf.matmul(hidden_input, weights['wh1']), biases['bh1'])
    hidden_output = tf.nn.relu(hidden_output_before_activation)

    # Output layer: identity activation, i.e. the raw values are returned.
    output = tf.add(tf.matmul(hidden_output, weights['wo']), biases['bo'])

    return output

Refer to the earlier neural network code if any part of this needs further explanation.

In [None]:
# Placeholders are fed at run time: a batch of flattened images and the
# matching one-hot label rows. `None` leaves the batch size open.
x = tf.placeholder(tf.float32, shape=[None, input_pixels])
y = tf.placeholder(tf.int32, shape=[None, n_out])

# Output-layer values of the network for the images fed into `x`.
pred = cnn(x, weights, biases)

In [None]:
# Softmax cross-entropy of every example in the batch, averaged into one scalar loss.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=pred)
cost = tf.reduce_mean(cross_entropy)

In [None]:
# Adam optimizer with a learning rate of 0.01.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
# Op that minimizes `cost`; it is run once per batch in the training loop.
optimize = optimizer.minimize(cost)


In [None]:
# Create the session used by the cells below and initialise all global
# variables before any training step is run.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [None]:
# Train by running the optimizer on mini-batches of the training set.
batch_size = 100
# Number of whole batches in one pass over the training set
# (mnist.train.num_examples is the size of the training set). It does not
# change between epochs, so it is computed once, with integer division.
num_batches = mnist.train.num_examples // batch_size
for i in range(25):
    total_cost = 0
    for j in range(num_batches):
        # next_batch hands out the data batch by batch; only the size is needed
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        c, _ = sess.run([cost, optimize], feed_dict={x: batch_x, y: batch_y})
        total_cost += c
    # Report the cost summed over the whole epoch rather than only the last
    # batch's cost, which is noisy and ignored the accumulated total.
    print(total_cost)
        

In [None]:
# Class index chosen by the network and class index of the one-hot label.
true_labels = tf.argmax(y, axis=1)
predictions = tf.argmax(pred, axis=1)
# Element-wise True where the predicted class matches the label.
correct_predictions = tf.equal(predictions, true_labels)

# Evaluate both tensors on the full test set; from here on the two names
# refer to the resulting arrays rather than to graph tensors.
test_feed = {x: mnist.test.images, y: mnist.test.labels}
predictions, correct_predictions = sess.run(
    [predictions, correct_predictions],
    feed_dict=test_feed,
)

# Number of correctly classified test images (a count, not a percentage).
correct_predictions.sum()
# Author's notes from earlier runs:
# - around 85% accuracy -> far better than the earlier 10%, and good for random initialization
# - around 96% accuracy when training in batches, which also gets there faster


In [None]:
# DROPOUT LAYER

"""
Dropout is used to keep a neural network from overfitting.

It is a form of regularization: we insert a dropout layer between the hidden layers.

What it does -> on every training step it randomly drops a fraction of the layer's units (activations), e.g. 20% of them.

The dropped units are then ignored in both forward and backward propagation for that step.

We rely on the model still learning well with some units missing: it cannot depend on any single unit, which usually improves accuracy on unseen data.


With regularization the accuracy may increase or decrease, depending on how the dropout layer is configured (e.g. how many units it drops).


Dropout is used only at training time, so at test time we pass keep_prob = 1,
i.e. keep every unit.

During training we pass keep_prob = 0.8, i.e. keep 80% of the units and drop the remaining 20%.


Its syntax looks like this:
tf.nn.dropout(hidden_output, keep_prob)



"""