In [1]:
#In the first part, we learned how to use a simple ANN to classify MNIST. 
#Now we are going to expand our knowledge using a Deep Neural Network.

#Architecture of our network is:

#    (Input) -> [batch_size, 28, 28, 1] >> Apply 32 filter of [5x5]
#    (Convolutional layer 1) -> [batch_size, 28, 28, 32]
#    (ReLU 1) -> [?, 28, 28, 32]
#    (Max pooling 1) -> [?, 14, 14, 32]
#    (Convolutional layer 2) -> [?, 14, 14, 64]
#    (ReLU 2) -> [?, 14, 14, 64]
#    (Max pooling 2) -> [?, 7, 7, 64]
#    [fully connected layer 3] -> [1x1024]
#    [ReLU 3] -> [1x1024]
#    [Drop out] -> [1x1024]
#    [fully connected layer 4] -> [1x10]

In [2]:
import tensorflow as tf

# If a session from an earlier run of this cell is still open, close it first
# by uncommenting the next line (on a fresh kernel `sess` does not exist yet).
#sess.close()

# Start an interactive session (TensorFlow 1.x-style API) and keep it in `sess`
sess = tf.InteractiveSession()

In [3]:
# Load MNIST through the TensorFlow tutorial helper, using the 'MNIST_data'
# directory, with one_hot=True requested for the labels.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# If this cell fails, re-run it until it succeeds (presumably the download step
# is flaky -- TODO confirm). The helper is deprecated, as the warnings printed
# below this cell show.

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [4]:
# Problem dimensions shared by the cells below

width, height = 28, 28  # image size in pixels (width, height)
flat = height * width  # pixel count of a single flattened image
class_output = 10  # how many classes the classifier chooses between

In [5]:
# Input and output placeholders (their values are supplied at run time)

# x: input images, one flattened image of `flat` pixels per row; None = any batch size
x  = tf.placeholder(tf.float32, shape=[None, flat])
# y_: target rows with `class_output` values per image. This is a fed placeholder,
# not something the network computes (the network's estimate is y_CNN);
# presumably it receives the one-hot ground-truth labels -- TODO confirm at the feed site
y_ = tf.placeholder(tf.float32, shape=[None, class_output])

In [6]:
# Convert the flat input rows back into image tensors.
# Each input image is height x width pixels with 1 channel (grayscale).
# The first dimension is the batch size; passing -1 lets tf.reshape infer it
# from the number of rows in x.
# The height/width constants defined above are used instead of repeating the
# literal 28, so this stays consistent with `flat = width * height`.

x_image = tf.reshape(x, [-1, height, width, 1])  # [batch, height, width, channels]
x_image

<tf.Tensor 'Reshape:0' shape=(?, 28, 28, 1) dtype=float32>

In [7]:
## Convolutional Layer 1

# Define the kernel weights and biases.
# The filter/kernel is 5x5, there is 1 input channel (grayscale),
# and we want 32 feature maps (i.e. 32 different filters are applied to each image,
# so the output of the convolution layer is 28x28x32).
# The kernel tensor has shape [filter_height, filter_width, in_channels, out_channels].

W_conv1 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1)) # [filter_height, filter_width, in_channels, out_channels]
b_conv1 = tf.Variable(tf.constant(0.1, shape=[32])) # one bias per output feature map (32)

In [8]:
# Convolve the input with the weight tensor and add the biases.

# To create the convolutional layer we use tf.nn.conv2d, which computes a 2-D
# convolution given 4-D input and filter tensors.
# Inputs:
#   - a tensor of shape [batch, in_height, in_width, in_channels];
#     here x_image with shape [batch_size, 28, 28, 1]
#   - a filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels];
#     here W_conv1 with shape [5, 5, 1, 32]
#   - strides, here [1, 1, 1, 1]. The convolutional layer slides the "kernel window"
#     across the input tensor. The input has 4 dimensions [batch, height, width, channels],
#     and the convolution operates on a 2-D window over the height and width dimensions.
#     strides determines how far the window shifts in each dimension.
#     The first and last entries relate to batch and channels, so they are set to 1;
#     the second and third could take other values, e.g. [1, 2, 2, 1].
# Process:
#   - Change the filter to a 2-D matrix with shape [5*5*1, 32]
#   - Extract image patches from the input tensor to form a virtual tensor of shape [batch, 28, 28, 5*5*1]
#   - For each patch, right-multiply the filter matrix and the image patch vector
# Output:
#   - A tensor of shape (?, 28, 28, 32): the output of the first convolution layer
#     is 32 images of [28x28] per input image.
#     Here 32 is considered the volume/depth of the output image.

convolve1= tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME') + b_conv1

In [9]:
# Apply the ReLU activation function to the biased output of the first convolution
h_conv1 = tf.nn.relu(convolve1)

In [10]:
# Apply max pooling

# Max pooling is a form of non-linear down-sampling.
# It partitions the input image into a set of rectangles and keeps the maximum value of each region.

# Use tf.nn.max_pool to perform the pooling.
# Kernel size: 2x2 (each 2x2 window produces one output pixel).
# Strides: dictate how the kernel slides; here it moves 2 pixels each step, so windows do not overlap.
# The input (h_conv1) is 28x28x32 per image and the output is 14x14x32,
# as the tensor shape printed below this cell confirms.

conv1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') #max_pool_2x2
conv1

<tf.Tensor 'MaxPool:0' shape=(?, 14, 14, 32) dtype=float32>

In [11]:
## Convolutional Layer 2

# Weights and biases of the kernels.
# We apply convolution again in this layer. The second-layer kernel is:
#    Filter/kernel: 5x5 (25 pixels)
#    Input channels: 32 (the 1st conv layer produced 32 feature maps)
#    Output feature maps: 64
# Notice: the input image is [14x14x32] and each filter is [5x5x32]; we use 64 such filters,
# so the output of the convolutional layer is 64 convolved images, i.e. [14x14x64].
# Notice: applying one filter of size [5x5x32] to an image of size [14x14x32]
# yields an image of size [14x14x1]; that is, the convolution operates on the whole volume.

W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1)) # [filter_height, filter_width, in_channels, out_channels]
b_conv2 = tf.Variable(tf.constant(0.1, shape=[64])) # one bias per output feature map (64)

In [12]:
# Convolve the pooled output of layer 1 (conv1) with the second weight tensor and add the biases.
# Same stride and padding settings as the first convolution.

convolve2= tf.nn.conv2d(conv1, W_conv2, strides=[1, 1, 1, 1], padding='SAME')+ b_conv2

In [13]:
# Apply the ReLU activation function to the biased output of the second convolution

h_conv2 = tf.nn.relu(convolve2)

In [14]:
# Apply max pooling with the same 2x2 window and stride 2 as in layer 1;
# the displayed result below shows the spatial size going from 14x14 to 7x7.

conv2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') #max_pool_2x2
conv2

<tf.Tensor 'MaxPool_1:0' shape=(?, 7, 7, 64) dtype=float32>

In [15]:
#Second layer completed. So, what is the output of the second layer, layer2?
#    it is 64 matrices of [7x7]

In [16]:
## Fully Connected Layer
#Each [7x7] matrix is flattened to [49x1], and the 64 flattened matrices are joined together,
#which makes an array of size [3136x1]. We will connect it to another layer of size [1024x1].
#So, the weight matrix between these 2 layers will be [3136x1024]

In [33]:
# Flatten each image's 7x7x64 feature volume into one row of 3136 values;
# -1 leaves the batch dimension to be inferred.
layer2_matrix = tf.reshape(conv2, [-1, 7*7*64]) # result shape: [batch, 3136]
layer2_matrix

<tf.Tensor 'Reshape_2:0' shape=(?, 3136) dtype=float32>

In [31]:
# Weights and biases between layer 2 and layer 3

# Input size: one 7x7 feature map times the 64 feature maps of the last layer (7*7*64 = 3136);
# output size: the 1024 units of the fully connected layer
W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024])) # need 1024 biases for 1024 outputs
print(W_fc1)
print(b_fc1)

<tf.Variable 'Variable_8:0' shape=(3136, 1024) dtype=float32_ref>
<tf.Variable 'Variable_9:0' shape=(1024,) dtype=float32_ref>


In [30]:
# Matrix multiplication (applying weights and biases)
# NOTE(review): the execution counts around here are out of order
# (In [33], In [31], In [30], In [32]): this cell ran before the cells that
# define the displayed W_fc1 and layer2_matrix were last run. Restart the kernel
# and run all cells top to bottom so that h_fc1 is built from the tensors shown above.
fcl = tf.matmul(layer2_matrix, W_fc1) + b_fc1
print(fcl)

# Apply the ReLU activation function
h_fc1 = tf.nn.relu(fcl)
h_fc1

# third layer complete

Tensor("add_5:0", shape=(?, 1024), dtype=float32)


<tf.Tensor 'Relu_4:0' shape=(?, 1024) dtype=float32>

In [32]:
# Dropout layer: an optional phase for reducing overfitting

# In this phase the network "forgets" some features.
# At each training step in a mini-batch, some units are switched off at random
# so that they do not interact with the rest of the network.
# That is, their weights are not updated in that step, and they do not affect
# the learning of the other network nodes.
# This can be very useful for very large neural networks to prevent overfitting.
# keep_prob is fed at run time; presumably it is the probability of keeping each
# unit (per the TF 1.x tf.nn.dropout signature) -- TODO confirm where it is fed.
keep_prob = tf.placeholder(tf.float32)
layer_drop = tf.nn.dropout(h_fc1, keep_prob)
layer_drop

<tf.Tensor 'dropout_1/mul:0' shape=(?, 1024) dtype=float32>

In [34]:
# Readout Layer (Softmax Layer)

# Weights and biases.
# In the last layer, the CNN takes the high-level filtered features and translates
# them into votes for each class using softmax.
# Inputs: 1024 (neurons from the 3rd layer); outputs: one score per class.
# The output size comes from `class_output` (defined with the initial parameters)
# rather than a literal 10, so it always matches the width of the label placeholder y_.
W_fc2 = tf.Variable(tf.truncated_normal([1024, class_output], stddev=0.1)) # 1024 inputs -> class_output scores
b_fc2 = tf.Variable(tf.constant(0.1, shape=[class_output])) # one bias per class (digits 0-9)

In [35]:
# Matrix multiplication (applying the readout weights and biases to the dropout output)
fc = tf.matmul(layer_drop, W_fc2) + b_fc2

In [37]:
# Apply the softmax activation function.
# Softmax allows us to interpret the outputs of fc as probabilities, so y_CNN is a tensor of probabilities.
y_CNN = tf.nn.softmax(fc)
y_CNN

<tf.Tensor 'Softmax_1:0' shape=(?, 10) dtype=float32>

In [38]:
## Summary of the Deep Convolutional Neural Network

#0) Input - MNIST dataset
#1) Convolutional and Max-Pooling
#2) Convolutional and Max-Pooling
#3) Fully Connected Layer
#4) Processing - Dropout
#5) Readout layer - Fully Connected
#6) Outputs - Classified digits

In [41]:
# Define functions and train the model
# Define the loss function

# We need to compare the network output (the y_CNN tensor) with the ground truth
# for every item in the mini-batch. Cross entropy measures how badly the CNN is
# doing, i.e. the error at a softmax layer.

# The code below is a toy sample of cross entropy for a mini-batch of size 2
# whose items have already been classified. Change the numbers and re-run the
# cell to see how the cross entropy changes.

# How it works:
#   - layer4_test plays the role of the softmax output: one row per item, one
#     probability per class. Each row sums to 1, as a real softmax output does.
#   - y_test holds the one-hot ground truth for the same items.
#   - For each item, -sum(y * log(p)) keeps only -log(probability assigned to
#     the true class); np.mean averages that over the mini-batch.
#   Here both items give probability 0.9 to the true class, so the result is
#   -log(0.9), about 0.105.
import numpy as np
layer4_test = np.array([[0.9, 0.05, 0.05], [0.9, 0.05, 0.05]])
y_test = np.array([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
cross_entropy_test = np.mean(-np.sum(y_test * np.log(layer4_test), axis=1))
cross_entropy_test

0.10536051565782628