# 5 Layer Convolutional Neural Network | _by Victor_

## Import dependencies

In [1]:
import tensorflow as tf
import numpy as np
from tqdm import tqdm

## Collecting the dataset

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('../datasets/MNIST', one_hot=True)

Extracting ../datasets/MNIST/train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
print('Training set    = {:,}'.format(len(data.train.labels)))
print('Testing set     = {:,}'.format(len(data.test.labels)))
print('Validation set  =  {:,}'.format(len(data.validation.labels)))

Training set    = 55,000
Testing set     = 10,000
Validation set  =  5,000


## Building the Computation Graph

### Hyperparameters

In [4]:
# Inputs
image_size = 28
image_channel = 1
image_shape = (image_size, image_size, image_channel)
image_size_flat = image_size * image_size * image_channel
num_classes = 10

# Network
filter_size = 5
hidden1_filter = 8
hidden2_filter = 16
hidden3_filter = 32
hidden4_filter = 68
hidden5_filter = 128
fc1_size = 512
fc2_size = 1024
dropout = 0.8


# Training
train_batch = 100
test_batch = 50
val_batch = 25
n_iters = 0
learning_rate = 1e-3

### Model Placeholders

In [5]:
# Input gateway
X = tf.placeholder(tf.float32, [None, image_size_flat])
y = tf.placeholder(tf.float32, [None, num_classes])

# For convnets
X_image = tf.reshape(X, [-1, image_size, image_size, image_channel])
y_true = tf.argmax(y, axis=1)

# Dropout
keep_prob = tf.placeholder(tf.float32)

### Helper Methods

#### `weight` and `bias`

In [6]:
def weight(shape):
    initial = tf.truncated_normal(shape=shape, mean=0, stddev=0.5)
    return tf.Variable(initial, name='weight')

def bias(shape):
    initial = tf.zeros(shape=[shape])
    return tf.Variable(initial, name='bias')

#### `convolution` and `max pooling`

In [7]:
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

#### `flatten` layer

In [8]:
def flatten(layer):
    layer_shape = layer.get_shape()
    num_features = np.array(layer_shape[1:4], dtype=int).prod()
    layer_flat = tf.reshape(layer, [-1, num_features])
    return layer_flat, num_features

#### Convolutional Block

In [9]:
def conv_layer(prev_layer, prev_filter, layer_filter, layer_name, use_pool=True):
    with tf.name_scope(layer_name):
        W = weight(shape=[filter_size, filter_size, prev_filter, layer_filter])
        b = bias(shape=layer_filter)
        layer = tf.nn.relu(conv2d(prev_layer, W) + b)
        if use_pool:
            layer = max_pool(layer)
        return layer

def fc_layer(prev_layer, prev_size, layer_size, layer_name, use_relu=True, dropout=True):
    with tf.name_scope(layer_name):
        W = weight(shape=[prev_size, layer_size])
        b = bias(shape=layer_size)
        layer = tf.matmul(prev_layer, W) + b
        if use_relu:
            layer = tf.nn.relu(layer)
        if dropout:
            layer = tf.nn.dropout(layer, keep_prob)
        return layer

def output_layer(fc_layer, fc_size, num_classes):
    with tf.name_scope('output_layer'):
        W = weight(shape=[fc_size, num_classes])
        b = bias(shape=num_classes)
        logits = tf.matmul(fc_layer, W) + b
        y_pred = tf.nn.softmax(logits)
        y_pred_true = tf.argmax(y_pred, axis=1)
        return logits, y_pred_true

### Building the Network!

#### Convolutional Layers/Blocks

In [10]:
# Input layer => Hidden layer 1
hidden1 = conv_layer(X_image, image_channel, hidden1_filter, 'hidden_layer_1', use_pool=False)
# Hidden layer 1 => Hidden layer 2
hidden2 = conv_layer(hidden1, hidden1_filter, hidden2_filter, 'hidden_layer_2', use_pool=False)
# Hidden layer 2 => Hidden layer 3
hidden3 = conv_layer(hidden2, hidden2_filter, hidden3_filter, 'hidden_layer_3', use_pool=True)
# Hidden layer 3 => Hidden layer 4
hidden4 = conv_layer(hidden3, hidden3_filter, hidden4_filter, 'hidden_layer_4', use_pool=True)
# Hidden layer 4 => Hidden layer 5
hidden5 = conv_layer(hidden4, hidden4_filter, hidden5_filter, 'hidden_layer_5', use_pool=False)
print(hidden1)
print(hidden2)
print(hidden3)
print(hidden4)
print(hidden5)

Tensor("hidden_layer_1/Relu:0", shape=(?, 28, 28, 8), dtype=float32)
Tensor("hidden_layer_2/Relu:0", shape=(?, 28, 28, 16), dtype=float32)
Tensor("hidden_layer_3/MaxPool:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("hidden_layer_4/MaxPool:0", shape=(?, 7, 7, 68), dtype=float32)
Tensor("hidden_layer_5/Relu:0", shape=(?, 7, 7, 128), dtype=float32)


In [11]:
hidden5_flat, hidden5_flat_filter = flatten(hidden5)
print(hidden5_flat)
print('hidden5_flat_filter =', hidden5_flat_filter)

Tensor("Reshape_1:0", shape=(?, 6272), dtype=float32)
hidden5_flat_filter = 6272


#### Fully Connected Layers

In [12]:
fc1_layer = fc_layer(hidden5_flat, hidden5_flat_filter, fc1_size, 'fc_layer_1', use_relu=True, dropout=True)
fc2_layer = fc_layer(fc1_layer, fc1_size, fc2_size, 'fc_layer_2', use_relu=True, dropout=True)
print(fc1_layer)
print(fc2_layer)

Tensor("fc_layer_1/dropout/mul:0", shape=(?, 512), dtype=float32)
Tensor("fc_layer_2/dropout/mul:0", shape=(?, 1024), dtype=float32)


#### Output Layer

In [13]:
logits, y_pred_true = output_layer(fc2_layer, fc2_size, num_classes)

In [14]:
logits

<tf.Tensor 'output_layer/add:0' shape=(?, 10) dtype=float32>

In [15]:
y_pred_true

<tf.Tensor 'output_layer/ArgMax:0' shape=(?,) dtype=int64>

### cross entropy, cost, optimizer

In [16]:
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y, name='xentropy')
cost = tf.reduce_mean(xentropy, name='cost')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(cost)

### Model Accuracy Evaluation

In [17]:
correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [18]:
accuracy

<tf.Tensor 'Mean:0' shape=() dtype=float32>

## Running the Computational Graph

In [19]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

### More Helper functions

#### Perform Optimization

In [20]:
def train(iterations=1000):
    global n_iters
    for _ in tqdm(range(iterations)):
        X_batch, y_batch = data.train.next_batch(train_batch)
        feed_dict = {X: X_batch, y: y_batch, keep_prob: dropout}
        sess.run(train_step, feed_dict=feed_dict)
        n_iters += 1
    print(80*'=')
    print('\tCompleted {:,} iterations.'.format(n_iters))
    print(80*'=')

#### Evaluate accuracy

In [21]:
def score(test=False, validation=True, use_batch=True):
    print(80*'=')
    print('Accuracy after {:,} iterations'.format(n_iters))
    feed_dict = {}
    if test:
        if use_batch:
            X_batch, y_batch = data.test.next_batch(test_batch)
            feed_dict = {X: X_batch, y: y_batch, keep_prob:dropout}
        else:
            feed_dict = {X: data.test.images, y: data.test.labels, keep_prob:dropout}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        print('Accuracy on test set: {:.02%}'.format(acc))
    if validation:
        if use_batch:
            X_batch, y_batch = data.validation.next_batch(val_batch)
            feed_dict = {X: X_batch, y: y_batch, keep_prob:dropout}
        else:
            feed_dict = {X: data.validation.images, y: data.validation.labels, keep_prob:dropout}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        print('Accuracy on validation set: {:.02%}'.format(acc))
    print(80*'=')

### Training the Network

In [22]:
train(iterations=10)
score(test=False, validation=True, use_batch=False)

100%|██████████| 10/10 [00:08<00:00,  1.14it/s]


	Completed 10 iterations.
Accuracy after 10 iterations
Accuracy on validation set: 37.44%


In [23]:
train(iterations=90)
score(test=False, validation=True, use_batch=False)

100%|██████████| 90/90 [01:22<00:00,  1.19it/s]


	Completed 100 iterations.
Accuracy after 100 iterations
Accuracy on validation set: 81.36%


In [24]:
train(iterations=900)
score(test=False, validation=True, use_batch=True)

100%|██████████| 900/900 [14:13<00:00,  1.05s/it]

	Completed 1,000 iterations.
Accuracy after 1,000 iterations
Accuracy on validation set: 92.00%





In [25]:
train(iterations=1000)
score(test=True, validation=True, use_batch=True)

100%|██████████| 9000/9000 [2:31:21<00:00,  1.15it/s]    


	Completed 10,000 iterations.
Accuracy after 10,000 iterations
Accuracy on test set: 12.00%
Accuracy on validation set: 16.00%


### Closing the tensorflow's `Session`

In [26]:
# sess.close()