# 2-Layer Convolutional Neural Network

## Import dependencies

In [1]:
import os
import sys
import pickle
from datetime import datetime as dt

import tensorflow as tf
import numpy as np

## Load in the dataset

In [2]:
from tensorflow.examples.tutorials.mnist import input_data


save_dir = '../saved/convnet/2-layers'
data_dir = '../datasets/MNIST'
saved_data = os.path.join(save_dir, f'data/{os.path.basename(data_dir)}.pkl')

if not os.path.isfile(saved_data):
    start = dt.now()
    data = input_data.read_data_sets(data_dir, one_hot=True)
    print(f'Took {dt.now() - start}')
    if not os.path.exists(os.path.dirname(saved_data)):
        os.makedirs(os.path.dirname(saved_data))
    pickle.dump(file=open(saved_data, 'wb'), obj=data)
    
    print('\nCached data for future use.')
else:
    start = dt.now()
    data = pickle.load(file=open(saved_data, 'rb'))
    print('Loaded cached data.')
    print(f'Took {dt.now() - start}')

# free memory
del start

Extracting ../datasets/MNIST/train-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/train-labels-idx1-ubyte.gz
Extracting ../datasets/MNIST/t10k-images-idx3-ubyte.gz
Extracting ../datasets/MNIST/t10k-labels-idx1-ubyte.gz
Took 0:00:00.654338

Cached data for future use.


In [3]:
print('Training set    = {:,}'.format(len(data.train.labels)))
print('Testing set     = {:,}'.format(len(data.test.labels)))
print('Validation set  =  {:,}'.format(len(data.validation.labels)))

Training set    = 55,000
Testing set     = 10,000
Validation set  =  5,000


## Hyperparameters

In [4]:
# Data dimension
image_size = 28
image_channel = 1
image_shape = (image_size, image_size, image_channel)
image_shape_flat = image_size * image_size
num_classes = 10

# Network
filter_size = 5
filter_1 = 32
filter_2 = 64
fc_size = 256
dropout = 0.8

# Training
train_batch = 100
test_batch = 50
val_batch = 25
learning_rate = 1e-2
n_iters = 0  # Total number of completed optimization iterations
save_interval = 100

## Helper functions

#### `weights` and `biases`

In [5]:
def weight(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.5, mean=0)
    return tf.Variable(initial, name='weight')


def bias(shape):
    initial = tf.zeros(shape=[shape])
    return tf.Variable(initial, name='bias')


#### `convolution` and `pooling`

In [6]:
def conv2d(X, W):
    return tf.nn.conv2d(X, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool(X):
    return tf.nn.max_pool(X, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


### `conv`, `fully connected` & `flatten` layer

In [27]:
def conv_layer(incoming, filter_size, out_units, activation=tf.nn.relu):
    in_units = incoming.get_shape()[-1].value
    # Weights and biases
    W = weight(shape=[filter_size, filter_size, in_units, out_units])
    b = bias(shape=out_units)
    # convolution and add bias
    layer = conv2d(incoming, W) + b
    # max pooling
    layer = max_pool(layer)
    # apply activation function
    if activation:
        layer = activation(layer)
    return layer


def fully_connected(incoming, units, activation=tf.nn.relu, dropout=None):
    in_units = incoming.get_shape()[-1].value
    # parameters
    W = weight(shape=[in_units, units])
    b = bias(shape=units)
    # matrix multiplicaiton
    layer = tf.matmul(incoming, W) + b
    # add dropout
    if dropout:
        layer = tf.nn.dropout(layer, keep_prob=droput)
    # apply activation
    if activation:
        layer = activation(layer)
    return layer


def flatten(layer):
    layer_shape = layer.get_shape()
    num_features = np.array(layer_shape[1:4], dtype=int).prod()
    layer_flat = tf.reshape(layer, [-1, num_features])
    return layer_flat, num_features

## Building the Computational Graph

### Placeholder variables

In [8]:
X = tf.placeholder(tf.float32, [None, image_shape_flat])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

X_image = tf.reshape(X, [-1, image_size, image_size, image_channel])
y_true = tf.argmax(y, axis=1)
X_image

<tf.Tensor 'Reshape:0' shape=(?, 28, 28, 1) dtype=float32>

### Building the `convnet`

#### Input Layer »» Hidden Layer 1

In [30]:
hidden1 = conv_layer(X_image, filter_size, filter_1)
print(f'{hidden1}')

Tensor("Relu_2:0", shape=(?, 14, 14, 32), dtype=float32)


#### Hidden Layer 1 »» Hidden Layer 2

In [31]:
hidden2 = conv_layer(hidden1, filter_size, filter_2)
print(f'{hidden2}')

Tensor("Relu_3:0", shape=(?, 7, 7, 64), dtype=float32)


#### Flatten Hidden Layer 2 output

In [34]:
hidden2_flat, num_features = flatten(hidden2)
print(f'{hidden2_flat}\tFeatures: {num_features:,}')

Tensor("Reshape_3:0", shape=(?, 3136), dtype=float32)	Features: 3,136


#### HIdden Layer 2 »» Fully connected layer

In [35]:
# W_fc = weight(shape=[num_features, fc_size])
# b_fc = bias(shape=fc_size)
# fc_layer = tf.nn.relu(tf.matmul(hidden2_flat, W_fc) + b_fc)
# fc_drop = tf.nn.dropout(fc_layer, keep_prob=keep_prob)
# fc_drop
fc_drop = fully_connected(hidden2_flat, fc_size, dropout=keep_prob)

TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [Dimension(3136), 256]. Consider casting elements to a supported type.

#### Fully connected Layer »» Output layer

In [13]:
W_out = weight(shape=[fc_size, num_classes])
b_out = bias(shape=num_classes)
logits = tf.matmul(fc_layer, W_out) + b_out
y_pred = tf.nn.softmax(logits)
y_pred_true = tf.argmax(y_pred, axis=1)
logits

<tf.Tensor 'add_3:0' shape=(?, 10) dtype=float32>

### Cost function

In [14]:
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
cost = tf.reduce_mean(xentropy)

### Optimizer

In [15]:
global_step = tf.Variable(0, trainable=False, name='global_step')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(cost, global_step=global_step)

### Evaluate Accuracy

In [16]:
correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
accuracy

<tf.Tensor 'Mean_1:0' shape=() dtype=float32>

## Running the Computional Graph

### tensorflow's `Session`

In [17]:
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

### Tensorboard

In [18]:
# tensorboard logging
tensorboard_dir = os.path.join(save_dir, 'tensorboard')
logdir = os.path.join(tensorboard_dir, 'log')
# Pre-trained model
model_dir = os.path.join(save_dir, 'models')
model_file = os.path.join(model_dir, 'model.ckpt')

# Summary
tf.summary.scalar('cost', cost)
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()

# writer and saver
saver = tf.train.Saver()
writer = tf.summary.FileWriter(logdir=logdir, graph=sess.graph)

### Restore last checkpoint

In [19]:
if tf.gfile.Exists(model_dir):
    # noinspection PyBroadException
    try:
        print('INFO: Attempting to restore last checkpoint.')
        last_ckpt = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess=sess, save_path=last_ckpt)
        print(f'SUCCESS: Checkpoint restored @ {last_ckpt}')
    except Exception as e:
        sys.stderr.write(f'ERR: Could not load checkpoint. {e}')
        sys.stderr.flush()
else:
    tf.gfile.MakeDirs(model_dir)
    print(f'INFO: Checkpoint folder created - {model_dir}')

INFO: Attempting to restore last checkpoint.
INFO:tensorflow:Restoring parameters from ../saved/convnet/2-layers/models/model.ckpt-10000
SUCCESS: Checkpoint restored @ ../saved/convnet/2-layers/models/model.ckpt-10000


### Helper functions

#### Perform Optimzation

In [20]:
def train(iterations=1000):
    global n_iters
    start = dt.now()
    for _ in range(iterations):
        n_iters += 1
        X_batch, y_batch = data.train.next_batch(train_batch)
        feed_dict = {X: X_batch, y: y_batch, keep_prob: dropout}
        _, i_global = sess.run([train_step, global_step], feed_dict=feed_dict)
        # Save checkpoint and summarize tensorboard
        if n_iters % save_interval == 0:
            summary = sess.run(merged, feed_dict=feed_dict)
            writer.add_summary(summary, global_step=i_global)
            saver.save(sess=sess, save_path=model_file, global_step=global_step)
        # Log progress
        sys.stdout.write(f'\rIter: {n_iters:,}\tGlobal step: {i_global:,}'
                         f'\tTime taken: {dt.now() - start}')
        sys.stdout.flush()
    print(f"\n{80*'='}")
    print(f'\tCompleted {n_iters:,} iterations.')
    print(80*'=')

#### Evaluate Accuracy

In [21]:
def score(test=True, validation=False, use_batch=True):
    print(80 * '=')
    print('Accuracy after {:,} iterations'.format(n_iters))
    if test:
        if use_batch:
            X_batch, y_batch = data.test.next_batch(test_batch)
            feed_dict = {X: X_batch, y: y_batch, keep_prob: dropout}
        else:
            feed_dict = {X: data.test.images, y: data.test.labels, keep_prob: dropout}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        print('Accuracy on test set: {:.02%}'.format(acc))
    if validation:
        if use_batch:
            X_batch, y_batch = data.validation.next_batch(val_batch)
            feed_dict = {X: X_batch, y: y_batch, keep_prob: dropout}
        else:
            feed_dict = {X: data.validation.images, y: data.validation.labels, keep_prob: dropout}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        print('Accuracy on validation set: {:.02%}'.format(acc))
    print(80 * '=')


## Training the Network!

In [22]:
train(iterations=10)
score(test=True, use_batch=False)

Iter: 10	Global step: 10,010	Time taken: 0:00:03.019108
	Completed 10 iterations.
Accuracy after 10 iterations
Accuracy on test set: 83.06%


In [23]:
train(iterations=90)
score(test=True, use_batch=False)

Iter: 100	Global step: 10,100	Time taken: 0:00:25.518886
	Completed 100 iterations.
Accuracy after 100 iterations
Accuracy on test set: 87.22%


In [24]:
train(iterations=900)
score(test=True, validation=True, use_batch=True)

Iter: 1,000	Global step: 11,000	Time taken: 0:04:12.165139
	Completed 1,000 iterations.
Accuracy after 1,000 iterations
Accuracy on test set: 78.00%
Accuracy on validation set: 76.00%


In [None]:
train(iterations=9000)
score(test=True, validation=True, use_batch=True)

In [None]:
import shutil

# Clear saved mnist `data`
shutil.rmtree(os.path.dirname(saved_data))

In [None]:
sess.close()