# 2-Layer Convolutional Neural Network

## Import dependencies

In [None]:
import os
import sys
import pickle
from datetime import datetime as dt

import tensorflow as tf
import numpy as np

## Load in the dataset

In [None]:
from tensorflow.examples.tutorials.mnist import input_data


# Where artefacts are saved, where the raw MNIST files live, and the path of
# the pickled copy of the dataset.
save_dir = '../saved/convnet/2-layers'
data_dir = '../datasets/MNIST'
saved_data = os.path.join(save_dir, f'data/{os.path.basename(data_dir)}.pkl')

if not os.path.isfile(saved_data):
    # No pickled copy yet: read the dataset with one-hot labels and cache it.
    start = dt.now()
    data = input_data.read_data_sets(data_dir, one_hot=True)
    print(f'Took {dt.now() - start}')
    os.makedirs(os.path.dirname(saved_data), exist_ok=True)
    # The context manager closes the file even if pickling fails.
    with open(saved_data, 'wb') as cache_file:
        pickle.dump(obj=data, file=cache_file)

    print('\nCached data for future use.')
else:
    start = dt.now()
    # NOTE: unpickling can execute arbitrary code; only load a cache file
    # that this notebook wrote itself.
    with open(saved_data, 'rb') as cache_file:
        data = pickle.load(file=cache_file)
    print('Loaded cached data.')
    print(f'Took {dt.now() - start}')

# free memory
del start

In [None]:
# Report how many labelled examples each split contains.
print(f'Training set    = {len(data.train.labels):,}')
print(f'Testing set     = {len(data.test.labels):,}')
print(f'Validation set  =  {len(data.validation.labels):,}')

## Hyperparameters

In [None]:
# --- Data dimensions -------------------------------------------------------
image_size, image_channel = 28, 1
image_shape = (image_size, image_size, image_channel)
image_shape_flat = image_size ** 2  # length of one flattened image
num_classes = 10

# --- Network ---------------------------------------------------------------
filter_size = 5              # used for both filter dimensions of each conv layer
filter_1, filter_2 = 32, 64  # output channels of conv layer 1 and 2
fc_size = 256                # width of the fully connected layer
dropout = 0.8                # value fed to the `keep_prob` placeholder

# --- Training --------------------------------------------------------------
train_batch, test_batch, val_batch = 100, 50, 25
# NOTE(review): 0.1 looks large for the Adam optimizer used below - confirm.
learning_rate = 0.1
n_iters = 0  # Total number of completed optimization iterations
save_interval = 100  # iterations between summary/checkpoint writes

## Helper functions

#### `weight` and `bias`

In [None]:
def weight(shape):
    """Return a `tf.Variable` named 'weight' with the given shape.

    Initial values come from a truncated normal distribution with
    mean 0 and standard deviation 0.5.
    """
    return tf.Variable(
        tf.truncated_normal(shape=shape, mean=0, stddev=0.5),
        name='weight',
    )

def bias(shape):
    """Return a zero-initialised `tf.Variable` named 'bias'.

    `shape` is wrapped in a list before use, so callers pass a single
    length (the call sites in this file pass an int).
    """
    zeros = tf.zeros(shape=[shape])
    return tf.Variable(zeros, name='bias')

#### `convolution` and `pooling`

In [None]:
def conv2d(X, W):
    """Convolve `X` with filter `W` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(X, W, strides=unit_strides, padding='SAME')

def max_pool(X):
    """Max-pool `X` with window and strides of [1, 2, 2, 1], 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(X, ksize=window, strides=list(window), padding='SAME')

#### `flatten` layer

In [None]:
def flatten(layer):
    """Collapse dimensions 1..3 of `layer` into a single feature axis.

    Returns:
        A tuple `(layer_flat, num_features)`: the tensor reshaped to
        `[-1, num_features]`, and the product of the sizes of
        dimensions 1 to 3 of the input's static shape.
    """
    feature_dims = layer.get_shape()[1:4]
    num_features = np.prod(np.array(feature_dims, dtype=int))
    return tf.reshape(layer, [-1, num_features]), num_features

## Building the Computational Graph

### Placeholder variables

In [None]:
# Graph inputs: flattened images, one-hot labels and the dropout keep
# probability (supplied through `feed_dict` at run time).
X = tf.placeholder(tf.float32, shape=[None, image_shape_flat])
y = tf.placeholder(tf.float32, shape=[None, num_classes])
keep_prob = tf.placeholder(tf.float32)

# Restore the image layout for the conv layers; recover class indices
# from the one-hot labels.
X_image = tf.reshape(X, [-1, *image_shape])
y_true = tf.argmax(y, axis=1)
X_image

### Building the `convnet`

#### Input Layer »» Hidden Layer 1

In [None]:
# Conv layer 1: image_channel -> filter_1 channels, then ReLU and max-pooling.
W_hidden1 = weight(shape=[filter_size, filter_size, image_channel, filter_1])
b_hidden1 = bias(shape=filter_1)
hidden1 = max_pool(tf.nn.relu(conv2d(X_image, W_hidden1) + b_hidden1))
hidden1

#### Hidden Layer 1 »» Hidden Layer 2

In [None]:
# Conv layer 2: filter_1 -> filter_2 channels, then ReLU and max-pooling.
W_hidden2 = weight(shape=[filter_size, filter_size, filter_1, filter_2])
b_hidden2 = bias(shape=filter_2)
hidden2 = max_pool(tf.nn.relu(conv2d(hidden1, W_hidden2) + b_hidden2))
hidden2

#### Flatten Hidden Layer 2 output

In [None]:
# Flatten the second conv layer's output and keep the feature count for
# sizing the fully connected weights.
hidden2_flat, num_features = flatten(layer=hidden2)
(hidden2_flat, num_features)

#### Hidden Layer 2 »» Fully connected layer

In [None]:
# Fully connected layer: num_features -> fc_size units with ReLU.
W_fc = weight(shape=[num_features, fc_size])
b_fc = bias(shape=fc_size)
fc_layer = tf.nn.relu(tf.matmul(hidden2_flat, W_fc) + b_fc)
# Dropout on the FC activations; the keep probability comes from the
# `keep_prob` placeholder.
fc_drop = tf.nn.dropout(fc_layer, keep_prob)
fc_drop

#### Fully connected Layer »» Output layer

In [None]:
# Output layer: fc_size -> num_classes logits.
W_out = weight(shape=[fc_size, num_classes])
b_out = bias(shape=num_classes)
# Feed the dropout output (`fc_drop`), not the raw activations
# (`fc_layer`); otherwise the dropout op is disconnected from the loss and
# `keep_prob` has no effect. Evaluation should feed keep_prob=1.0.
logits = tf.matmul(fc_drop, W_out) + b_out
# Class probabilities and the index of the most probable class.
y_pred = tf.nn.softmax(logits)
y_pred_true = tf.argmax(y_pred, axis=1)
logits

### Cost function

In [None]:
# Per-example softmax cross-entropy between the one-hot labels and the
# logits, averaged into a scalar cost.
xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
cost = tf.reduce_mean(xentropy)

### Optimizer

In [None]:
# Non-trainable step counter, passed to `minimize` alongside the cost.
global_step = tf.Variable(0, name='global_step', trainable=False)
optimizer = tf.train.AdamOptimizer(learning_rate)
train_step = optimizer.minimize(loss=cost, global_step=global_step)

### Evaluate Accuracy

In [None]:
# Accuracy = mean of the element-wise match between true and predicted
# class indices (booleans cast to float32).
correct = tf.equal(y_true, y_pred_true)
accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))
accuracy

## Running the Computational Graph

### tensorflow's `Session`

In [None]:
# Build the variable initializer, open a session and run the initializer.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

### Tensorboard

In [None]:
# Output locations: TensorBoard event files and model checkpoints.
tensorboard_dir = os.path.join(save_dir, 'tensorboard')
logdir = os.path.join(tensorboard_dir, 'log')
model_dir = os.path.join(save_dir, 'models')
model_file = os.path.join(model_dir, 'model.ckpt')

# Scalar summaries for cost and accuracy, merged into a single op.
tf.summary.scalar(name='cost', tensor=cost)
tf.summary.scalar(name='accuracy', tensor=accuracy)
merged = tf.summary.merge_all()

# Checkpoint saver first, then the summary writer bound to the session graph.
saver = tf.train.Saver()
writer = tf.summary.FileWriter(logdir, graph=sess.graph)

### Restore last checkpoint

In [None]:
# Best-effort restore: a failure is reported on stderr and the notebook
# continues with the freshly initialised variables.
if tf.gfile.Exists(model_dir):
    try:
        print('INFO: Attempting to restore last checkpoint.')
        # `latest_checkpoint` lives in `tf.train`; `tf.latest_checkpoint`
        # raises AttributeError, which the handler below would swallow.
        last_ckpt = tf.train.latest_checkpoint(model_dir)
        if last_ckpt is None:
            # The directory exists but holds no checkpoint yet.
            print(f'INFO: No checkpoint found in {model_dir}')
        else:
            saver.restore(sess=sess, save_path=last_ckpt)
            print(f'SUCCESS: Checkpoint restored @ {last_ckpt}')
    except Exception as e:
        sys.stderr.write(f'ERR: Could not load checkpoint. {e}\n')
        sys.stderr.flush()
else:
    tf.gfile.MakeDirs(model_dir)
    print(f'INFO: Checkpoint folder created - {model_dir}')

### Helper functions

#### Perform Optimization

In [None]:
def train(iterations=1000):
    """Run `iterations` optimization steps on training mini-batches.

    Each step draws `train_batch` examples and runs `train_step` with
    `keep_prob` set to `dropout`. Whenever the module-level counter
    `n_iters` is a multiple of `save_interval`, the merged summary is
    written and a checkpoint is saved. Progress is rewritten in place on
    stdout after every step.

    Args:
        iterations: Number of optimization steps to run in this call.
    """
    global n_iters
    start = dt.now()
    for _ in range(iterations):
        n_iters += 1
        X_batch, y_batch = data.train.next_batch(train_batch)
        feed_dict = {X: X_batch, y: y_batch, keep_prob: dropout}
        _, i_global = sess.run([train_step, global_step], feed_dict=feed_dict)
        # Save checkpoint and summarize tensorboard
        if n_iters % save_interval == 0:
            summary = sess.run(merged, feed_dict=feed_dict)
            writer.add_summary(summary, global_step=i_global)
            saver.save(sess=sess, save_path=model_file, global_step=global_step)
        # Log progress ('\r' returns to the start of the line so the status
        # overwrites itself)
        sys.stdout.write(f'\rIter: {n_iters:,}\tGlobal step: {i_global:,}'
                         f'\tTime taken: {dt.now() - start}')
        sys.stdout.flush()
    print(f"\n{80*'='}")
    # f-string prefix is required so the iteration count is interpolated.
    print(f'\tCompleted {n_iters:,} iterations.')
    print(80*'=')

#### Evaluate Accuracy

In [None]:
def _accuracy_on(dataset, batch_size, use_batch):
    """Return the accuracy on one mini-batch, or on all of `dataset`."""
    if use_batch:
        images, labels = dataset.next_batch(batch_size)
    else:
        images, labels = dataset.images, dataset.labels
    # keep_prob=1.0 keeps every unit: dropout must be off when evaluating.
    feed_dict = {X: images, y: labels, keep_prob: 1.0}
    return sess.run(accuracy, feed_dict=feed_dict)


def score(test=True, validation=False, use_batch=True):
    """Print the current accuracy on the test and/or validation split.

    Args:
        test: Report accuracy on `data.test` when true.
        validation: Report accuracy on `data.validation` when true.
        use_batch: When true, evaluate a single mini-batch (`test_batch`
            or `val_batch` examples); otherwise evaluate the whole split
            in one run.
    """
    print(80*'=')
    print('Accuracy after {:,} iterations'.format(n_iters))
    if test:
        acc = _accuracy_on(data.test, test_batch, use_batch)
        print('Accuracy on test set: {:.02%}'.format(acc))
    if validation:
        acc = _accuracy_on(data.validation, val_batch, use_batch)
        print('Accuracy on validation set: {:.02%}'.format(acc))
    print(80*'=')

## Training the Network!

In [None]:
# 10 iterations, then accuracy on the full test set.
train(10)
score(test=True, validation=False, use_batch=False)

In [None]:
# 90 more iterations, then accuracy on the full test set.
train(90)
score(test=True, validation=False, use_batch=False)

In [None]:
# 900 more iterations, then accuracy on one test and one validation batch.
train(900)
score(validation=True, test=True, use_batch=True)

In [None]:
# 9,000 more iterations, then accuracy on one test and one validation batch.
train(9000)
score(validation=True, test=True, use_batch=True)

In [None]:
import shutil

# Clear saved mnist `data`
cache_dir = os.path.dirname(saved_data)
# Guard so that re-running this cell after the directory has been removed
# does not raise FileNotFoundError.
if os.path.isdir(cache_dir):
    shutil.rmtree(cache_dir)

In [None]:
# Close the TensorFlow session opened earlier in the notebook.
sess.close()