# Chapter 13: Convolutional Neural Networks

## Exercise 7

Build your own CNN and try to achieve the highest possible accuracy on MNIST.

## Solution

For this exercise, I am going to implement an augmented version of LeNet-5 where the convolutional layers with a 5$\times$5 kernel have been replaced by two layers with 3$\times$3 kernels. This insight came from the model in [this article on Kaggle](https://www.kaggle.com/cdeotte/25-million-images-0-99757-mnist). The model achieves over 99% accuracy on the test set, beating the book's solution just barely.

In [0]:
!pip install tensorflow-gpu

In [3]:
# Downloading MNIST dataset.

import tensorflow as tf
import numpy as np

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [0]:
# Defining the TensorFlow graph.

graph = tf.Graph()

batch_size = 50
n_batches = len(X_train) // batch_size

with graph.as_default():
  with tf.device('/cpu:0'):
    training = tf.placeholder_with_default(False, shape=())
  with tf.device('/gpu:0'):
    X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X')
    X_padded = tf.pad(X, paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                      name='X_padded')
    y = tf.placeholder(tf.int32, shape=(None), name='y')

    C1 = tf.nn.relu(tf.layers.conv2d(X_padded, filters=6, kernel_size=3,
                                     strides=[1,1], name='C1'))
    C2 = tf.nn.relu(tf.layers.conv2d(C1, filters=6, kernel_size=3,
                                     strides=[1,1], name='C2'))
    S3 = tf.nn.relu(tf.nn.avg_pool(C2, ksize=[1,2,2,1], strides=[1,2,2,1],
                                   name='S3', padding='VALID'))
    C4 = tf.nn.relu(tf.layers.conv2d(S3, filters=16, kernel_size=3,
                                     strides=[1,1], name='C4'))
    C5 = tf.nn.relu(tf.layers.conv2d(C4, filters=16, kernel_size=3,
                                     strides=[1,1], name='C6'))
    S6 = tf.nn.relu(tf.nn.avg_pool(C5, ksize=[1,2,2,1], strides=[1,2,2,1],
                                   name='S4', padding='VALID'))
    C7 = tf.nn.relu(tf.layers.conv2d(S6, filters=120, kernel_size=3,
                                     strides=[1,1], name='C7'))
    C8 = tf.nn.relu(tf.layers.conv2d(C7, filters=120, kernel_size=3,
                                     strides=[1,1], name='C8'))
    F9 = tf.layers.dense(C8, 84, activation=tf.math.tanh, name='F6')
    flatten = tf.squeeze(tf.squeeze(F9, axis=1), axis=1)
    D = tf.nn.dropout(flatten, rate=0.4)
    logits = tf.layers.dense(D, 10, name='logits')

    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

  with tf.device('/cpu:0'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    rnd_queue = tf.RandomShuffleQueue(capacity=len(X_train),
                                      min_after_dequeue=0,
                                      dtypes=[tf.float32, tf.int32],
                                      shapes=[(28, 28, 1), ()])
    enqueue_op = rnd_queue.enqueue_many([X, y])
    dequeue_op = rnd_queue.dequeue_up_to(batch_size)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

In [19]:
# Training the model using early stopping.

n_epochs = 100
model_path = 'my_model.ckpt'
max_rounds_without_improvement = 20

with graph.as_default():
  with tf.Session() as sess:
    sess.run(init)

    best_loss = float('inf')
    rounds_since_best_loss = 0

    for epoch in range(n_epochs):
      sess.run(enqueue_op, feed_dict={X: X_train, y: y_train})
      for _ in range(n_batches):
        X_batch, y_batch = sess.run(dequeue_op)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch,
                                         training: True})
      loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
      if loss_val < best_loss:
        best_loss = loss_val
        rounds_since_best_loss = 0
        saver.save(sess, model_path)
      else:
        rounds_since_best_loss += 1
      acc_val = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
      print('Epoch:', epoch+1, 'Loss:', loss_val, 'Accuracy:', acc_val)
      if rounds_since_best_loss >= max_rounds_without_improvement:
        print('Early stopping at epoch:', epoch+1)
        break
    else:
      saver.save(sess, model_path)

    saver.restore(sess, model_path)
    train_acc_val = accuracy.eval(feed_dict={X: X_train, y: y_train})
    test_acc_val = accuracy.eval(feed_dict={X: X_test, y: y_test})

    print('Model training complete.')
    print('Training epochs until convergence:', epoch+1)
    print('Training set accuracy:', train_acc_val)
    print('Test set accuracy:', test_acc_val)

Epoch: 1 Loss: 0.07682998 Accuracy: 1.0
Epoch: 2 Loss: 0.052485272 Accuracy: 0.98
Epoch: 3 Loss: 0.058004133 Accuracy: 0.96
Epoch: 4 Loss: 0.05148828 Accuracy: 1.0
Epoch: 5 Loss: 0.048665736 Accuracy: 1.0
Epoch: 6 Loss: 0.038181033 Accuracy: 1.0
Epoch: 7 Loss: 0.04755825 Accuracy: 1.0
Epoch: 8 Loss: 0.03901503 Accuracy: 1.0
Epoch: 9 Loss: 0.042196285 Accuracy: 0.98
Epoch: 10 Loss: 0.044133294 Accuracy: 1.0
Epoch: 11 Loss: 0.04028176 Accuracy: 1.0
Epoch: 12 Loss: 0.04125908 Accuracy: 1.0
Epoch: 13 Loss: 0.04644689 Accuracy: 1.0
Epoch: 14 Loss: 0.03866143 Accuracy: 1.0
Epoch: 15 Loss: 0.03517187 Accuracy: 1.0
Epoch: 16 Loss: 0.052956782 Accuracy: 1.0
Epoch: 17 Loss: 0.039823458 Accuracy: 1.0
Epoch: 18 Loss: 0.038623672 Accuracy: 1.0
Epoch: 19 Loss: 0.03389454 Accuracy: 1.0
Epoch: 20 Loss: 0.055777185 Accuracy: 1.0
Epoch: 21 Loss: 0.035255823 Accuracy: 1.0
Epoch: 22 Loss: 0.038339764 Accuracy: 1.0
Epoch: 23 Loss: 0.037628587 Accuracy: 1.0
Epoch: 24 Loss: 0.03967332 Accuracy: 1.0
Epoch: 25