# Chapter 13: Convolutional Neural Networks

## Exercise 7

Build your own CNN and try to achieve the highest possible accuracy on MNIST.

## Solution

For this exercise, I am going to implement a simplified version of LeNet-5.

In [0]:
!pip install tensorflow-gpu

In [0]:
# Downloading MNIST dataset.

import tensorflow as tf
import numpy as np

(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [0]:
# Defining the TensorFlow graph.

graph = tf.Graph()

batch_size = 50
n_batches = len(X_train) // batch_size

with graph.as_default():
  with tf.device('/cpu:0'):
    training = tf.placeholder_with_default(False, shape=())
  with tf.device('/gpu:0'):
    X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X')
    X_padded = tf.pad(X, paddings=[[0, 0], [2, 2], [2, 2], [0, 0]],
                      name='X_padded')
    y = tf.placeholder(tf.int32, shape=(None), name='y')

    C1 = tf.math.tanh(tf.layers.conv2d(X_padded, filters=6, kernel_size=5,
                                       strides=[1,1], name='C1'))
    S2 = tf.math.tanh(tf.nn.avg_pool(C1, ksize=[1,2,2,1], strides=[1,2,2,1],
                                     name='S2', padding='VALID'))
    C3 = tf.math.tanh(tf.layers.conv2d(S2, filters=16, kernel_size=5,
                                      strides=[1,1], name='C3'))
    S4 = tf.math.tanh(tf.nn.avg_pool(C3, ksize=[1,2,2,1], strides=[1,2,2,1],
                                     name='S4', padding='VALID'))
    C5 = tf.math.tanh(tf.layers.conv2d(S4, filters=120, kernel_size=5,
                                      strides=[1,1], name='C5'))
    F6 = tf.squeeze(
        tf.squeeze(
            tf.layers.dense(C5, 84, activation=tf.math.tanh, name='F6'),
            axis=1),
        axis=1)
    logits = tf.layers.dense(F6, 10, name='logits')

    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

  with tf.device('/cpu:0'):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    rnd_queue = tf.RandomShuffleQueue(capacity=len(X_train),
                                      min_after_dequeue=0,
                                      dtypes=[tf.float32, tf.int32],
                                      shapes=[(28, 28, 1), ()])
    enqueue_op = rnd_queue.enqueue_many([X, y])
    dequeue_op = rnd_queue.dequeue_up_to(batch_size)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

In [67]:
# Training the model using early stopping.

n_epochs = 100
model_path = 'my_model.ckpt'

with graph.as_default():
  with tf.Session() as sess:
    sess.run(init)

    best_loss = None
    rounds_since_best_loss = 0

    for epoch in range(n_epochs):
      sess.run(enqueue_op, feed_dict={X: X_train, y: y_train, training: True})
      for _ in range(n_batches):
        X_batch, y_batch = sess.run(dequeue_op)
        sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
      if epoch == 0:
        best_loss = loss.eval(feed_dict={X: X_valid, y: y_valid})
        saver.save(sess, model_path)
      elif epoch % 5 == 0:
        loss_val = loss.eval(feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
          best_loss = loss_val
          rounds_since_best_loss = 0
        else:
          rounds_since_best_loss += 1
          if rounds_since_best_loss == 4:
            print('Early stopping at epoch:', epoch+1)
            break
      else:
        saver.save(sess, model_path)

    saver.restore(sess, model_path)
    train_acc_val = accuracy.eval(feed_dict={X: X_train, y: y_train})
    test_acc_val = accuracy.eval(feed_dict={X: X_test, y: y_test})

    print('Model training complete.')
    print('Training epochs until convergence:', epoch+1)
    print('Training set accuracy:', train_acc_val)
    print('Test set accuracy:', test_acc_val)

Early stopping at epoch: 26
INFO:tensorflow:Restoring parameters from my_model.ckpt
Model training complete.
Training epochs until convergence: 26
Training set accuracy: 0.99841815
Test set accuracy: 0.9872
