In [None]:
import os, sys
# Put the parent directory of the current working directory on the import
# path (presumably where the KerasTools module lives -- confirm).
module_path = os.path.abspath(os.path.join('..'))
# Only append when missing, so re-running this cell does not add duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)
# Must come after the sys.path tweak above.
import KerasTools as KT
import tensorflow as tf
# Set TensorFlow's log verbosity to ERROR to keep the cell output quiet.
tf.logging.set_verbosity(tf.logging.ERROR)

import numpy as np
from keras.datasets import mnist
from keras import models
from keras import layers
from keras import optimizers
from keras.utils import to_categorical

In [None]:
# Load MNIST data and preprocess it
#
# Note: We will now use a 2D convolutional network, which expects input in format (height, width, channels)
# The MNIST data has only one channel per image (grayscale) (RGB images have 3 channels)
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add the trailing channel axis. Using -1 lets numpy infer the number of samples
# from the data instead of hard-coding 60000 / 10000, so the cell keeps working
# on a subset of the data. The pixels are then converted to float32 and divided
# by 255 (assumes 8-bit pixel values in 0..255).
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# One-hot encode the labels. The width is fixed at 10 (digits 0-9) so that it
# always matches the 10-unit softmax output of the network, even if some digit
# happened to be absent from one of the label arrays.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [None]:
# Build the neural network
#
# A convnet consists of a convolutional base to generate image features,
# followed by a normal dense classifier to make the final classification.

def build_network(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile a small convolutional image classifier.

    The model is a convolutional base (three 3x3 Conv2D layers with 32, 64 and
    64 filters, the first two each followed by 2x2 max pooling) topped by a
    dense classifier (64 ReLU units, then a softmax output layer).

    Args:
        input_shape: Shape of a single input image as (height, width, channels).
            Defaults to (28, 28, 1), the MNIST format used in this notebook.
            With Keras' default 'valid' padding the spatial size must be at
            least 18x18 to survive the three convolutions and two poolings.
        num_classes: Number of units in the softmax output layer. Defaults to 10.

    Returns:
        A compiled Sequential model (RMSProp optimizer, categorical
        cross-entropy loss, accuracy metric). Each call creates a new model.
    """
    network = models.Sequential([
        # Convolutional base
        # We use the usual kernel size of (3,3), followed by a (2,2) maxpool layer
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),

        # DNN classifier
        # Note that the feature maps returned by the convolutional base are still 2D maps,
        # and need to be flattened before they can be fed into a dense feedforward network
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])

    # To speed up training, we use the RMSProp optimizer
    network.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return network

In [None]:
# Build a fresh, untrained network and print a text summary of its architecture
network = build_network()
network.summary()

In [None]:
# Pass 1: exploratory training run with fit(), holding out 20% of the
# training data for validation.
# Note: The network will need significantly more computation power for training than the previous DNN

epochs = 8
history = network.fit(
    x=train_images,
    y=train_labels,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2,
)

In [None]:
# Visualize the metrics recorded by fit() during Pass 1.
# NOTE(review): KT.plot_history is a project helper not shown here; presumably it
# draws the per-epoch training/validation curves -- confirm.
KT.plot_history(history.history)

In [None]:
# Pass 2: Train the final production network on the whole training set
# (no validation split), stopping at the onset of overfitting.
epochs = 5
network = build_network()
network.fit(x=train_images, y=train_labels, batch_size=128, epochs=epochs)

# Score the freshly trained network on the held-out test set
test_loss, test_acc = network.evaluate(test_images, test_labels)
print()
print("Test loss", test_loss)
print("Test accuracy", test_acc)

# Attach the test results and the epoch count to the Pass 1 history dict,
# which is the dict handed to KT.plot_history
for key, value in (('test_loss', test_loss), ('test_acc', test_acc), ('epochs', epochs)):
    history.history[key] = value

In [None]:
# Plot the Pass 1 history again; it now also carries the 'test_loss', 'test_acc'
# and 'epochs' entries stored after Pass 2.
# NOTE(review): presumably KT.plot_history marks these test results on the curves -- confirm.
KT.plot_history(history.history)

In [None]:
# Save the network trained in Pass 2 for later production use.
# The file is written relative to the current working directory.
network.save("./mnist_convnet_trained.h5")