In [None]:
from __future__ import print_function
import KerasTools as KT
import numpy as np
from keras.datasets import mnist
from keras import models
from keras import layers
from keras import optimizers
from keras.utils import to_categorical

In [None]:
# Fetch MNIST, then flatten and scale the images for a Dense-only network
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Each 28x28 image becomes one 784-element float32 row, divided by 255
train_images = train_images.reshape((60000, 28 * 28)).astype('float32') / 255
test_images = test_images.reshape((10000, 28 * 28)).astype('float32') / 255

# Convert the labels with to_categorical to match the categorical_crossentropy loss
train_labels, test_labels = to_categorical(train_labels), to_categorical(test_labels)

In [None]:
# Generate a neural network
#
# For this example we will deliberately use a network 
# with too much capacity to provoke early overfitting

def build_network():
    """Build and compile the fully connected baseline classifier.

    The model takes flattened 784-element inputs, passes them through two
    256-unit ReLU layers and ends in a 10-way softmax layer.

    Returns:
        The compiled Sequential model (RMSprop optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    network = models.Sequential()
    network.add(layers.Dense(256, activation='relu', input_shape=(28*28*1,)))
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dense(10, activation='softmax'))

    # Use the documented class name `RMSprop`; the lowercase `rmsprop` alias
    # is not part of the public API and is missing from newer Keras releases.
    network.compile(optimizer=optimizers.RMSprop(lr=0.001),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

    return network

# Instantiate the baseline model and print its layer summary
network = build_network()
network.summary()

# Train for `epochs` passes, holding back 20% of the samples for validation
epochs = 20
history = network.fit(
    train_images,
    train_labels,
    batch_size=128,
    epochs=epochs,
    validation_split=0.2
)

In [None]:
# Plot the per-epoch metrics recorded while training the baseline network
baseline_metrics = history.history
KT.plot_history(baseline_metrics)

In [None]:
# Next, counter the early overfitting with image data augmentation

# The augmentation pipeline works on image-shaped samples with an explicit channel axis,
# so MNIST is loaded again and reshaped to (28, 28, 1) per image instead of a flat vector
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255

# Convert the labels with to_categorical, exactly as for the baseline run
train_labels, test_labels = to_categorical(train_labels), to_categorical(test_labels)

# As each augmented input sample is now a 3D array of shape (28, 28, 1),
# the first network layer must flatten it to a 1D array suitable for Dense layers.
def build_generator_network():
    """Build and compile the classifier used with the image data generators.

    The architecture matches the baseline Dense network, except that the
    first layer is a Flatten layer accepting (28, 28, 1) image samples.

    Returns:
        The compiled Sequential model (RMSprop optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    network = models.Sequential()
    # Only the first layer declares the input shape; the Dense layers infer
    # theirs from the preceding layer, so no `input_shape` is passed to them.
    network.add(layers.Flatten(input_shape=(28, 28, 1)))
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dense(10, activation='softmax'))

    # Use the documented class name `RMSprop`; the lowercase `rmsprop` alias
    # is not part of the public API and is missing from newer Keras releases.
    network.compile(optimizer=optimizers.RMSprop(lr=0.001),
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

    return network

network = build_generator_network()

# Build the image data generators.
# We take 20% of the training data for validation.
from keras.preprocessing.image import ImageDataGenerator

validation_split = 0.2

# Training generator: applies random rotations and shifts to the images.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2, height_shift_range=0.2,
    horizontal_flip=False, validation_split=validation_split)

# Validation generator: no augmentation, so the validation metrics are measured
# on unmodified digits. It must use the same `validation_split` as `datagen`
# so that both generators cut the data at the same index and the training and
# validation subsets stay disjoint.
validation_datagen = ImageDataGenerator(validation_split=validation_split)

# Define the actual generators, one each for the training and validation subsets
train_generator = datagen.flow(train_images, train_labels, batch_size=128, subset='training')
validation_generator = validation_datagen.flow(train_images, train_labels, batch_size=128, subset='validation')

# We use the method `fit_generator` to use generators as input for training / validation data.
# `use_multiprocessing=True` asks Keras to produce the batches with process-based workers.
history = network.fit_generator(train_generator,
                                epochs=20,
                                validation_data=validation_generator,
                                use_multiprocessing=True)

In [None]:
# Plot the per-epoch metrics recorded while training with the data generators
augmented_metrics = history.history
KT.plot_history(augmented_metrics)