In [1]:
# This demo draws heavily from the handwritten digit example in
# Chapter 2 of Francois Chollet's "Deep Learning with Python" book.
# I've added a simpler single-layer example first before moving to
# the 2-layer example. -George Chen (CMU Fall 2017)

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from keras.datasets import mnist
from keras import models
from keras import layers

# Load the MNIST handwritten-digit images together with their integer labels.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image into a single row of pixels and divide by 255 so the
# values lie between 0 and 1 (stored as float32).
flattened_train_images = train_images.reshape(len(train_images), -1).astype(np.float32) / 255
flattened_test_images = test_images.reshape(len(test_images), -1).astype(np.float32) / 255

# One-hot encode the labels. The number of classes is passed explicitly
# because to_categorical otherwise infers it from the largest label it sees,
# separately for each call; fixing it guarantees that the training and test
# encodings always have the same width.
from keras.utils import to_categorical
NUM_CLASSES = 10  # digits 0 through 9
train_labels_categorical = to_categorical(train_labels, num_classes=NUM_CLASSES)
test_labels_categorical = to_categorical(test_labels, num_classes=NUM_CLASSES)

Using TensorFlow backend.


In [2]:
# Show the first training label in its original integer form.
train_labels[0]

5

In [3]:
# Show the same label after one-hot encoding: a length-10 vector with a
# single 1 at the index of the digit's class.
train_labels_categorical[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [4]:
# Shallowest possible classifier: the 784 values of a flattened image feed
# straight into a 10-way softmax output layer. Sequential is the Keras
# container for a model that is one linear stack of layers.
single_layer_model = models.Sequential([
    layers.Dense(units=10, activation='softmax', input_shape=(784,)),
])
single_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Configure training for the single-layer model: RMSprop as the optimizer,
# categorical cross-entropy as the loss (the labels are one-hot encoded),
# and accuracy reported alongside the loss.
single_layer_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [6]:
# Train the single-layer model for 5 epochs in mini-batches of 128 images,
# setting aside 20% of the training data for validation.
single_layer_model.fit(
    x=flattened_train_images,
    y=train_labels_categorical,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb3f8ee898>

In [7]:
# Two-layer network: a 512-unit ReLU hidden layer on top of the flattened
# 784-value input, followed by the 10-way softmax output layer.
two_layer_model = models.Sequential([
    layers.Dense(units=512, activation='relu', input_shape=(784,)),
    layers.Dense(units=10, activation='softmax'),
])

# Same training configuration as the single-layer model.
two_layer_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
two_layer_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Train the two-layer model with the same schedule as before: 5 epochs,
# mini-batches of 128, and 20% of the training data held out for validation.
two_layer_model.fit(
    x=flattened_train_images,
    y=train_labels_categorical,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x10ca91b00>

In [9]:
def add_channel_axis_and_rescale(images):
    """Return `images` as float32 with a trailing axis added, divided by 255.

    The convolutional layers expect a color/channel dimension even though
    these images are grayscale, so the array is reshaped to
    (count, height, width, -1) before the pixel values are rescaled to lie
    between 0 and 1.
    """
    height, width = images.shape[1], images.shape[2]
    with_channel = images.reshape(len(images), height, width, -1)
    return with_channel.astype(np.float32) / 255


scaled_train_images = add_channel_axis_and_rescale(train_images)
scaled_test_images = add_channel_axis_and_rescale(test_images)

In [10]:
# Sanity check: the training images should now carry a trailing channel axis
# of size 1 after (count, height, width).
print(scaled_train_images.shape)

(60000, 28, 28, 1)


In [11]:
# Minimal convolutional network: one block of 32 3x3 ReLU filters followed by
# 2x2 max pooling, then the feature maps are flattened and passed to the
# 10-way softmax classifier.
simple_convnet_model = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                  input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=10, activation='softmax'),
])
simple_convnet_model.summary()

# Same optimizer, loss, and metric as the dense models above.
simple_convnet_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 5408)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                54090     
Total params: 54,410
Trainable params: 54,410
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train the simple convnet on the image-shaped (not flattened) inputs:
# 5 epochs, mini-batches of 128, 20% of the training data held out for
# validation.
simple_convnet_model.fit(
    x=scaled_train_images,
    y=train_labels_categorical,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb561818d0>

In [13]:
# Deeper convolutional network: two consecutive conv + max-pool blocks
# (32 3x3 ReLU filters, 2x2 pooling each) before flattening into the 10-way
# softmax classifier.
deeper_convnet_model = models.Sequential([
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                  input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(units=10, activation='softmax'),
])
deeper_convnet_model.summary()

# Same optimizer, loss, and metric as the other models.
deeper_convnet_model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 800)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                8010      
Total params: 17,578
Trainable params: 17,578
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Train the deeper convnet with the same schedule as the other models:
# 5 epochs, mini-batches of 128, 20% of the training data held out for
# validation.
deeper_convnet_model.fit(
    x=scaled_train_images,
    y=train_labels_categorical,
    batch_size=128,
    epochs=5,
    validation_split=0.2,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0xb6e0d6b70>

## Finally, evaluate on test data

In [15]:
# Score the single-layer model on the test set (flattened inputs); the
# result unpacks into the loss and the accuracy metric.
test_loss, test_acc = single_layer_model.evaluate(
    x=flattened_test_images,
    y=test_labels_categorical,
)
print('Test accuracy:', test_acc)

Test accuracy: 0.9227


In [16]:
# Score the two-layer model on the test set (flattened inputs); the result
# unpacks into the loss and the accuracy metric.
test_loss, test_acc = two_layer_model.evaluate(
    x=flattened_test_images,
    y=test_labels_categorical,
)
print('Test accuracy:', test_acc)

Test accuracy: 0.9778


In [17]:
# Score the simple convnet on the test set (image-shaped inputs); the result
# unpacks into the loss and the accuracy metric.
test_loss, test_acc = simple_convnet_model.evaluate(
    x=scaled_test_images,
    y=test_labels_categorical,
)
print('Test accuracy:', test_acc)

Test accuracy: 0.9756


In [18]:
# Score the deeper convnet on the test set (image-shaped inputs); the result
# unpacks into the loss and the accuracy metric.
test_loss, test_acc = deeper_convnet_model.evaluate(
    x=scaled_test_images,
    y=test_labels_categorical,
)
print('Test accuracy:', test_acc)

Test accuracy: 0.9857
