Build a convolutional neural network to classify the MNIST handwritten-digit dataset, and compare its performance to that of a conventional neural network

In [4]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [6]:
from keras.layers import Conv2D # to add convolutional layers
from keras.layers import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers

Import and prepare the data

In [7]:
# MNIST handwritten-digit dataset loader that ships with Keras
from keras.datasets import mnist

# load_data() hands back the train and test splits as (images, labels) pairs
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# reshape to [samples][height][width][channels] with a single channel, which
# matches the input_shape=(28, 28, 1) given to the first Conv2D layer, and
# cast to float32
X_train = X_train.reshape((len(X_train), 28, 28, 1)).astype('float32')
X_test = X_test.reshape((len(X_test), 28, 28, 1)).astype('float32')

In [8]:
# normalize both splits by dividing every pixel value by 255
# NOTE: not idempotent - re-running this cell divides the data by 255 again
X_train, X_test = X_train / 255, X_test / 255

One-hot encode the target labels so that each category in the dataset gets its own column

In [9]:
# Fix the number of categories up front from the labels of BOTH splits.
# Without an explicit num_classes, to_categorical infers the width separately
# from the largest label present in each array, so the train and test
# encodings (and the model's output layer) could end up with different sizes.
num_classes = int(max(y_train.max(), y_test.max())) + 1 # number of categories

# one-hot encode the integer labels: one column per category
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

Build a model with one set of convolution and pooling layers

In [10]:
def convolutional_model():
    """Build and compile a CNN with one convolution + pooling stage.

    Layers, in order: Conv2D (16 filters, 5x5 kernel, stride 1, ReLU) on
    28x28x1 inputs -> 2x2 max pooling with stride 2 -> Flatten ->
    Dense(100, ReLU) -> Dense(num_classes, softmax).

    Reads the notebook-level ``num_classes`` for the size of the output layer.

    Returns:
        The Sequential model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    layers = [
        # convolution + pooling stage
        Conv2D(16, (5, 5), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        # flatten the feature maps and classify with fully connected layers
        Flatten(),
        Dense(100, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layers)

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [11]:
# instantiate the network defined in the previous cell
model = convolutional_model()

# train for 10 epochs in batches of 200, reporting metrics on the test split
# after every epoch (verbose=2 prints one line per epoch)
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=2,
)

# score on the test split; index 1 of the result is used as the accuracy,
# and the error is reported as 100 minus the accuracy in percent
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: {} \n Error: {}".format(scores[1], 100 - scores[1] * 100))




Epoch 1/10


300/300 - 12s - loss: 0.2756 - accuracy: 0.9261 - val_loss: 0.0874 - val_accuracy: 0.9741 - 12s/epoch - 42ms/step
Epoch 2/10
300/300 - 13s - loss: 0.0783 - accuracy: 0.9774 - val_loss: 0.0563 - val_accuracy: 0.9822 - 13s/epoch - 43ms/step
Epoch 3/10
300/300 - 16s - loss: 0.0545 - accuracy: 0.9837 - val_loss: 0.0479 - val_accuracy: 0.9836 - 16s/epoch - 53ms/step
Epoch 4/10
300/300 - 15s - loss: 0.0422 - accuracy: 0.9874 - val_loss: 0.0423 - val_accuracy: 0.9845 - 15s/epoch - 50ms/step
Epoch 5/10
300/300 - 16s - loss: 0.0341 - accuracy: 0.9897 - val_loss: 0.0392 - val_accuracy: 0.9875 - 16s/epoch - 55ms/step
Epoch 6/10
300/300 - 18s - loss: 0.0282 - accuracy: 0.9913 - val_loss: 0.0362 - val_accuracy: 0.9879 - 18s/epoch - 61ms/step
Epoch 7/10
300/300 - 18s - loss: 0.0236 - accuracy: 0.9926 - val_loss: 0.0426 - val_accuracy: 0.9864 - 18s/epoch - 58ms/step
Epoch 8/10
300/300 - 20s - loss: 0.0192 - accuracy: 0.9941 - val_loss: 0.0382 - val_accuracy: 0.9876 - 20s/epoch - 68ms/

Build a new model with two sets of convolutional and pooling layers

In [12]:
def convolutional_model():
    """Build and compile a CNN with two convolution + pooling stages.

    Layers, in order: Conv2D (16 filters, 5x5 kernel, ReLU) on 28x28x1
    inputs -> 2x2 max pooling with stride 2 -> Conv2D (8 filters, 2x2 kernel,
    ReLU) -> 2x2 max pooling with stride 2 -> Flatten -> Dense(100, ReLU) ->
    Dense(num_classes, softmax).

    Reads the notebook-level ``num_classes`` for the size of the output layer.
    Note that this definition rebinds ``convolutional_model``, shadowing the
    single-stage version defined earlier in the notebook.

    Returns:
        The Sequential model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    layers = [
        # first convolution + pooling stage
        Conv2D(16, (5, 5), activation='relu', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        # second convolution + pooling stage
        Conv2D(8, (2, 2), activation='relu'),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
        # flatten the feature maps and classify with fully connected layers
        Flatten(),
        Dense(100, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layers)

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [13]:
# instantiate the two-stage network defined in the previous cell
model = convolutional_model()

# same training setup as before: 10 epochs, batches of 200, metrics on the
# test split after every epoch (verbose=2 prints one line per epoch)
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=2,
)

# score on the test split; index 1 of the result is used as the accuracy,
# and the error is reported as 100 minus the accuracy in percent
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: {} \n Error: {}".format(scores[1], 100 - scores[1] * 100))

Epoch 1/10
300/300 - 24s - loss: 0.5136 - accuracy: 0.8516 - val_loss: 0.1722 - val_accuracy: 0.9484 - 24s/epoch - 81ms/step
Epoch 2/10
300/300 - 28s - loss: 0.1404 - accuracy: 0.9578 - val_loss: 0.0937 - val_accuracy: 0.9710 - 28s/epoch - 93ms/step
Epoch 3/10
300/300 - 31s - loss: 0.0986 - accuracy: 0.9704 - val_loss: 0.0724 - val_accuracy: 0.9775 - 31s/epoch - 103ms/step
Epoch 4/10
300/300 - 35s - loss: 0.0796 - accuracy: 0.9756 - val_loss: 0.0655 - val_accuracy: 0.9795 - 35s/epoch - 116ms/step
Epoch 5/10
300/300 - 37s - loss: 0.0686 - accuracy: 0.9788 - val_loss: 0.0597 - val_accuracy: 0.9814 - 37s/epoch - 124ms/step
Epoch 6/10
300/300 - 37s - loss: 0.0582 - accuracy: 0.9824 - val_loss: 0.0586 - val_accuracy: 0.9816 - 37s/epoch - 124ms/step
Epoch 7/10
300/300 - 39s - loss: 0.0529 - accuracy: 0.9841 - val_loss: 0.0493 - val_accuracy: 0.9843 - 39s/epoch - 129ms/step
Epoch 8/10
300/300 - 47s - loss: 0.0476 - accuracy: 0.9857 - val_loss: 0.0482 - val_accuracy: 0.9839 - 47s/epoch - 158ms

In this case, the extra convolutional and pooling layers added compute time but did not improve performance. By comparison, the conventional network in the Classification Models notebook had a lower accuracy score (0.980) and a higher error (2.00%).