In [None]:
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten   # required for CNN

import numpy as np
import matplotlib.pyplot as plt 
# Global matplotlib font sizes used by every figure in this notebook.
plt.rcParams.update({
    'axes.labelsize': 18,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})
%matplotlib inline

In [None]:
# Prepare the MNIST data for a convolutional network.
# Input image dimensions (pixels).
img_rows, img_cols = 28, 28

# The dataset is delivered already split into a training and a test set.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Append a trailing channel axis of size 1 so the grayscale images match the
# (rows, cols, channels) layout used by the Conv2D input below.
# The shape is printed before and after to make the change visible.
print(X_train.shape)
X_train = X_train.reshape(len(X_train), img_rows, img_cols, 1)
print(X_train.shape)
X_test = X_test.reshape(len(X_test), img_rows, img_cols, 1)

# Convert to float32 and scale by 1/255 (pixel values 0..255 -> 0..1).
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# The integer labels are used as-is: the model is compiled with a sparse
# categorical loss, so no one-hot encoding of y_train / y_test is required.

In [None]:
# Build a Convolutional Neural Network: two convolution layers, max pooling,
# then a dense classifier head with dropout for regularisation.
model = Sequential([
    # First layer declares the input: 28x28 images with a single channel.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28,28,1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Flatten the feature maps into one vector for the dense layers.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output per digit class; softmax turns them into probabilities.
    Dense(10, activation='softmax'),
])

# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

# The sparse variant of categorical cross-entropy takes integer class labels
# directly, so the targets do not have to be one-hot encoded.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

Convolution layers: <br>
number of parameters = (filter_height * filter_width * input_image_channels + 1) * number_of_filters <br>
(the + 1 accounts for the bias of each filter) <br>
1st convolution layer = (3 * 3 * 1 + 1) * 32 = 320 <br>
2nd convolution layer = (3 * 3 * 32 + 1) * 64 = 18,496 <br>

Dense layers: <br>
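number of parameters = (number of neurons in the previous layer + 1) * number of neurons in this layer <br>
(the flattened input to the first dense layer has 12 * 12 * 64 = 9216 values) <br>
1st dense layer = (9216 + 1) * 128 = 1,179,776 <br>
2nd dense layer = (128 + 1) * 10 = 1,290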

In [None]:
# Optionally restore previously saved weights instead of (or before) training:
# model.load_weights('CNN_20_epochs_128_batch_Adam.h5')

# Training hyperparameters.
batch_size = 128
epochs = 20

# Train the model. The test set is passed as validation data, so the
# per-epoch 'val_*' entries in `history` are measured on the test images.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_test, y_test),
)
history = model.fit(X_train, y_train, **fit_options)

# Persist the trained weights to disk.
model.save_weights('CNN_20_epochs_128_batch_Adam.h5')

# NOTE: the kernel keeps the current weights in `model`. Re-running this cell
# continues training from the already "pre-trained" model instead of starting
# from scratch; re-run the model definition cell first for a fresh start.

In [None]:
# Evaluate on the test set; with the compile settings above, `score` holds
# the loss followed by the accuracy metric.
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]

# Report both values with 4 significant digits.
print(f'Test loss: {test_loss:.4}')
print(f'Test accuracy: {test_accuracy:.4}')


0.9190 accuracy with logistic regression and L2 regularization

0.9662 accuracy with random forest of 40 trees

0.9485 accuracy with simple NN model  (Trainable params: 11,935) 

0.9815 accuracy with complex NN model  (Trainable params: 669,706) 


In [None]:
# show the learning process
def plot_learning_curve(train_values, test_values, ylabel, save_path=None):
    """Plot one per-epoch metric for the training run and for the test set.

    Parameters
    ----------
    train_values : sequence of float
        Metric value after each epoch, measured on the training data.
    test_values : sequence of float
        Metric value after each epoch, measured on the validation data
        handed to `model.fit` (the test set in this notebook).
    ylabel : str
        Label for the y axis, e.g. 'Accuracy' or 'Loss'.
    save_path : str or None, optional
        If given, the figure is also written to this file before it is shown.
    """
    # Epochs are numbered from 1. The axis is kept in a local name so the
    # notebook-level `epochs` hyperparameter (an int) is not overwritten.
    epoch_numbers = range(1, len(train_values) + 1)

    plt.figure(figsize=(14,9))
    plt.plot(epoch_numbers, train_values, 'bo', label='Training')
    plt.plot(epoch_numbers, test_values, 'r', label='Test')
    plt.xlabel('Epoch', size=14)
    plt.ylabel(ylabel, size=14)
    plt.legend()
    if save_path is not None:
        plt.savefig(save_path)
    plt.show()


# Per-epoch metrics recorded by model.fit.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

plot_learning_curve(accuracy, val_accuracy, 'Accuracy')
# Pass save_path='loss.png' to also store the loss figure on disk.
plot_learning_curve(loss, val_loss, 'Loss')

In [None]:
# confusion matrix
import seaborn as sns
from sklearn import metrics

# The model outputs one softmax score per digit class for every test image;
# the predicted digit is the index of the largest score in each row.
class_probabilities = model.predict(X_test)
y_pred = class_probabilities.argmax(axis=1)

# Confusion matrix of true labels against predicted labels.
cm1 = metrics.confusion_matrix(y_test, y_pred)

# Annotated heatmap of the confusion matrix.
fig, ax = plt.subplots(figsize=(7,7))
sns.heatmap(cm1, annot=True, fmt=".0f", linewidths=.5, square=True,
            cmap='Blues_r', ax=ax)
ax.set_ylabel('true number', size=17)
ax.set_xlabel('predicted number', size=17)
plt.show()

In [None]:
# let's have a look at the errors
# Boolean mask over the test set: True where prediction and label differ.
errors = (y_pred != y_test)
# Positions of the misclassified test images, kept as a (1, n_errors) array.
error_pos = np.array(np.where(errors))
n_errors = error_pos.shape[1]
print('total number of errors:', n_errors)

# show the errors
# Grid layout for the examples; at most n_rows * n_cols images are drawn.
n_cols, n_rows = 5, 4
# Clamp to the number of errors actually made, so a model with fewer than
# 20 misclassifications does not index past the end of error_pos.
n_shown = min(n_rows * n_cols, n_errors)

plt.figure(figsize=(12,12))
for i in range(n_shown):
    idx = error_pos[0, i]
    plt.subplot(n_rows, n_cols, i+1)
    # Drop the channel axis again so imshow receives a 2-D grayscale image.
    plt.imshow(X_test[idx].reshape((28,28)), cmap='gray')
    plt.title('target: {}\npredicted: {}'.format(y_test[idx], y_pred[idx]))
    plt.axis('off')
plt.show()
