# Multilayer Perceptron from raw data
This notebook guides you through using the `keras` package to train a multilayer perceptron (MLP) for handwritten-digit classification. You will use the `mnist` dataset from LeCun et al. (1998).

## Loading the packages

In [None]:
#%pip install tensorflow --upgrade
#%pip install keras --upgrade

import numpy as np
from matplotlib import pyplot as pl

from keras.datasets import mnist
from tensorflow.keras.utils import plot_model
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from tensorflow.keras.optimizers import RMSprop
from keras.utils import np_utils
from sklearn import metrics as me

%matplotlib inline

In [None]:
def plot_confusion_matrix(conf_matrix, classes_names):
    """Draw a confusion matrix as an annotated heat map and show it.

    Parameters
    ----------
    conf_matrix : 2-D array-like
        Matrix of counts. Cell ``[i, j]`` is written at row ``i`` (y axis,
        "Expected") and column ``j`` (x axis, "Predicted").
    classes_names : sequence of str
        One label per class. Used as the tick labels on both axes; its
        length determines how many rows and columns are annotated.
    """
    # Accept nested lists as well as arrays: the [i, j] indexing below
    # needs an ndarray.
    conf_matrix = np.asarray(conf_matrix)
    n_labels = len(classes_names)

    fig, ax = pl.subplots()
    ax.imshow(conf_matrix, cmap="viridis")

    # Show all ticks and label them with the respective list entries.
    # set_*ticklabels is used instead of set_*ticks(..., labels=...) so the
    # labels also work on matplotlib releases that lack the `labels` keyword.
    tick_positions = np.arange(n_labels)
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(classes_names)
    ax.set_yticklabels(classes_names)

    # Rotate the tick labels and set their alignment.
    pl.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    for i in range(n_labels):
        for j in range(n_labels):
            ax.text(j, i, conf_matrix[i, j], fontweight="bold", fontsize="large", ha="center", va="center", c="r")

    ax.set_title('Confusion matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Expected')
    fig.set_figwidth(7)
    fig.set_figheight(7)
    fig.tight_layout()
    pl.show()

## Using raw data to train a MLP
First, load the `mnist` dataset and normalize the pixel values to the range [0, 1].

In [None]:
# Load the MNIST train/test split (images in X_*, integer labels in y_*).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten each image into a single row and rescale by 1/255 as float32.
# The number of samples is taken from the arrays themselves rather than
# hard-coded, so the cell keeps working if a subset of the data is used.
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32') / 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

n_classes = 10
# Convert class vectors to binary class matrices (one column per class).
# The lowercase y_* label vectors are kept unchanged alongside the new Y_*.
Y_train = np_utils.to_categorical(y_train, n_classes)
Y_test = np_utils.to_categorical(y_test, n_classes)

Create the MLP

In [None]:
# MLP on raw pixels: 784 inputs -> 256 ReLU units -> dropout (rate 0.25)
# -> softmax over n_classes outputs.
model = Sequential(
    [
        Dense(256, input_shape=(784,), activation='relu', name="Input"),
        Dropout(0.25),
        Dense(n_classes, activation='softmax', name="Output"),
    ],
    name="MLP-raw",
)

# Text summary first, then the graph rendering as the cell's output.
model.summary()
plot_model(model, show_shapes=True)

Define some constants and train the MLP

In [None]:
# Training hyper-parameters.
batch_size = 128
n_epoch = 20

model.compile(
    optimizer=RMSprop(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data, so the `val_*` entries of
# `history` are measured on the test set after every epoch.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=n_epoch,
    batch_size=batch_size,
    validation_data=(X_test, Y_test),
    verbose=1,
)

Show the performance of the model

In [None]:
# Per-epoch loss recorded by `fit`: training loss and the loss on the
# validation data passed to it.
for history_key, curve_label in (('loss', 'Training'), ('val_loss', 'Testing')):
    pl.plot(history.history[history_key], label=curve_label)
pl.title('Loss over time')
pl.xlabel('Epoch')
pl.ylabel('Loss')
pl.legend()
pl.grid()

# `evaluate` returns the loss followed by the compiled metrics (accuracy).
score = model.evaluate(X_test, Y_test, verbose=0)
test_loss = round(score[0], 4)
test_accuracy_pct = round(score[1]*100, 2)
print(f'Test score: {test_loss}')
print(f'Test accuracy: {test_accuracy_pct}%')

Confusion matrix

In [None]:
# Predicted class = index of the largest model output for each test sample.
pred = np.argmax(model.predict(X_test), axis=-1)

# Compare against the integer labels (y_test), not the one-hot Y_test.
cm = me.confusion_matrix(y_test, pred)
classes = [str(digit) for digit in range(10)]
plot_confusion_matrix(cm, classes)