In [24]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np

def get_mnist():
    (x_train, labels_train), (x_test, labels_test) = keras.datasets.mnist.load_data()
    return x_train, x_test, labels_train, labels_test
seed = 42
def prepare_mnist(x_train, x_test, labels_train, labels_test, seed):
    N_mnist = 28
    # scaling the data
    x_train, x_test = x_train / 255.0 * 2 - 1, x_test / 255.0 * 2 - 1
    x_train, x_val, labels_train, labels_val \
        = train_test_split(x_train, labels_train, test_size=0.05, random_state=seed)
    # for conv layers
    x_train = x_train.reshape(-1, N_mnist, N_mnist, 1)
    x_val = x_val.reshape(-1, N_mnist, N_mnist, 1)
    x_test = x_test.reshape(-1, N_mnist, N_mnist, 1)
    # all classes should be present in the validation set
    assert np.unique(labels_val).shape == (10,), 'not all classes are represented in validation dataset'
    # categorical labels
    labels_train =  keras.utils.to_categorical(labels_train)
    labels_val = keras.utils.to_categorical(labels_val)
    labels_test = keras.utils.to_categorical(labels_test)
    return x_train, x_val, x_test, labels_train, labels_val, labels_test

In [None]:
# augmentation
# TODO

In [31]:
from tensorflow import keras
# cnn model
def build_model():
    activation='relu'
    input = keras.Input(shape=(28, 28, 1))
    out = keras.layers.Conv2D(filters=8, kernel_size=(3, 3), padding='same',
                              activation=activation)(input)
    out = keras.layers.BatchNormalization()(out)
    out = keras.layers.Conv2D(filters=8, kernel_size=(3, 3), padding='same',
                              activation=activation)(input)
    out = keras.layers.BatchNormalization()(out)
    out = keras.layers.MaxPool2D()(out)

    out = keras.layers.Conv2D(filters=16, kernel_size=(3, 3), padding='same',
                              activation=activation)(out)
    out = keras.layers.BatchNormalization()(out)
    out = keras.layers.MaxPool2D()(out)

    out = keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding='same',
                              activation=activation)(out)
    out = keras.layers.BatchNormalization()(out)
    out = keras.layers.MaxPool2D()(out)

    out = keras.layers.Flatten()(out)
    out = keras.layers.Dense(10, activation='softmax')(out)
    return keras.Model(inputs=input, outputs=out, name='validation_cnn')

def compile_model(model):
    lr = 0.001
    beta_1 = 0.9
    beta_2 = 0.999
    optimizer = keras.optimizers.Adam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics='accuracy')

def get_callbacks():
    patience = 10
    factor = 0.8
    es = keras.callbacks.EarlyStopping(patience=patience, verbose=1)
    rlr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', 
            factor=factor, patience=0.5*patience, min_lr=1e-5)
    cp = keras.callbacks.ModelCheckpoint(filepath='validation_cnn.hdf5',
                                         save_best_only=True, verbose=1)
    return [es, rlr, cp]

In [None]:
# training
seed = 42
x_train, x_test, labels_train, labels_test = get_mnist()
x_train, x_val, x_test, labels_train, labels_val, labels_test \
    = prepare_mnist(x_train, x_test, labels_train, labels_test, seed)

model = build_model()
compile_model(model)
model.summary()
callbacks = get_callbacks()

batch = 32
epochs = 25

model.fit(x_train, labels_train, epochs=epochs, batch_size=batch, 
          validation_data=(x_val, labels_val), verbose=2, callbacks=callbacks)

In [32]:
# test
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, confusion_matrix,classification_report

model = keras.models.load_model('validation_cnn.hdf5')
labels_pred = model.predict(x_test)

labels_pred = np.argmax(labels_pred,1)
labels_true = np.argmax(labels_test,1)
print(f'Test accuracy: {accuracy_score(labels_true, labels_pred)}')
print('Confusion matrix')
conf=confusion_matrix(labels_true, labels_pred)
print(conf)

Test accuracy: 0.9914
Confusion matrix
[[ 978    0    0    0    0    0    1    1    0    0]
 [   0 1133    1    0    0    0    1    0    0    0]
 [   2    0 1015    0    0    0    0    8    7    0]
 [   0    1    0  997    0    9    0    1    2    0]
 [   0    0    0    0  975    0    2    0    0    5]
 [   1    0    0    4    0  883    2    0    1    1]
 [   3    1    1    0    2    1  948    0    2    0]
 [   0    0    3    0    0    0    0 1022    1    2]
 [   0    1    0    0    1    0    1    0  970    1]
 [   0    0    0    0    6    2    1    2    5  993]]
