In [191]:
import matplotlib.pyplot as plt 
import math 

# Notebook-wide matplotlib defaults: big figures and fonts, white axis
# labels and titles (presumably chosen for a dark notebook theme - confirm).
plt.rcParams.update({
    'figure.figsize': [18, 9],
    'axes.labelsize': 18,
    'axes.labelcolor': 'w',
    'axes.titlecolor': 'w',
    'axes.titlesize': 20,
    'legend.fontsize': 18,
})

In [193]:
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Dense, Dropout, Input
from keras.utils import np_utils, plot_model

import numpy as np 
# Print floating-point arrays in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

In [None]:
### MNIST dataset: images of handwritten digits with their labels ###

# Load the dataset; it comes already partitioned into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Report the shape of every array that was loaded
for array_name, array in (('X_train', X_train), ('y_train', y_train),
                          ('X_test', X_test), ('y_test', y_test)):
    print('{} original shape: '.format(array_name), array.shape)

# Inspect one fixed sample (index 8) and the corresponding label
print('\nX sample:\n', X_train[8])
print('\nY sample:', y_train[8])

In [None]:
### Preview a few samples of the dataset ###
sample_number = 9

# Pair the first `sample_number` images with their labels and draw them
# on a 3 x 3 grid, one subplot per digit
previews = zip(X_train[:sample_number], y_train[:sample_number])
for position, (digit, digit_class) in enumerate(previews, start=1):
    plt.subplot(3, 3, position)                 # Subplot slots are 1-based
    plt.imshow(digit, cmap='gray')              # Render the image in grayscale
    plt.title('Class {}'.format(digit_class))   # Caption with the true class

In [196]:
### Image preprocessing ###

def preprocess_images(images):
    """Flatten each image to a vector and scale pixel values to [0, 1].

    Args:
        images: array whose first axis indexes the samples. Integer arrays
            are treated as raw pixel data in [0, 255]. Floating-point arrays
            are assumed to be already normalized and are NOT divided again,
            which makes re-running this cell harmless.

    Returns:
        A float32 array of shape (n_samples, n_pixels).
    """
    # Decide before the dtype conversion below erases the information
    needs_scaling = np.issubdtype(images.dtype, np.integer)

    # -1 lets numpy infer the flattened length (28 x 28 = 784 for MNIST), so
    # neither the sample count nor the image size is hard-coded
    flat = images.reshape(len(images), -1).astype('float32')

    if needs_scaling:
        flat /= 255  # Normalize the data from interval [0, 255] to [0, 1]
    return flat


X_train = preprocess_images(X_train)
X_test = preprocess_images(X_test)

In [None]:
### Label preprocessing ###

classes_no = 10     # Digits 0-9 -> 10 classes

# One-hot encode the training and the testing labels in a single pass
Y_train, Y_test = (
    np_utils.to_categorical(labels, classes_no) for labels in (y_train, y_test)
)

# Show how one integer label maps to its one-hot vector
print('Y sample encoding: Number {} -> Array {}'.format(y_train[8], Y_train[8]))

In [None]:
def network(input_dim=784, hidden_units=512, classes=10, dropout_rate=0.0):
    """Build a fully connected softmax classifier.

    With the default arguments the architecture is
    Input(784) -> Dense(512, relu) -> Dense(512, relu) -> Dense(10, softmax).

    Args:
        input_dim: Length of each flattened input vector.
        hidden_units: Number of units in each of the two hidden layers.
        classes: Number of output classes (units of the softmax layer).
        dropout_rate: Fraction of hidden activations to drop during training.
            When 0 (the default) no Dropout layers are added.

    Returns:
        An uncompiled keras `Model` mapping the input layer to the softmax output.
    """
    # Input layer - a placeholder describing the shape of one sample
    inputs = Input(shape=(input_dim,))

    x = inputs
    for _ in range(2):
        # Dense layer - every unit is connected to every output of the previous layer
        x = Dense(units=hidden_units, activation='relu')(x)
        if dropout_rate > 0:
            # Dropout randomly zeroes a fraction of the layer's outputs on each
            # training step, which helps to prevent overfitting
            x = Dropout(rate=dropout_rate)(x)

    # Softmax normalizes a vector of n real numbers into a probability
    # distribution over n classes - the usual choice for multi-class problems
    outputs = Dense(units=classes, activation='softmax')(x)

    return Model(inputs, outputs)                       # Construct the model

# Build the model returned by network()
model = network()
# Draw the model's layer graph, annotated with the layer shapes
plot_model(model, show_shapes=True)

In [186]:
# Training configuration: which loss to minimize, which optimizer to use
# and which metrics to report during the training step
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# How many epochs the model is trained for
epochs = 10

# How many images pass through the network at once
batch_size = 256

# 20% of the training data is set aside for validation: an extra check,
# run alongside training, of how well the network generalizes to data
# it is not being fitted on
history = model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_split=0.2,
)

In [None]:
### Training and validation curves ###

recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']

# One figure per metric: dots for the training curve, a line for validation
curves = (('accuracy', acc, val_acc), ('loss', loss, val_loss))
for position, (metric_name, train_curve, val_curve) in enumerate(curves):
    if position:
        plt.figure()    # The first metric is drawn on the current figure
    plt.plot(train_curve, 'bo', label='Training {}'.format(metric_name))
    plt.plot(val_curve, 'b', label='Validation {}'.format(metric_name))
    plt.title('Training and validation {}'.format(metric_name))
    plt.legend()

plt.show()

In [None]:
# Score the model on the test dataset, which was not used for training
score = model.evaluate(X_test, Y_test)

# First entry is the loss, second entry is the accuracy metric
test_loss, test_accuracy = score[0], score[1]
print('Test loss: {:.3f}'.format(test_loss))
print('Test accuracy: {:.3f}'.format(test_accuracy))

In [None]:
# Run the model on the test images: one prediction vector per image
predictions = model.predict(X_test)

# Show the predicted score of every digit for the first image
first_prediction = predictions[0]
for number in range(len(first_prediction)):
    print('Number {} : {:.8f} %'.format(number, first_prediction[number] * 100))

# The predicted class of an image is the position of its highest score
predicted_classes = predictions.argmax(axis=1)

print('Predicted class -> {}'.format(predicted_classes[0]))

# Split the test indices into those predicted right and those predicted wrong
is_correct = predicted_classes == y_test
correct_indices = np.flatnonzero(is_correct)
incorrect_indices = np.flatnonzero(~is_correct)


In [None]:
# Show up to nine test images that the network classified correctly

plt.figure()
for position, sample in enumerate(correct_indices[:9], start=1):
    plt.subplot(3, 3, position)
    digit = X_test[sample].reshape(28, 28)     # Back from a flat vector to a 28 x 28 image
    plt.imshow(digit, cmap='gray', interpolation='none')
    plt.title("Predicted {}, Class {}".format(predicted_classes[sample], y_test[sample]))

In [None]:
# Plot a few bad predictions of the network (up to nine misclassified test images)

plt.figure()
for i, incorrect in enumerate(incorrect_indices[:9]):
    plt.subplot(3,3,i+1)
    # Samples are stored as flat vectors; restore the 28 x 28 image for display
    plt.imshow(X_test[incorrect].reshape(28,28), cmap='gray', interpolation='none')
    # Title shows the network's guess next to the true label
    plt.title("Predicted {}, Class {}".format(predicted_classes[incorrect], y_test[incorrect]))