## MNIST handwritten digit recognition algorithm

### Import the relevant libraries

In [25]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

### Data

In [26]:
# Fetch MNIST as (image, label) pairs, together with the dataset's metadata.
mnist_dataset, mnist_info = tfds.load(name='mnist', as_supervised=True, with_info=True)

# TensorFlow Datasets only provides 'train' and 'test' splits here.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# A validation split has to be carved out of the training data: reserve 10% of it.
# The count is cast to an integer tensor because it is used as an element count below.
num_validation_samples = tf.cast(0.1 * mnist_info.splits['train'].num_examples, tf.int64)

# The test-set size is used later as the batch size for the test data.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# Scaling function applied to every (image, label) element of the datasets.
def scale(image, label):
    """Cast the image to float32 and divide it by 255; return it with the unchanged label.

    Presumably the pixels are 8-bit values, so the result lies in [0, 1] -- confirm
    against the dataset if that range matters.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Apply the same scaling to the test split and to the training split
# (the latter still contains the samples that will become the validation set).
test_data = mnist_test.map(scale)
scaled_train_and_validation_data = mnist_train.map(scale)

#### Shuffle and Batch the data

In [27]:
# Shuffle buffer guide:
#   buffer_size = 1                -> no shuffling actually happens
#   buffer_size >= num_samples     -> the whole dataset is shuffled at once (uniformly)
#   1 < buffer_size < num_samples  -> approximate shuffle that needs less memory
BUFFER_SIZE = 10_000

# Shuffle ONCE with a fixed order before splitting. By default `shuffle` reshuffles
# every time the dataset is iterated, so `take` and `skip` would each see a different
# ordering: the two splits would overlap and validation samples would leak into the
# training data on every epoch, inflating the validation metrics.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# The first `num_validation_samples` elements become the validation set ...
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# ... and everything after them is training data. Only this training portion is
# reshuffled on each epoch, which keeps the split itself stable.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples).shuffle(BUFFER_SIZE)

# Batch size guide:
#   batch size = 1              -> stochastic gradient descent (SGD)
#   batch size = # samples      -> (single-batch) gradient descent
#   1 < batch size < # samples  -> mini-batch gradient descent
BATCH_SIZE = 100

# Only the training data is split into mini-batches; the validation and test data
# are each batched with their full sample count, i.e. one batch per split.
test_data = test_data.batch(num_test_samples)
validation_data = validation_data.batch(num_validation_samples)
train_data = train_data.batch(BATCH_SIZE)

# Pull the first (and only) validation batch out as an (inputs, targets) pair.
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)

### Model 

#### Outline the model

In [28]:
# 784 inputs (28 x 28 pixels once flattened) and 10 output classes, with 2 hidden layers
# of `hidden_layer_size` nodes each in between.
input_size = 784  # kept for reference; not used below, Flatten gets the shape directly
output_size = 10
# Hidden layer width: started at 50 and was raised to 100 while tuning for higher accuracy.
hidden_layer_size = 100

# tf.keras.Sequential stacks the layers in the order given:
#   Flatten(input_shape) -> flattens each image tensor into a vector
#   Dense(units)         -> dot product of inputs and weights plus a bias,
#                           followed by the chosen activation function
model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1))]
    # the two hidden layers, identical in size and activation
    + [tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)]
    # the output layer: softmax over the `output_size` classes
    + [tf.keras.layers.Dense(output_size, activation='softmax')]
)

#### Select the Optimizer and Loss function

In [29]:
# Configure the model for training: Adam optimizer, sparse categorical cross-entropy
# loss (this loss expects integer class labels rather than one-hot vectors), and
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

#### Training

In [30]:
# Number of passes over the training data.
NUM_EPOCHS = 5

# Train on the batched training set; the validation tensors are passed as
# validation_data so that val_loss / val_accuracy are reported for each epoch.
# verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 6s - loss: 0.3290 - accuracy: 0.9059 - val_loss: 0.1554 - val_accuracy: 0.9513
Epoch 2/5
540/540 - 4s - loss: 0.1340 - accuracy: 0.9603 - val_loss: 0.1292 - val_accuracy: 0.9587
Epoch 3/5
540/540 - 3s - loss: 0.0965 - accuracy: 0.9708 - val_loss: 0.0940 - val_accuracy: 0.9717
Epoch 4/5
540/540 - 3s - loss: 0.0741 - accuracy: 0.9771 - val_loss: 0.0723 - val_accuracy: 0.9773
Epoch 5/5
540/540 - 3s - loss: 0.0590 - accuracy: 0.9826 - val_loss: 0.0649 - val_accuracy: 0.9787


<keras.callbacks.History at 0x1f11c3cf2b0>

#### Test the model

In [31]:
# model.evaluate() -> returns the loss value and metric values for the model in 'test mode'.
# The model was compiled with a single metric ('accuracy'), so the result unpacks into
# exactly two values: the test loss and the test accuracy.
test_loss, test_accuracy = model.evaluate(test_data)



In [35]:
# Report the test metrics with two decimals; accuracy is shown as a percentage.
print(f'Test loss: {test_loss:.2f}. Test accuracy: {test_accuracy*100:.2f}%')

Test loss: 0.08. Test accuracy: 97.22%


In [36]:
# We have successfully trained and tested our machine learning model, reaching about 97% accuracy on the test set.