In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
# Tensorflow datasets includes the 'MNIST' dataset, which we need for the project

In [2]:
# Load MNIST through TensorFlow Datasets.
#   - name='mnist' and as_supervised=True are the only arguments that are strictly necessary.
#   - with_info=True makes tfds.load return a second value with metadata about the dataset
#     (version, features, number of samples); we keep it in mnist_info.
#   - data_dir is where the dataset is stored on disk: the first run downloads it, later runs
#     read it from that location.
# NOTE(review): 'C:/' is an absolute, Windows-style path - consider making it configurable.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    data_dir='C:/',
    with_info=True,
    as_supervised=True,
)

# mnist_dataset is indexed by split name. Only 'train' and 'test' are used here; there is no
# ready-made validation split, so we will carve one out of the training data ourselves.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Hold out 10% of the training examples for validation.
# mnist_info.splits['train'].num_examples is the number of training samples; multiplying it by
# 0.1 gives a float, so tf.cast(value, dtype) converts the result to a 64-bit integer.
num_validation_samples = tf.cast(0.1 * mnist_info.splits['train'].num_examples, tf.int64)

# Number of test samples, cast to the same integer type.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# Next step is to scale the pixel values from the 0-255 range down to 0-1, which keeps the inputs numerically stable.
# We will write a function that scales them properly. The trailing "." in "255." makes the literal a float.

def scale(image, label):
    """Divide an image's pixel values by 255 and pass the label through unchanged.

    The image is cast to tf.float32 first so that the division is done in
    floating point. The (image, label) -> (image, label) signature is what
    Dataset.map() needs for a dataset of (image, label) pairs.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# dataset.map(function) applies `function` to every element of the dataset.
# The function must take (image, label) as inputs and return (image, label).
# Scale the combined train+validation data and, separately, the test data.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# Shuffle the data before splitting and batching it.
# BUFFER_SIZE is how many samples the shuffle buffer holds at a time.
BUFFER_SIZE = 10000

# IMPORTANT: by default Dataset.shuffle() draws a *new* order every time the dataset is
# iterated (reshuffle_each_iteration=True). take()/skip() further down would then cut a
# different validation/training partition on every pass: the validation tensors are drawn
# once, while train_data is re-iterated every epoch, so validation examples would leak into
# training. Freezing the order here makes take() and skip() exact complements of each other.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# .take(n) keeps the first n samples of the (frozen) shuffled order -> validation set.
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# .skip(n) keeps everything except those first n samples -> training set.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Re-shuffle the training part only, so that each epoch still sees the samples in a new order
# without touching the validation split.
train_data = train_data.shuffle(BUFFER_SIZE)

# Mini-batch gradient descent: weights and biases are updated once per batch of 100 samples.
BATCH_SIZE = 100
train_data = train_data.batch(BATCH_SIZE)

# Validation and test data are only forward-propagated (no backpropagation), so they do not
# need mini-batches. The model still expects batched input, so each set becomes one single
# batch containing all of its samples.
validation_data = validation_data.batch(num_validation_samples)  # single batch
test_data = test_data.batch(num_test_samples)  # single batch

# iter() makes the dataset iterable and next() loads its first (and only) batch, giving the
# validation inputs and targets as tensors.
validation_inputs, validation_targets = next(iter(validation_data))




# Model

### Outline the model

In [3]:
# Hyperparameters of the network (width, depth, ...).
# input_size is 28 * 28 * 1 = 784; it is kept for reference, the model code shown here takes
# the input shape from the Flatten layer instead.
input_size = 784
output_size = 10
hidden_layer_size = 500

# tf.keras.Sequential stacks layers in order:
#   1. Flatten turns each (28, 28, 1) image tensor into a vector.
#   2. Two hidden Dense layers of hidden_layer_size units each, with 'relu' activation.
#   3. A Dense output layer with one unit per class and 'softmax' activation, so the outputs
#      are probabilities.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    # Hidden layers 1 and 2.
    *[tf.keras.layers.Dense(hidden_layer_size, activation='relu') for _ in range(2)],
    # Output layer.
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

## Choose the optimizer and the loss function

In [4]:
# model.compile(optimizer, loss, metrics) configures the model for training.
#   optimizer: 'adam' (adaptive moment estimation).
#   loss:      this is a classification problem, so a cross-entropy loss is needed. Of the three
#              options (binary, categorical, sparse categorical) the sparse variant works directly
#              on integer class labels, so the targets do not have to be one-hot encoded by us.
#   metrics:   'accuracy', so that accuracy is reported alongside the loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Training

In [5]:
# Last step: train the model. NUM_EPOCHS is how many full passes over train_data are made.
NUM_EPOCHS = 5

# What we expect to happen in each epoch:
#   1. At the beginning of the epoch the training loss is set to 0.
#   2. The algorithm iterates over all batches of train_data.
#   3. Weights and biases are updated once per batch.
#   4. At the end of the epoch the training loss is reported, showing how the model is learning.
#   5. The training accuracy is reported as well.
#   6. The validation set is forward-propagated to compute the validation loss and accuracy.
# Training finishes once the total number of epochs has been reached.
# verbose=2 prints one summary line per epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 7s - loss: 0.2191 - accuracy: 0.9359 - val_loss: 0.1114 - val_accuracy: 0.9658 - 7s/epoch - 13ms/step
Epoch 2/5
540/540 - 5s - loss: 0.0832 - accuracy: 0.9738 - val_loss: 0.0787 - val_accuracy: 0.9778 - 5s/epoch - 8ms/step
Epoch 3/5
540/540 - 4s - loss: 0.0542 - accuracy: 0.9833 - val_loss: 0.0589 - val_accuracy: 0.9832 - 4s/epoch - 7ms/step
Epoch 4/5
540/540 - 4s - loss: 0.0428 - accuracy: 0.9863 - val_loss: 0.0514 - val_accuracy: 0.9848 - 4s/epoch - 7ms/step
Epoch 5/5
540/540 - 4s - loss: 0.0320 - accuracy: 0.9894 - val_loss: 0.0541 - val_accuracy: 0.9832 - 4s/epoch - 8ms/step


<keras.callbacks.History at 0x11b3f283e20>

### We got the validation accuracy of 98.32%, which is solid - there are still ways to improve our model of course.

In [None]:
# Possible ways to improve (each should be checked experimentally): tune the batch size, increase the number of hidden layers, increase the width of the hidden layers dramatically.

## Test the Model

### The absolute last step is to check our model on the test data.

In [6]:
# Forward-propagate the test data once; with one loss and one metric ('accuracy') compiled
# into the model, evaluate() returns the pair (loss, accuracy).
test_loss, test_accuracy = model.evaluate(x=test_data)



In [7]:
# Report the test loss and the test accuracy (as a percentage), both with two decimals.
print(f'Test loss: {test_loss:.2f}. Test accuracy: {test_accuracy * 100.:.2f}%')

Test loss: 0.09. Test accuracy: 97.73%
