In [2]:
#Import relevant Libraries
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [3]:
#Loading the Dataset
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)

In [4]:
#Once we have loaded the dataset, we can easily extract the training and testing dataset with the built references
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

In [5]:
#We will take our Validation dataset from the training dataset
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
#Let's cast this number to an integer, as a float may cause an error along the way
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

#While we are at let's also store the number of test samples in a dedicated variable (instead of using the mnist_info one)
num_test_samples = mnist_info.splits['test'].num_examples

num_test_samples = tf.cast(num_test_samples, tf.int64)

In [6]:
#Let's define a function called: scale, that will take an MNIST image and its label
def scale(image, label):
    #we make sure the value is a float
    image = tf.cast(image, tf.float32)
    #since the possible values for the inputs are 0 to 255 (256 different shades of grey)
    #if we divide each element by 255, we would get the desired result -> all elements will be between 0 and 1 
    image /= 255.

    return image, label

In [7]:
#The method .map() allows us to apply a custom transformation to a given dataset
#we have already decided that we will get the validation data from mnist_train, so 
scaled_train_and_validation_data = mnist_train.map(scale)
#Finally, we scale and batch the test data
#We scale it so it has the same magnitude as the train and validation
test_data = mnist_test.map(scale)

In [8]:
# let's also shuffle the data (also, we don't need to shuffle test data)

BUFFER_SIZE = 10000

#This BUFFER_SIZE parameter is here for cases when we're dealing with enormous datasets
#then we can't shuffle the whole dataset in one go because we can't fit it all in memory
#so instead TF only stores BUFFER_SIZE samples in memory at a time and shuffles them
#if BUFFER_SIZE=1 => no shuffling will actually happen
#if BUFFER_SIZE >= num samples => shuffling is uniform
#BUFFER_SIZE in between - a computational optimization to approximate uniform shuffling

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

#The train_data is everything else, so we skip as many samples as there are in the validation dataset
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

In [9]:
# determine the batch size
BATCH_SIZE = 100

# we can also take advantage of the occasion to batch the train data
# this would be very helpful when we train, as we would be able to iterate over the different batches
train_data = train_data.batch(BATCH_SIZE)

validation_data = validation_data.batch(num_validation_samples)

# batch the test data
test_data = test_data.batch(num_test_samples)


# takes next batch (it is the only batch)
# because as_supervized=True, we've got a 2-tuple structure
validation_inputs, validation_targets = next(iter(validation_data))

In [13]:
#Clears the memory of all the variables left from previous runs
tf.compat.v1.get_default_graph
#Now we outline the model
input_size = 784
output_size = 10
hidden_layer_size = 50

#Defining how the model will look like
model = tf.keras.Sequential([
    
    # the first layer (the input layer)
    # each observation is 28x28x1 pixels, therefore it is a tensor of rank 3
    # since we don't know CNNs yet, we don't know how to feed such input into our net, so we must flatten the images
    # there is a convenient method 'Flatten' that simply takes our 28x28x1 tensor and orders it into a (None,) 
    # or (28x28x1,) = (784,) vector
    # this allows us to actually create a feed forward neural network
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)), # input layer
    
    # tf.keras.layers.Dense is basically implementing: output = activation(dot(input, weight) + bias)
    # it takes several arguments, but the most important ones for us are the hidden_layer_size and the activation function
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer
    
    # the final layer is no different, we just make sure to activate it with softmax
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])

In [17]:
#We now define the optimizer we'd like to use, 
# the loss function, 
# and the metrics we are interested in obtaining at each iteration
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [19]:
tf.compat.v1.ragged.RaggedTensorValue
#Now we train,
#determine the maximum number of epochs
NUM_EPOCHS = 5

# we fit the model, specifying the
# training data
# the total number of epochs
# and the validation data we just created ourselves in the format: (inputs,targets)
model.fit(train_data, epochs=NUM_EPOCHS, validation_data=(validation_inputs, validation_targets), verbose =2)

Epoch 1/5
540/540 - 2s - loss: 0.0859 - accuracy: 0.9741 - val_loss: 0.0818 - val_accuracy: 0.9752 - 2s/epoch - 3ms/step
Epoch 2/5
540/540 - 1s - loss: 0.0724 - accuracy: 0.9781 - val_loss: 0.0780 - val_accuracy: 0.9762 - 1s/epoch - 3ms/step
Epoch 3/5
540/540 - 1s - loss: 0.0665 - accuracy: 0.9796 - val_loss: 0.0716 - val_accuracy: 0.9797 - 1s/epoch - 3ms/step
Epoch 4/5
540/540 - 2s - loss: 0.0608 - accuracy: 0.9820 - val_loss: 0.0658 - val_accuracy: 0.9790 - 2s/epoch - 3ms/step
Epoch 5/5
540/540 - 1s - loss: 0.0542 - accuracy: 0.9837 - val_loss: 0.0647 - val_accuracy: 0.9807 - 1s/epoch - 3ms/step


<keras.src.callbacks.History at 0x28b25b943d0>

In [20]:
#Now we test the Data, we only test if validation results are satisfactory and val_accuracy is quite fine
test_loss, test_accuracy = model.evaluate(test_data)



In [21]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.11. Test accuracy: 96.71%
