## Imports

In [14]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## Preprocessing

### Retrieve Dataset

In [15]:
mnist_dataset, mnist_info = tfds.load(name = 'mnist', with_info = True, as_supervised = True)

In [16]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64) # Make sure it is an integer

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

def scale(image, label): # Scale the pixel values to be between 0 and 1
    image = tf.cast(image, tf.float32)
    image /= 255. 
    return image, label

scaled_train_and_validation_data = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

### Shuffle the Data

In [18]:
Buffer_size = 10000 # Some datasets may be too big to shuffle all at once. Buffer size tells the computer to take 10000 at a time
# and only shuffle those, before shuffling the next 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(Buffer_size)

# Split data into training and validation

validation_data = shuffled_train_and_validation_data.take(num_validation_samples) # Take first 10% of the training data for validation
train_data = shuffled_train_and_validation_data.skip(num_test_samples) # Take the remaining data for training

Batch_size = 100 # Train on 100 samples at a time before adjusting the weights for the next batch
train_data = train_data.batch(Batch_size)
validation_data = validation_data.batch(num_validation_samples) # Only batched for syntax purpose so we take the whole set as a single batch
test_data = scaled_test_data.batch(num_test_samples) # Also doesn't need to be batched

validation_inputs, validation_targets = next(iter(validation_data))

## Model

### Outline the Model

In [35]:
input_size = 784 # 28 x 28 images. 1 input per pixel
output_size = 10 # 10 possible results, 0 to 9
hidden_layer_size = 200 # We are using 50 results in each hidden layer. All hidden layers are the same size

# Flatten transforms the tensor into a vector
# Dense finds the dot products of the inputs and weights and adds the bias, and applies the activation function. This is done between each layer
# from the inputs, through the hidden layers up to the output
model = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape = (28, 28, 1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
                            tf.keras.layers.Dense(output_size, activation = 'softmax')
                            ])

### Optimization

In [36]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

### Training

In [37]:
num_epochs = 5 # Weights and biases will be adjusted after each epoch

model.fit(train_data, epochs = num_epochs, validation_data = (validation_inputs, validation_targets), verbose = 2)

Epoch 1/5


500/500 - 7s - loss: 0.2892 - accuracy: 0.9163 - val_loss: 0.1366 - val_accuracy: 0.9590 - 7s/epoch - 14ms/step
Epoch 2/5
500/500 - 5s - loss: 0.1081 - accuracy: 0.9676 - val_loss: 0.0990 - val_accuracy: 0.9712 - 5s/epoch - 10ms/step
Epoch 3/5
500/500 - 5s - loss: 0.0723 - accuracy: 0.9776 - val_loss: 0.0748 - val_accuracy: 0.9773 - 5s/epoch - 10ms/step
Epoch 4/5
500/500 - 5s - loss: 0.0522 - accuracy: 0.9834 - val_loss: 0.0711 - val_accuracy: 0.9797 - 5s/epoch - 10ms/step
Epoch 5/5
500/500 - 6s - loss: 0.0381 - accuracy: 0.9878 - val_loss: 0.0663 - val_accuracy: 0.9815 - 6s/epoch - 11ms/step


<keras.src.callbacks.History at 0x2fc312fc100>

## Testing

In [40]:
test_loss, test_accuracy = model.evaluate(test_data)



In [41]:
print("Test loss: {0:.2f}. Test accuracy: {1:.2f}%".format(test_loss, test_accuracy*100.))

Test loss: 0.08. Test accuracy: 97.56%
