In [1]:
#Libraries
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
#Data
# Load MNIST through tensorflow_datasets. as_supervised=True makes each element an
# (image, label) 2-tuple; with_info=True also returns the dataset metadata object.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    shuffle_files=False,
    with_info=True,
    as_supervised=True,
)

mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10% of the training split for validation. The product is a float, so
# tf.cast converts it into an integer (int64) tensor.
num_validation_samples = tf.cast(0.1 * mnist_info.splits['train'].num_examples, tf.int64)

# Number of examples in the test split, cast to the same data type.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# Normally we'd like to scale our data in some way to make the result more numerically stable (e.g. inputs between 0 and 1)

def scale(image, label):
    """Cast the image to float32 and divide it by 255; the label passes through unchanged.

    Intended to bring pixel values into [0, 1] (assumes 8-bit inputs in 0..255).
    Returns the (scaled_image, label) pair so it can be used with Dataset.map.
    """
    # The trailing dot makes the divisor a float literal.
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

#dataset.map(*function*) applies a custom transformation to a given dataset. It takes as input a function which determines the transformation

# Run every (image, label) element of both splits through `scale`.
test_data = mnist_test.map(scale)

scaled_train_and_validation_data = mnist_train.map(scale)

# We shuffle the data BEFORE splitting so that the validation examples are a random sample.
# When we are dealing with enormous datasets, we can't shuffle all the data at once,
# so tf.data shuffles through a fixed-size buffer instead.
Buffer_size = 10000

# reshuffle_each_iteration=False is essential here. By default a shuffled dataset is
# reshuffled every time it is iterated, so take()/skip() would pick a DIFFERENT
# partition on each pass: the validation batch is drawn once, while the training data
# is re-iterated every epoch, which lets validation examples leak into training.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    Buffer_size, reshuffle_each_iteration=False)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# The split is now fixed, so it is safe to reshuffle only the training portion on
# every epoch (the default behaviour of shuffle()).
train_data = train_data.shuffle(Buffer_size)


BATCH_SIZE = 100

# dataset.batch(batch_size) is a method that combines consecutive elements of a dataset into batches.
# The model is only run forward on the validation and test sets (no weight updates; we
# just compute the loss and metrics), so each of them goes into one batch holding the whole split.
test_data = test_data.batch(batch_size=num_test_samples)
validation_data = validation_data.batch(batch_size=num_validation_samples)
train_data = train_data.batch(batch_size=BATCH_SIZE)

# The MNIST datasets are iterable and yield 2-tuples (as_supervised=True), so the first
# (and only) validation batch can be unpacked straight into inputs and targets.
validation_inputs, validation_targets = next(iter(validation_data))

# Model

### Outline the Model

In [3]:
input_size = 784
output_size = 10
hidden_layer_size = 100  # The underlying assumption is that all hidden layers are of the same size

# Build the network layer by layer:
#  - tf.keras.layers.Flatten transforms (flattens) each input tensor into a vector.
#  - tf.keras.layers.Dense(units) computes the dot product of its inputs and the weights,
#    adds the bias, and then applies the activation function, if one is given.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
model.add(tf.keras.layers.Dense(hidden_layer_size, activation='relu'))
# softmax on the output layer, since we want class probabilities
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))


### Choose the optimizer and the loss function

In [4]:
# model.compile(optimizer, loss) configures the model for training.
# The "sparse" categorical cross-entropy accepts integer class labels directly, so the
# targets do not have to be one-hot encoded beforehand.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

### Training

In [5]:
NUM_EPOCHS = 5

# What happens inside an epoch:
#  - At the beginning of each epoch, the training loss is set to 0.
#  - The algorithm iterates over a preset number of batches, all from train_data.
#  - The weights and biases are updated as many times as there are batches.
#  - We get a value for the loss function, indicating how training is going.
#  - We also see the training accuracy.
#  - At the end of each epoch, the algorithm forward-propagates the whole validation set.
#
# Note: when we reach the maximum number of epochs, training is over.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/5
540/540 - 3s - loss: 0.3308 - accuracy: 0.9055 - val_loss: 0.1783 - val_accuracy: 0.9495
Epoch 2/5
540/540 - 2s - loss: 0.1400 - accuracy: 0.9588 - val_loss: 0.1207 - val_accuracy: 0.9685
Epoch 3/5
540/540 - 2s - loss: 0.0980 - accuracy: 0.9706 - val_loss: 0.0884 - val_accuracy: 0.9745
Epoch 4/5
540/540 - 2s - loss: 0.0734 - accuracy: 0.9770 - val_loss: 0.0792 - val_accuracy: 0.9787
Epoch 5/5
540/540 - 2s - loss: 0.0596 - accuracy: 0.9813 - val_loss: 0.0629 - val_accuracy: 0.9823


<tensorflow.python.keras.callbacks.History at 0x1eb222d9910>

### Test the model

In [6]:
# Forward-propagate the test set once; with a single compiled metric, evaluate()
# returns the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(x=test_data)



In [7]:
# Report the test loss and the accuracy as a percentage, both with two decimals.
print(
    'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(
        test_loss,
        test_accuracy * 100.,
    )
)

Test loss: 0.08. Test accuracy: 97.65%
