In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

  from .autonotebook import tqdm as notebook_tqdm


## Data

In [2]:
# tfds.load(name, with_info, as_supervised): loads a dataset from Tensorflow datasets
    # as_supervised = True, loads the data in a 2-tuple structure [input, target]
    # with_info = True, provides a tuple containing info about version, features, # samples of the dataset
mnist_dataset, mnist_info = tfds.load(name='mnist', with_info=True, as_supervised=True)



In [3]:
mnist_dataset['test']

<_OptionsDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.uint8, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [4]:
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    version=1.0.0,
    description='The MNIST database of handwritten digits.',
    urls=['https://storage.googleapis.com/cvdf-datasets/mnist/'],
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)

In [5]:
print(mnist_info.splits['train'].num_examples)
print(mnist_info.features['image'].shape)
print(mnist_info.features['label'].num_classes)
print(mnist_info.citation)

60000
(28, 28, 1)
10
@article{lecun2010mnist,
  title={MNIST handwritten digit database},
  author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
  journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
  volume={2},
  year={2010}
}



In [6]:
## seperate train and test
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
# tf.cast(x,dtype): casts (converts) a variable into a given data type
num_validation_samples = tf.cast(num_validation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

## Scaling 

In [7]:
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label

In [8]:
# dataset.map(*function*) applies a custom transformation to a given dataset. 
    # It takes as input a function which determines the transformation
    # image and label to the scale function.
scaled_train_and_validation_data = mnist_train.map(scale)

In [9]:
scaled_test_data = mnist_test.map(scale)

## Shuffling and batching

In [10]:
# shuffing =  keeping the same infomation but in a different order
# shuffling it can help to ensure that the model is not biased towards any particular class.
# if 1 < Buffer_size < num_samples we will be optimizing the computational power
Buffer_size = 10000

# When we are dealing with enormous datasets, we can't shuffle all data at once
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(Buffer_size)

# take the first number of validation samples
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
# skip the first number of validation samples
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

In [11]:
validation_data

<TakeDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [12]:
train_data

<SkipDataset element_spec=(TensorSpec(shape=(28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))>

In [13]:
# batch size = 1 = SGD
# batch size = n samples = (single batch) GD
# 1 < batch size < n samples = mini-batch GD 
Batch_size = 100

# when batching we find the Average loss and accuracy
    # This will add a new column to the tensor would indcate to the model how many smaples it should take in each batch
    # contains batches of 100 elements each
# train_data contained 54,000 tensors of shape (28,28,1). 
    #When batching, we split train_data to  batches (groups) of 100 samples, yielding 540 batches of shape (100, 28, 28, 1). 
# dataset.batch(batch_size) a method that combines the consecutive elements of a dataset into batches
train_data = train_data.batch(Batch_size)

# The model expects the validation dataset in batch form too
# we don't need to conduct back propogation, omly fowawrd propogarion, so take all smaples in a batch at once
validation_data = validation_data.batch(num_validation_samples)
test_data = scaled_test_data.batch(num_test_samples)

In [14]:
# iter() creats an object which can be iterated one element at a time (in a for loop or while loop)
# next() loads the next element of an iterable object
# the fit function requires validation inputs and validation targets to be separated. 
    # That's why we use iter() to separate inputs and targets.
# iter(validation_data) makes the 'validation_data' object iterable.
    # Using next(), we are telling it to load the next batch.

validation_inputs, validation_targets = next(iter(validation_data))

## Model

## Outline the model

In [15]:
input_size = 784
output_size = 10
hidden_layer_size = 900

activation_sigmoid = 'sigmoid'
activation_relu = 'relu'
activation_tanh = 'tanh'

# tf.keras.Flatten(original shape) transforms ((flattens) a tensor into a vector)
# tf.keras.layers.Dense(output size, kernel_initializer, bias_initializer):takes the inputs provided to the model 
    # and calculates the dot product of the inputs and the weights and adds the bias
    # This is also where we can apply an activation function
model = tf.keras.Sequential([
    # height: 28pixels, width:28pixels, third dimension: black and white color ranges from 0 to 255
    # neural network (actually the first dense layer) gets each input as a vector, 
        # so we flatten each input to make a vector. The input shape is (28, 28, 1) 
        # and thus the Flatten layer creates a vector of shape (784,) for each input and passes it to the first dense layer.
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(hidden_layer_size, activation=activation_relu),
    tf.keras.layers.Dense(hidden_layer_size, activation=activation_relu),
    #tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    #tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    #tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])

### Choose the optimizer and the loss function

In [16]:
custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)
optimizer_adam='adam'

# model.complie(optimizer,loss,metrics): configure the model for training
# One-hot encoded/categorical_crossentropy targets are not integers! They are vectors like: [0,1,0]. 
    # Each value is a scalar (integer), but the target itself is a vector of length 3 (in this case).
    # sparse_categorical_crossentropy, means that your target will be the integer "2", 
    # instead of a one-hot encoded one like [0,1,0].
model.compile(optimizer=optimizer_adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Training

### What happens inside an epoch
### 1. At the beginning of each epoch, the training loss will be set to 0
### 2. The alogorithm will iterate over a preset number of batches, all from train_data
### 3. The weights and biases will be updated as many times as there are batches
### 4. We will get a value for the loss function, indicating how the training is going
### 5. We will also see a training accuracy
### 6. At the end of the epoch, the algorithm will forward propagate the whole validation set
## When we reach the maximum number of epochs the training will be over

In [17]:
Num_epochs = 10

# 540/540: number of a single batch in a batch
# #s: time to take to calcualte the loss
# loss: loss values and 540 different weights and bias updates one for each batch
# accuracy: in the what % of the cases our outputs were equal to the targets
# val_loss: is used to determine overfitting
# val_accuracy: is the true accuracy of the model because it is the average accuracy across each batch
model.fit(train_data, epochs=Num_epochs, validation_data=(validation_inputs, validation_targets), verbose=2)

Epoch 1/10
540/540 - 6s - loss: 0.1928 - accuracy: 0.9409 - val_loss: 0.0942 - val_accuracy: 0.9727 - 6s/epoch - 11ms/step
Epoch 2/10
540/540 - 4s - loss: 0.0758 - accuracy: 0.9762 - val_loss: 0.0612 - val_accuracy: 0.9808 - 4s/epoch - 8ms/step
Epoch 3/10
540/540 - 6s - loss: 0.0505 - accuracy: 0.9844 - val_loss: 0.0457 - val_accuracy: 0.9857 - 6s/epoch - 11ms/step
Epoch 4/10
540/540 - 5s - loss: 0.0390 - accuracy: 0.9875 - val_loss: 0.0364 - val_accuracy: 0.9890 - 5s/epoch - 10ms/step
Epoch 5/10
540/540 - 5s - loss: 0.0300 - accuracy: 0.9907 - val_loss: 0.0268 - val_accuracy: 0.9915 - 5s/epoch - 9ms/step
Epoch 6/10
540/540 - 5s - loss: 0.0248 - accuracy: 0.9918 - val_loss: 0.0275 - val_accuracy: 0.9913 - 5s/epoch - 10ms/step
Epoch 7/10
540/540 - 5s - loss: 0.0223 - accuracy: 0.9925 - val_loss: 0.0271 - val_accuracy: 0.9905 - 5s/epoch - 9ms/step
Epoch 8/10
540/540 - 4s - loss: 0.0190 - accuracy: 0.9941 - val_loss: 0.0283 - val_accuracy: 0.9940 - 4s/epoch - 8ms/step
Epoch 9/10
540/540 -

<keras.callbacks.History at 0x242d732d660>

## Test the model

In [20]:
# forward proporgation through the network
# model.evaluate(): returns the loss value and metrics values for the model in 'test mode'
test_loss, test_accuracy = model.evaluate(test_data)



In [21]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss,test_accuracy*100.))

Test loss: 0.09. Test accuracy: 98.06%
