# Deep Neural Network for MNIST

In [1]:
import numpy as np
import tensorflow as tf

# use tensorflow data provider of MNIST

import tensorflow_datasets as tfds

In [2]:
# as_supervised=True loads the dataset as 2-tuples of (input, target)
# with_info=True also returns an info object describing the version, features and number of samples of the dataset

# load MNIST together with its metadata object
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)



In [3]:
# extract the train and test datasets
# by default the tensorflow MNIST dataset comes with train and test splits but no validation split
# that's one of the more irritating properties of the tensorflow datasets module
# but in fact it gives us the opportunity to actually practice splitting datasets on our own
# the train dataset is much bigger than the test one
# so we will take the validation data from the train dataset

# the loaded object is indexed by split name
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# hold out an arbitrary percentage (10%) of the train dataset to serve as validation
# mnist_info.splits['train'].num_examples gives the number of samples in the train split
# 0.1 * that number is a float, which is not a valid sample count,
# so it is cast to an integer right away to prevent any potential issues

num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

# store the number of test samples in a dedicated variable, cast the same way

num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# normally, we'd like to scale the data in some way to make the results more numerically stable
# here we simply prefer to have inputs between 0 and 1
# so we define a function called scale that will scale the inputs

# as a precaution, let's make sure all values are floats
# next we proceed by scaling: the mnist images contain values from 0 to 255
# representing the 256 shades of gray, so if we divide each element by 255 we get the desired result
# all elements will be between 0 and 1
# the . at the end of 255. signifies that we want the result to be a float

def scale(image, label):
    """Cast ``image`` to float32 and divide it by 255; pass ``label`` through.

    Returns the ``(scaled_image, label)`` pair so the function can be used
    with ``Dataset.map`` on (input, target) elements.
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label

# Dataset.map applies a custom transformation to every element of a dataset
# here each element is an (input, label) pair, so the function must take and return such a pair
# the validation data will be taken from train later, so scale the whole train dataset now

scale_train_and_validation_data = mnist_train.map(map_func=scale)

# the test data gets the same scaling

test_data = mnist_test.map(map_func=scale)

# we will shuffle the data and then create the validation dataset
# shuffling means keeping the same information but in a different order

# it's possible that the targets are stored in ascending order, resulting in the first batches having only 0 targets and the next batches having only 1 targets
# since we'll be batching, we'd better shuffle the data; it should be as randomly spread as possible so that batching works as intended

# imagine the data is ordered and we have 10 batches, each batch containing only a given digit
# so the first batch has only 0s, the second has only 1s, etc.
# this will confuse the stochastic gradient descent algorithm
# because each batch is homogeneous inside but completely different from all other batches, causing the loss to differ greatly
# in other words, the data should be shuffled

# start by defining a buffer size
# shuffle does not load the whole dataset at once: it keeps a buffer of BUFFER_SIZE elements
# and draws each output element at random from that buffer, refilling the buffer from the input
# this matters for enormous datasets that cannot possibly fit in the memory of the computer

# if buffer_size is 1, no shuffling will actually happen
# if buffer_size is equal to or bigger than the total number of samples, the shuffle is uniform over the whole dataset
# if buffer_size is between 1 and the total number of samples, we trade some randomness for lower memory use

BUFFER_SIZE = 10000

# shuffle ONCE before splitting: reshuffle_each_iteration=False keeps the same order on every pass
# with the default (reshuffle on every iteration) take/skip would select different samples each epoch,
# so samples held out for validation would leak into the training data

shuffled_train_and_validation_data = scale_train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# once we have scaled and shuffled the data
# we can proceed to actually extracting the train and validation datasets
# our validation data is the first num_validation_samples elements (10% of the training set)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

# the train data is everything except those first num_validation_samples elements
# it is shuffled again so that the training batches still differ from epoch to epoch

train_data = shuffled_train_and_validation_data.skip(num_validation_samples).shuffle(BUFFER_SIZE)

# we will use mini-batch gradient descent to train this model
# this is the most efficient way to perform deep learning as the tradeoff between accuracy and speed is optimal
# to do that we must set a batch size and prepare the data for batching

# batch size = 1: stochastic gradient descent
# batch size = number of samples: (single-)batch gradient descent
# 1 < batch size < number of samples: mini-batch gradient descent

BATCH_SIZE = 100

# Dataset.batch combines consecutive elements of the dataset into batches
# each batch gains a leading batch dimension, which tells the model how many samples it receives at once

train_data = train_data.batch(batch_size=BATCH_SIZE)

# what about the validation data?
# since we won't be backpropagating on the validation data
# but only forward propagating, we don't really need to batch it
# batching was useful for updating the weights only once per batch (e.g. every 100 samples) rather than at every sample, hence reducing noise in the training updates
# so whenever we validate or test we simply forward propagate once
# when batching we usually find the average loss and average accuracy
# during validation and testing we want the exact values, therefore we should take all the data at once
# moreover, when forward propagating we don't use that much computational power, so it's not expensive to calculate the exact values
# however, the model expects our validation set in batch form too
# so we create a single batch containing the whole validation dataset, which adds the batch dimension the model expects

# one batch holding every validation sample
validation_data = validation_data.batch(batch_size=num_validation_samples)

# likewise, one batch holding every test sample
test_data = test_data.batch(batch_size=num_test_samples)

# finally, our validation data must have the same shape and object properties as the train and test data
# the mnist data is iterable and in 2-tuple format
# therefore we must extract and convert the validation inputs and targets appropriately

# iter is the python built-in for making the validation data an iterator
# by default it will make the dataset iterable but will not load any data
# next loads the next batch
# since there is only one batch, it will load all the inputs and the targets

# pull the first batch out of the validation dataset as an (inputs, targets) pair
validation_iterator = iter(validation_data)
validation_inputs, validation_targets = next(validation_iterator)
