# Importing relevant libraries

In [1]:
import numpy as np
import tensorflow as tf
# We need the tensorflow-datasets module; if you haven't installed it yet, install the package using
# pip install tensorflow-datasets 
# or
# conda install tensorflow-datasets

import tensorflow_datasets as tfds

# Data

In [2]:
# Load MNIST through tensorflow-datasets.
#   with_info=True     -> also return the dataset metadata object (mnist_info)
#   as_supervised=True -> each element is an (image, label) 2-tuple
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

# Explore the data

In [3]:
mnist_dataset

{'test': <PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 'train': <PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}

# Split

In [4]:
# tfds returns a dict keyed by split name; pull out the two splits we use.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [5]:
#Further splitting to get Validation Set

In [6]:
# Reserve 10% of the training examples for validation. The product is a float,
# so it is cast to an int64 tensor before being used as a sample count.
validation_share_of_train = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(validation_share_of_train, tf.int64)

In [7]:
# `prev` keeps the plain Python int so it can be compared with the tensor version below.
prev = mnist_info.splits['test'].num_examples
# tf.cast turns the int into a scalar tf.Tensor of dtype int64.
num_test_samples = tf.cast(prev, tf.int64)

In [8]:
prev

10000

In [9]:
num_test_samples

<tf.Tensor: shape=(), dtype=int64, numpy=10000>

# Preprocessing

## Feature Scaling

In [10]:
# We want to scale the inputs so that every input has a comparable effect on the weights (coefficients);
# in other words, scaling makes the computation more numerically stable.
# In this case we simply prefer to have the inputs between 0 and 1.
def scale(image, label):
    """Rescale an image's pixel values from the 0-255 range into 0-1.

    Args:
        image: image tensor of pixel intensities (uint8 in the MNIST dataset).
        label: the target that accompanies the image; passed through untouched.

    Returns:
        A ``(scaled_image, label)`` tuple where ``scaled_image`` is float32.
    """
    # Cast first so the division is carried out in floating point, then divide
    # by the largest possible intensity (255) to land in the [0, 1] interval.
    normalized = tf.cast(image, tf.float32) / 255.
    return normalized, label


In [11]:
# Dataset.map(fn) applies a custom transformation to every element of the dataset;
# here `scale` is the function that defines that transformation.
scaled_train_and_validation_data = mnist_train.map(scale)

In [12]:
scaled_train_and_validation_data

<MapDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>

In [13]:
# The test set goes through the same `scale` transformation so that it is on
# the same scale as the training and validation data.
test_data = mnist_test.map(scale)

In [14]:
test_data

<MapDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>

## Shuffle and Batch

In [15]:
# SHUFFLE AND SPLIT
# Dataset.shuffle() only holds BUFFER_SIZE samples in memory at a time and draws
# from that buffer at random, which is what makes it usable on datasets that do
# not fit in memory:
#   BUFFER_SIZE = 1             => no shuffling actually happens
#   BUFFER_SIZE >= num samples  => shuffling is uniform
#   anything in between         => an approximation of uniform shuffling

BUFFER_SIZE = 10000
SHUFFLE_SEED = 42  # fixed so the train/validation split is reproducible

# The split has to be cut out of ONE fixed ordering. By default shuffle()
# produces a new permutation every time the dataset is iterated, so take(n) and
# skip(n) would select different samples on every pass and validation samples
# would end up in the training stream. reshuffle_each_iteration=False (plus a
# seed) pins the permutation, making the two subsets disjoint on every pass.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)

# .take(n) keeps the first n samples of the fixed ordering -> validation set.
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)

# .skip(n) drops those same n samples and keeps the rest -> training set.
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

# Training still benefits from seeing the samples in a new order every epoch,
# so reshuffle the training subset only (default: reshuffle on each iteration).
train_data = train_data.shuffle(BUFFER_SIZE)

In [16]:
# BATCH
# Training uses mini-batches of BATCH_SIZE samples.
BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)

# Validation and test data are not split into mini-batches: each is served as a
# single batch whose size equals the number of samples in that set.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# Because the data was loaded with as_supervised=True, every element is a
# 2-tuple; pull the single validation batch apart into inputs and targets.
validation_batch = next(iter(validation_data))
validation_inputs, validation_targets = validation_batch

# MODEL

## Outline the Model

In [17]:
input_size = 784
output_size = 10
hidden_layer_size = 200  # width of every hidden layer; other sizes can be tried

# Three identical fully connected hidden layers with ReLU activation.
hidden_layers = [
    tf.keras.layers.Dense(hidden_layer_size, activation='relu')
    for _ in range(3)
]

# Sequential stacks the layers in the order given:
#   Flatten -> turns each (28, 28, 1) image tensor into a vector
#   Dense   -> hidden layers built above
#   Dense   -> output layer; softmax turns the outputs into probabilities
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    *hidden_layers,
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

# Objective Function

## Choose the Optimizer and the Loss Function

In [18]:
# The optimizer is given by name, so Adam runs with its default settings.
# To tune it, build one explicitly instead, e.g.
#   custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
# and pass that object as `optimizer`.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Training

In [19]:
NUM_EPOCHS = 50  # upper bound on the number of passes over the training data

# verbose=2 prints one summary line per epoch. The validation set is passed as
# the (inputs, targets) tuple extracted from its single batch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/50
540/540 - 3s - loss: 0.2694 - accuracy: 0.9199 - val_loss: 0.1284 - val_accuracy: 0.9605
Epoch 2/50
540/540 - 2s - loss: 0.0995 - accuracy: 0.9691 - val_loss: 0.0966 - val_accuracy: 0.9688
Epoch 3/50
540/540 - 2s - loss: 0.0670 - accuracy: 0.9785 - val_loss: 0.0815 - val_accuracy: 0.9752
Epoch 4/50
540/540 - 2s - loss: 0.0508 - accuracy: 0.9835 - val_loss: 0.0561 - val_accuracy: 0.9822
Epoch 5/50
540/540 - 2s - loss: 0.0441 - accuracy: 0.9859 - val_loss: 0.0436 - val_accuracy: 0.9860
Epoch 6/50
540/540 - 2s - loss: 0.0342 - accuracy: 0.9895 - val_loss: 0.0404 - val_accuracy: 0.9858
Epoch 7/50
540/540 - 2s - loss: 0.0288 - accuracy: 0.9902 - val_loss: 0.0336 - val_accuracy: 0.9887
Epoch 8/50
540/540 - 3s - loss: 0.0249 - accuracy: 0.9921 - val_loss: 0.0433 - val_accuracy: 0.9862
Epoch 9/50
540/540 - 3s - loss: 0.0210 - accuracy: 0.9928 - val_loss: 0.0313 - val_accuracy: 0.9895
Epoch 10/50
540/540 - 2s - loss: 0.0201 - accuracy: 0.9938 - val_loss: 0.0316 - val_accuracy: 0.9907

<tensorflow.python.keras.callbacks.History at 0x1df9a7ec9d0>

In [20]:
# evaluate() returns the loss followed by the metrics given at compile time
# (here: accuracy), computed on the test data.
evaluation_results = model.evaluate(test_data)
test_loss, test_accuracy = evaluation_results



In [21]:
# Report both figures with two decimals; accuracy is shown as a percentage.
summary_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(summary_template.format(test_loss, test_accuracy * 100.))

Test loss: 0.11. Test accuracy: 98.30%
