# MNIST classification

#### Importing libraries

In [18]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

#### Loading the dataset

In [19]:
# Load MNIST through TensorFlow Datasets.
#   as_supervised=True -> every example comes back as an (input, target) 2-tuple
#   with_info=True     -> tfds.load additionally returns the dataset's info object
mnistData, mnistInfo = tfds.load(name='mnist', as_supervised=True, with_info=True)

#### Preprocessing

Train test split

In [20]:
# MNIST has 70K images in total: 60K in the 'train' split and 10K in the 'test' split.
mnistTrain = mnistData['train']
mnistTest = mnistData['test']


Validation

In [21]:
# Sizes of the validation and test sets.
# Both are kept as tf.int64 scalars; later cells pass them to take()/skip()/batch().

num_train = mnistInfo.splits['train'].num_examples   # 60000

# Hold out 10% of the training examples for validation.
# 0.1 * num_train is a float, so it is cast to an integer tensor.
num_Validation = tf.cast(0.1 * num_train, tf.int64)

# The number of test examples gets the same integer dtype.
num_Test = tf.cast(mnistInfo.splits['test'].num_examples, tf.int64)





Scaling

In [22]:
def scale(image, label):
    """Convert an image to float32 and divide its values by 255.

    Args:
        image: the image tensor of one example (pixel values are expected
            to lie in the 0..255 range, so the result lands in 0..1).
        label: the target belonging to the image; returned untouched.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    # Cast first so that the division is carried out in float32;
    # the trailing dot in `255.` makes the divisor a float as well.
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label



In [23]:
# Run `scale` over every (image, label) element of both splits.
scaledTrainAndValidationData, scaledTestData = (
    split.map(scale) for split in (mnistTrain, mnistTest)
)


Shuffling data

In [24]:
# Mini-batch training works best on shuffled data, so the dataset is shuffled before it is split and batched.
BUFFER_SIZE = 10000
# Dataset.shuffle does not shuffle the whole dataset at once (that would be expensive memory-wise);
# it keeps a buffer of BUFFER_SIZE elements and draws randomly from that buffer.

# reshuffle_each_iteration defaults to True, which produces a NEW order every time the dataset
# is iterated. This dataset is later divided with take()/skip(); with a fresh order on every pass
# the two parts would not stay disjoint and validation examples would leak into training.
# Pinning the order keeps the split stable. Trade-off: the training order is then identical in
# every epoch (a per-epoch shuffle can be applied to the training part after the split if wanted).
shuffled_train_and_validation_data = scaledTrainAndValidationData.shuffle(
    BUFFER_SIZE,
    reshuffle_each_iteration=False,
)


Train, Validation and Test set

In [25]:
# Split the shuffled data using the validation count computed earlier (6k):
#   take(n) -> the first n elements, used for validation
#   skip(n) -> everything after the first n elements, used for training
validation_data, train_data = (
    shuffled_train_and_validation_data.take(num_Validation),
    shuffled_train_and_validation_data.skip(num_Validation),
)


INFO-

SGD => BATCH SIZE = 1

BATCH GD => BATCH SIZE = NO. OF SAMPLES

MINI-BATCH => 1 < BATCH SIZE < NO. OF SAMPLES


_______________________________________________________

Batching

In [26]:
BATCH_SIZE = 100

# Validation and test data are only passed forward, never trained on,
# so each of them is wrapped into a single batch holding the whole split.
test_data = scaledTestData.batch(num_Test)
validation_data = validation_data.batch(num_Validation)

# Training data is consumed in mini-batches of BATCH_SIZE examples.
train_data = train_data.batch(BATCH_SIZE)
 

In [27]:
# iter() creates an iterator over the batched validation dataset and next() pulls its first batch.
# validation_data was batched with num_Validation, so that first batch is the whole validation set.
validation_batches = iter(validation_data)
validation_inputs, validation_targets = next(validation_batches)

#### MODEL

There are 784 inputs (input layer) and 10 output nodes, one for each of the 10 digits (output layer). We will create 2 hidden layers with 100 nodes each.

In [28]:
# Network dimensions
input_size, output_size = 784, 10   # 28 * 28 pixels in, one node per digit out
hidden_size = 100                   # assuming that all hidden layers are of the same size


In [29]:
# Build the network layer by layer.
model = tf.keras.Sequential()

# INPUT LAYER
# Flatten transforms a tensor of rank n into a vector: here each (28, 28, 1) image becomes one flat vector.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))

# HIDDEN LAYER 1
# A Dense layer computes the dot product of its input and weights, adds a bias,
# and then applies the activation function.
model.add(tf.keras.layers.Dense(hidden_size, activation='relu'))

# HIDDEN LAYER 2
model.add(tf.keras.layers.Dense(hidden_size, activation='relu'))

# OUTPUT LAYER
# softmax turns the outputs into a probability for each of the output_size classes.
model.add(tf.keras.layers.Dense(output_size, activation='softmax'))

Optimizer and Loss Function

In [30]:
# Choice of loss function - there are several cross-entropy variants:
#   binary_crossentropy             -> for binary (two-class) targets
#   categorical_crossentropy        -> expects targets that are already one-hot encoded
#   sparse_categorical_crossentropy -> takes integer class labels directly, so no one-hot encoding is needed
#
# 'adam' is the Adaptive Moment Estimation optimizer.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#### Training

In [31]:
NUM_EPOCHS = 10  # number of full passes over the training data

In [32]:

# Every epoch goes through all mini-batches of train_data. For each batch a forward pass
# computes the loss, backpropagation computes the gradients, and the optimizer updates the weights.
# At the end of each epoch the validation tensors are passed forward through the network
# (no weight updates) to report the validation loss and accuracy.
# (Pass verbose=2 to print one line per epoch instead of a progress bar.)
model.fit(
    train_data,
    validation_data=(validation_inputs, validation_targets),
    epochs=NUM_EPOCHS,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22fc6ca0670>

When using 50 nodes per hidden layer, the validation accuracy was 97%. <br>
When using 100 nodes per hidden layer, the validation accuracy rose to 98%. <br>
NOTE - Validation accuracy is meant to estimate performance on unseen data, so comparing it with the training accuracy shows whether your model is overfitting.

<hr>

After adding another hidden layer, the accuracy increased to 98.20

<hr>

After adding another hidden layer and increasing the nodes to 200, the accuracy actually decreased to 98.15.
Hence, a good configuration for this problem is 2-3 hidden layers with 50-100 nodes each.
<hr>

After increasing the number of epochs to 10, the validation accuracy reached 99% (one of the highest values observed).

#### Testing

In [33]:
# evaluate() returns the loss followed by the metrics given to compile() (here: accuracy).
test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics



In [34]:
# The loss is a raw cross-entropy value, not a percentage, so it is printed without a '%' sign.
# Only the accuracy (a fraction in 0..1) is scaled by 100 and shown as a percentage.
print("Test Loss : {0: .2f}\nTest Accuracy : {1: .2f}%".format(test_loss, test_accuracy*100.))

Test Loss :  0.08%
Test Accuracy :  97.69%
