## Deep Neural Network for MNIST Classification

### Import packages 

In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

### Data

In [2]:
#load dataset in a 2 tuple structure input and target
mnist_dts, mnist_info = tfds.load(name = 'mnist', with_info = True, as_supervised = True)

In [3]:
mnist_train,mnist_test = mnist_dts['train'], mnist_dts['test']

#split 1/10 training dataset to validation dataset
num_val_samples = 0.1* mnist_info.splits['train'].num_examples

#convert num_val_samples into int64 type (integer)
num_val_samples = tf.cast(num_val_samples,tf.int64)

#get test samples
num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples,tf.int64)

In [4]:
#scale inputs 
def scale(image, label):
    image = tf.cast(image,tf.float32)
    image /= 255. #the '.' makes sure that this is a float
    return image, label

train_val_scaled = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [5]:
#shuffle data
BUFFER_SIZE = 10000 # use when deal when enourmous datasets: shuffle 10000 at a time

train_val_shuffled = train_val_scaled.shuffle(BUFFER_SIZE)

val_data = train_val_shuffled.take(num_val_samples)
train_data = train_val_shuffled.skip(num_val_samples)

In [6]:
# combine consecutive elements of a dataset into batches, how many samples Tensorflow should take in each batch
BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
val_data = val_data.batch(num_val_samples)
test_data = test_data.batch(num_test_samples)

In [7]:
# extract and convert the validation inputs and targets 
#iter(): create an object which can be iterated one element at a time
#next(): load the next element of an iterable object (next batch)
val_inputs, val_targets = next(iter(val_data))

### Model 1

#### Outline the model

In [8]:
input_size = 784
output_size = 10
hidden_layer_size = 100

model_1 = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape = (28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax'),
])
# tf.keras.Sequential(): stack layers
# tf.keras.layers.Flatten(original shape): flattens a tensor into a vector
# tf.keras.layers.Dense(output size): calculates the dot product of the inputs and the wights and adds the bias. Can also apply activation function

#### Choose the optimizer and the loss function

In [9]:
model_1.compile(optimizer='Adam',loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#categorical_crossentropy: did one-hot encoded the targets
#sparse_categorical_crossentropy: apply one-hot encoding for you

#### Training

In [10]:
NUM_EPOCHS = 5

model_1.fit(train_data,epochs=NUM_EPOCHS,validation_data = (val_inputs, val_targets),validation_steps=1,verbose=2)

Epoch 1/5
540/540 - 10s - loss: 0.3360 - accuracy: 0.9050 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 5s - loss: 0.1369 - accuracy: 0.9595 - val_loss: 0.1230 - val_accuracy: 0.9640
Epoch 3/5
540/540 - 6s - loss: 0.0964 - accuracy: 0.9708 - val_loss: 0.0983 - val_accuracy: 0.9715
Epoch 4/5
540/540 - 6s - loss: 0.0754 - accuracy: 0.9770 - val_loss: 0.0837 - val_accuracy: 0.9765
Epoch 5/5
540/540 - 5s - loss: 0.0609 - accuracy: 0.9818 - val_loss: 0.0710 - val_accuracy: 0.9792


<tensorflow.python.keras.callbacks.History at 0x21d77d1cc08>

### Model 2 

In [11]:
input_size = 784
output_size = 10
hidden_layer_size = 200

model_2 = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape = (28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax'),
])

model_2.compile(optimizer='Adam',loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [12]:
NUM_EPOCHS = 5

model_2.fit(train_data,epochs=NUM_EPOCHS,validation_data = (val_inputs, val_targets),validation_steps=1,verbose=2)

Epoch 1/5
540/540 - 6s - loss: 0.2706 - accuracy: 0.9224 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 6s - loss: 0.1059 - accuracy: 0.9682 - val_loss: 0.1073 - val_accuracy: 0.9680
Epoch 3/5
540/540 - 6s - loss: 0.0716 - accuracy: 0.9779 - val_loss: 0.0801 - val_accuracy: 0.9758
Epoch 4/5
540/540 - 6s - loss: 0.0516 - accuracy: 0.9837 - val_loss: 0.0658 - val_accuracy: 0.9777
Epoch 5/5
540/540 - 6s - loss: 0.0403 - accuracy: 0.9871 - val_loss: 0.0485 - val_accuracy: 0.9857


<tensorflow.python.keras.callbacks.History at 0x21d020d75c8>

### Model 3

In [13]:
input_size = 784
output_size = 10
hidden_layer_size = 200

model_3 = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape = (28,28,1)),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax'),
])

model_3.compile(optimizer='Adam',loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [14]:
NUM_EPOCHS = 5

model_3.fit(train_data,epochs=NUM_EPOCHS,validation_data = (val_inputs, val_targets),validation_steps=1,verbose=2)

Epoch 1/5
540/540 - 7s - loss: 0.2708 - accuracy: 0.9194 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 6s - loss: 0.1020 - accuracy: 0.9686 - val_loss: 0.0897 - val_accuracy: 0.9737
Epoch 3/5
540/540 - 6s - loss: 0.0699 - accuracy: 0.9783 - val_loss: 0.0671 - val_accuracy: 0.9797
Epoch 4/5
540/540 - 6s - loss: 0.0505 - accuracy: 0.9840 - val_loss: 0.0662 - val_accuracy: 0.9805
Epoch 5/5
540/540 - 6s - loss: 0.0405 - accuracy: 0.9871 - val_loss: 0.0596 - val_accuracy: 0.9822


<tensorflow.python.keras.callbacks.History at 0x21d03730288>

### Test all models

In [15]:
test_loss_1,test_accuracy_1 = model_1.evaluate(test_data)
print('Model 1. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss_1,test_accuracy_1*100.))

test_loss_2,test_accuracy_2 = model_2.evaluate(test_data)
print('Model 2. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss_2,test_accuracy_2*100.))

test_loss_3,test_accuracy_3 = model_3.evaluate(test_data)
print('Model 3. Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss_3,test_accuracy_3*100.))

Model 1. Test loss: 0.08. Test accuracy: 97.56%
Model 2. Test loss: 0.08. Test accuracy: 97.70%
Model 3. Test loss: 0.08. Test accuracy: 97.65%
