In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### MNIST Train

In [49]:


def scale(image,label):
    """Return ``image`` as float32 divided by 255, with ``label`` passed through.

    Meant to be used with ``Dataset.map``; dividing by 255 brings pixel values
    into [0, 1] assuming the input uses the usual 0-255 range.
    """
    as_float = tf.cast(image, tf.float32)
    normalized = as_float / 255.
    return normalized, label

def keras_sequence(hidden_layer_size, depth, layer_activation):
    """Build the list of Keras layers for a fully connected classifier.

    Layout: a Flatten layer for (28, 28, 1) inputs, then ``depth`` Dense layers
    of ``hidden_layer_size`` units using ``layer_activation``, then a 10-unit
    softmax output layer.

    Returns:
        A plain Python list of layers, ready to hand to ``tf.keras.Sequential``.
    """
    input_layer = tf.keras.layers.Flatten(input_shape=(28,28,1))
    # One identical hidden layer per requested level of depth.
    hidden_layers = [
        tf.keras.layers.Dense(hidden_layer_size, activation=layer_activation)
        for _ in range(depth)
    ]
    output_layer = tf.keras.layers.Dense(10, activation='softmax')
    return [input_layer, *hidden_layers, output_layer]

def MNIST_Train(
    name,
    validation_per,
    BUFFER_SIZE,
    BATCH_SIZE,
    hidden_layer_size,
    depth,
    layer_activation
    ):
    """Train a dense softmax classifier on a TFDS dataset and return it with the test data.

    The compile/fit settings are NOT parameters: this function reads the
    module-level names ``optimizer``, ``loss``, ``metrics`` and ``NUM_EPOCHS``,
    so they must be defined in the notebook before the call.

    Args:
        name: dataset name passed to ``tfds.load`` (the notebook uses 'mnist').
        validation_per: fraction of the 'train' split held out for validation.
        BUFFER_SIZE: shuffle buffer size.
        BATCH_SIZE: batch size used for the training data.
        hidden_layer_size: number of units in each hidden Dense layer.
        depth: number of hidden Dense layers.
        layer_activation: activation used by the hidden layers.

    Returns:
        ``(model, test_data)``: the fitted ``tf.keras.Sequential`` model and the
        scaled 'test' split, batched as one single batch.
    """
    # load the dataset and its metadata from tensorflow_datasets
    mnist_dataset, mnist_info = tfds.load(name = name,with_info = True, as_supervised=True)

    # pick out the train and test splits
    mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

    # number of training examples to hold out for validation, as an integer
    num_validation_sample = mnist_info.splits['train'].num_examples*validation_per
    num_validation_sample = tf.cast(num_validation_sample,tf.int64)

    # number of test examples (the whole test split), as an integer
    num_test_sample = mnist_info.splits['test'].num_examples
    num_test_sample = tf.cast(num_test_sample,tf.int64)

    # scale both splits with the scale function
    scaled_train = mnist_train.map(scale)
    test_data = mnist_test.map(scale)

    # Shuffle ONCE with a fixed order before splitting. With the default
    # reshuffle_each_iteration=True the order changes every time the dataset is
    # iterated, so take()/skip() would select different examples each epoch and
    # validation examples would leak into the training data.
    shuffled_train_dataset = scaled_train.shuffle(
        BUFFER_SIZE, reshuffle_each_iteration=False
    )

    # split into disjoint validation and training partitions
    validation_data = shuffled_train_dataset.take(num_validation_sample)
    train_data = shuffled_train_dataset.skip(num_validation_sample)

    # reshuffle only the training partition every epoch, then batch it
    train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

    # validation and test data are each batched into one single batch
    validation_data = validation_data.batch(num_validation_sample)
    test_data = test_data.batch(num_test_sample)

    # pull that single validation batch out as (inputs, targets) tensors
    validation_inputs, validation_targets = next(iter(validation_data))

    # build the layer list and wrap it in a Sequential model
    sequential = keras_sequence(hidden_layer_size,depth,layer_activation)
    model = tf.keras.Sequential(sequential)

    # `metrics` may be configured as a bare string such as 'accuracy'; wrap it
    # in a list, which is the documented form for compile().
    compile_metrics = [metrics] if isinstance(metrics, str) else metrics

    # compile with the module-level optimizer / loss settings
    model.compile(optimizer = optimizer, loss = loss, metrics = compile_metrics)

    # fit for NUM_EPOCHS epochs, validating on the held-out batch after each epoch
    model.fit(train_data, epochs = NUM_EPOCHS, validation_data = (validation_inputs,validation_targets), verbose = 2)

    return model, test_data


In [6]:
# Experiment: hidden layer size set to 200.

# Dataset / split settings
name = 'mnist'
validation_per = 0.1
BUFFER_SIZE = 10000
BATCH_SIZE = 100

# Model architecture
hidden_layer_size = 200
depth = 2
layer_activation = 'relu'

# Compile / fit settings (read by MNIST_Train as module-level names)
optimizer = 'adam'
loss = 'sparse_categorical_crossentropy'
metrics = 'accuracy'
NUM_EPOCHS = 5

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2804 - accuracy: 0.9170 - val_loss: 0.1339 - val_accuracy: 0.9607
Epoch 2/5
540/540 - 3s - loss: 0.1080 - accuracy: 0.9676 - val_loss: 0.0816 - val_accuracy: 0.9758
Epoch 3/5
540/540 - 3s - loss: 0.0728 - accuracy: 0.9773 - val_loss: 0.0699 - val_accuracy: 0.9777
Epoch 4/5
540/540 - 3s - loss: 0.0526 - accuracy: 0.9839 - val_loss: 0.0578 - val_accuracy: 0.9822
Epoch 5/5
540/540 - 3s - loss: 0.0421 - accuracy: 0.9870 - val_loss: 0.0446 - val_accuracy: 0.9858


In [7]:
# Doubling the hidden layer size (2x) increased accuracy by around 0.002.
# Training time per epoch was similar, and the loss in the early epochs was much lower,
# so we might be able to reduce the number of epochs and still achieve similar results.

In [8]:
# Experiment: one more hidden layer (depth 2 -> 3).
# Every other setting is whatever the earlier cells left in the kernel.
depth = 3

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2691 - accuracy: 0.9208 - val_loss: 0.1388 - val_accuracy: 0.9592
Epoch 2/5
540/540 - 3s - loss: 0.0990 - accuracy: 0.9691 - val_loss: 0.0989 - val_accuracy: 0.9715
Epoch 3/5
540/540 - 3s - loss: 0.0671 - accuracy: 0.9787 - val_loss: 0.0727 - val_accuracy: 0.9768
Epoch 4/5
540/540 - 3s - loss: 0.0512 - accuracy: 0.9838 - val_loss: 0.0619 - val_accuracy: 0.9797
Epoch 5/5
540/540 - 3s - loss: 0.0431 - accuracy: 0.9861 - val_loss: 0.0459 - val_accuracy: 0.9863


In [9]:
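# Adding a hidden layer (depth 2 -> 3) improved accuracy by 0.006 without a big hit to training time.
# NOTE: in the two runs logged above the final val_accuracy differs by only about 0.0005 (0.9858 vs 0.9863), so re-check the 0.006 figure.
# It also improves the losses of the early epochs by a good amount.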

In [10]:
# Experiment: a much deeper network (depth = 10).
# Every other setting is whatever the earlier cells left in the kernel.
depth = 10

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 6s - loss: 0.3416 - accuracy: 0.8948 - val_loss: 0.1640 - val_accuracy: 0.9562
Epoch 2/5
540/540 - 4s - loss: 0.1417 - accuracy: 0.9609 - val_loss: 0.1152 - val_accuracy: 0.9653
Epoch 3/5
540/540 - 5s - loss: 0.1045 - accuracy: 0.9703 - val_loss: 0.0936 - val_accuracy: 0.9758
Epoch 4/5
540/540 - 4s - loss: 0.0817 - accuracy: 0.9769 - val_loss: 0.1110 - val_accuracy: 0.9693
Epoch 5/5
540/540 - 5s - loss: 0.0719 - accuracy: 0.9794 - val_loss: 0.0754 - val_accuracy: 0.9800


In [11]:
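# Adding more depth made the accuracy go down instead of up.
# Overfitting is one possible explanation, but the training accuracy dropped as well
# (0.9794 at depth 10 vs 0.9861 at depth 3), so it looks more like the deeper network
# simply needs more than 5 epochs to train.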

In [12]:
# Experiment: an intermediate depth of 5.
# Every other setting is whatever the earlier cells left in the kernel.
depth = 5

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 5s - loss: 0.2739 - accuracy: 0.9175 - val_loss: 0.1399 - val_accuracy: 0.9567
Epoch 2/5
540/540 - 3s - loss: 0.1108 - accuracy: 0.9658 - val_loss: 0.1102 - val_accuracy: 0.9657
Epoch 3/5
540/540 - 3s - loss: 0.0782 - accuracy: 0.9758 - val_loss: 0.0920 - val_accuracy: 0.9725
Epoch 4/5
540/540 - 3s - loss: 0.0631 - accuracy: 0.9808 - val_loss: 0.0763 - val_accuracy: 0.9795
Epoch 5/5
540/540 - 3s - loss: 0.0505 - accuracy: 0.9841 - val_loss: 0.0618 - val_accuracy: 0.9817


In [13]:
# Experiment: sigmoid activation in the hidden layers.
# depth and the other settings are NOT reset here; they keep the values left
# by the earlier cells (depth is 5 if the cells were run in order).
layer_activation = 'sigmoid'

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 5s - loss: 1.0088 - accuracy: 0.6529 - val_loss: 0.3771 - val_accuracy: 0.8973
Epoch 2/5
540/540 - 3s - loss: 0.2928 - accuracy: 0.9182 - val_loss: 0.2275 - val_accuracy: 0.9355
Epoch 3/5
540/540 - 3s - loss: 0.2024 - accuracy: 0.9425 - val_loss: 0.1588 - val_accuracy: 0.9530
Epoch 4/5
540/540 - 4s - loss: 0.1520 - accuracy: 0.9566 - val_loss: 0.1408 - val_accuracy: 0.9588
Epoch 5/5
540/540 - 3s - loss: 0.1214 - accuracy: 0.9648 - val_loss: 0.1039 - val_accuracy: 0.9705


In [14]:
# Experiment: tanh activation in the hidden layers.
# depth and the other settings are NOT reset here; they keep the values left
# by the earlier cells (depth is 5 if the cells were run in order).
layer_activation = 'tanh'

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 5s - loss: 0.2840 - accuracy: 0.9135 - val_loss: 0.1619 - val_accuracy: 0.9523
Epoch 2/5
540/540 - 3s - loss: 0.1340 - accuracy: 0.9587 - val_loss: 0.1263 - val_accuracy: 0.9640
Epoch 3/5
540/540 - 3s - loss: 0.0996 - accuracy: 0.9690 - val_loss: 0.0925 - val_accuracy: 0.9717
Epoch 4/5
540/540 - 3s - loss: 0.0766 - accuracy: 0.9756 - val_loss: 0.0723 - val_accuracy: 0.9783
Epoch 5/5
540/540 - 3s - loss: 0.0632 - accuracy: 0.9795 - val_loss: 0.0676 - val_accuracy: 0.9782


In [15]:
# Experiment: very large training batches (10000 examples per step).
# Activation and depth are NOT reset here; they keep the values left by the
# earlier cells ('tanh' and 5 if the cells were run in order).
BATCH_SIZE = 10000

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
6/6 - 4s - loss: 1.5524 - accuracy: 0.5456 - val_loss: 0.7355 - val_accuracy: 0.8020
Epoch 2/5
6/6 - 2s - loss: 0.5966 - accuracy: 0.8351 - val_loss: 0.4311 - val_accuracy: 0.8775
Epoch 3/5
6/6 - 2s - loss: 0.3953 - accuracy: 0.8850 - val_loss: 0.3390 - val_accuracy: 0.8998
Epoch 4/5
6/6 - 2s - loss: 0.3269 - accuracy: 0.9030 - val_loss: 0.2938 - val_accuracy: 0.9103
Epoch 5/5
6/6 - 2s - loss: 0.2879 - accuracy: 0.9138 - val_loss: 0.2630 - val_accuracy: 0.9240


In [16]:
# Experiment: very small training batches (10 examples per step).
# Activation and depth are NOT reset here; they keep the values left by the
# earlier cells ('tanh' and 5 if the cells were run in order).
BATCH_SIZE = 10

MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
5400/5400 - 12s - loss: 0.3091 - accuracy: 0.9081 - val_loss: 0.2093 - val_accuracy: 0.9403
Epoch 2/5
5400/5400 - 11s - loss: 0.1730 - accuracy: 0.9501 - val_loss: 0.1409 - val_accuracy: 0.9578
Epoch 3/5
5400/5400 - 12s - loss: 0.1359 - accuracy: 0.9614 - val_loss: 0.1304 - val_accuracy: 0.9617
Epoch 4/5
5400/5400 - 11s - loss: 0.1117 - accuracy: 0.9677 - val_loss: 0.1132 - val_accuracy: 0.9702
Epoch 5/5
5400/5400 - 12s - loss: 0.1005 - accuracy: 0.9713 - val_loss: 0.1048 - val_accuracy: 0.9693


## Test the model

In [58]:
# The best configuration seems to be hidden_layer_size = 200 with depth = 3,
# everything else unchanged. All settings are restated here so this cell does
# not depend on values left behind by the experiment cells.

# Dataset / split settings
name = 'mnist'
validation_per = 0.1
BUFFER_SIZE = 10000
BATCH_SIZE = 100

# Model architecture
hidden_layer_size = 200
depth = 3
layer_activation = 'relu'

# Compile / fit settings (read by MNIST_Train as module-level names)
optimizer = 'adam'
loss = 'sparse_categorical_crossentropy'
metrics = 'accuracy'
NUM_EPOCHS = 5

# Keep the fitted model and the batched test data for the evaluation step.
model, test_data = MNIST_Train(
    name=name,
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2614 - accuracy: 0.9233 - val_loss: 0.1323 - val_accuracy: 0.9613
Epoch 2/5
540/540 - 3s - loss: 0.1026 - accuracy: 0.9688 - val_loss: 0.0849 - val_accuracy: 0.9760
Epoch 3/5
540/540 - 3s - loss: 0.0705 - accuracy: 0.9779 - val_loss: 0.0587 - val_accuracy: 0.9825
Epoch 4/5
540/540 - 3s - loss: 0.0543 - accuracy: 0.9829 - val_loss: 0.0631 - val_accuracy: 0.9792
Epoch 5/5
540/540 - 3s - loss: 0.0398 - accuracy: 0.9871 - val_loss: 0.0390 - val_accuracy: 0.9890


In [59]:
# Evaluate the trained model on the test data returned by MNIST_Train.
test_loss, test_accuracy = model.evaluate(test_data)
# Report accuracy as a percentage.
accuracy_percent = test_accuracy*100.
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, accuracy_percent))

Test loss: 0.07. Test accuracy: 97.94%


# The model has an accuracy of 97.94% on the held-out test set, which is our estimate of its accuracy in deployment.