In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### MNIST Train

In [2]:


def scale(image, label):
    """Convert an image to float32 and divide its values by 255.

    Intended to be passed to ``Dataset.map``; the label is returned untouched.

    Parameters
    ----------
    image : the image to rescale (MNIST pixels lie in the 0-255 range).
    label : the target that accompanies ``image``.

    Returns
    -------
    tuple
        ``(scaled_image, label)``.
    """
    # cast first so the division below is a floating-point division
    as_float = tf.cast(image, tf.float32)
    return as_float / 255., label

def keras_sequence(hidden_layer_size, depth, layer_activation):
    """Build the list of Keras layers for a fully connected classifier.

    Parameters
    ----------
    hidden_layer_size : number of units in every hidden Dense layer.
    depth : how many hidden Dense layers to create.
    layer_activation : activation used by every hidden Dense layer.

    Returns
    -------
    list
        A Flatten layer for (28, 28, 1) inputs, then ``depth`` hidden Dense
        layers, then a 10-unit softmax Dense layer, in that order.
    """
    dense = tf.keras.layers.Dense

    # layers are created in network order: input, hidden stack, output
    input_layer = tf.keras.layers.Flatten(input_shape=(28,28,1))
    hidden_layers = [
        dense(hidden_layer_size, activation=layer_activation)
        for _ in range(depth)
    ]
    output_layer = dense(10, activation='softmax')

    return [input_layer, *hidden_layers, output_layer]

def MNIST_Train( 
    validation_per, 
    BUFFER_SIZE, 
    BATCH_SIZE,
    hidden_layer_size,
    depth,
    layer_activation
    ):
    """Train a fully connected classifier on MNIST.

    Parameters
    ----------
    validation_per : float
        Fraction of the 'train' split held out for validation; must be
        strictly between 0 and 1.
    BUFFER_SIZE : int
        Buffer size used when shuffling the training samples.
    BATCH_SIZE : int
        Number of samples per training batch.
    hidden_layer_size : int
        Units in each hidden layer (forwarded to ``keras_sequence``).
    depth : int
        Number of hidden layers (forwarded to ``keras_sequence``).
    layer_activation : str
        Hidden-layer activation (forwarded to ``keras_sequence``).

    Returns
    -------
    tuple
        ``(model, test_data)``: the fitted ``tf.keras.Sequential`` model and
        the scaled test split batched into a single batch.

    Raises
    ------
    ValueError
        If ``validation_per`` is not strictly between 0 and 1.

    Notes
    -----
    This function also reads the module-level names ``optimizer``, ``loss``,
    ``metrics`` and ``NUM_EPOCHS``; they must be defined before it is called.
    """
    if not 0 < validation_per < 1:
        raise ValueError(
            'validation_per must be strictly between 0 and 1, got {!r}'.format(validation_per)
        )

    # load the dataset and the info from tensorflow_datasets 'mnist'
    mnist_dataset, mnist_info = tfds.load(name = 'mnist',with_info = True, as_supervised=True)

    # set the train dataset and the test dataset
    mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']

    # number of training examples to hold out for validation, as an integer
    num_validation_sample = mnist_info.splits['train'].num_examples*validation_per
    num_validation_sample = tf.cast(num_validation_sample,tf.int64)

    # number of test examples, as an integer (used to build one test batch)
    num_test_sample = mnist_info.splits['test'].num_examples
    num_test_sample = tf.cast(num_test_sample,tf.int64)

    # scale the train dataset and the test dataset with the scale function
    scaled_train = mnist_train.map(scale)
    test_data = mnist_test.map(scale)

    # Split BEFORE shuffling. Dataset.shuffle draws a new order every time the
    # dataset is iterated, so taking/skipping from an already shuffled dataset
    # gives a different split each epoch and lets validation samples leak into
    # the training stream.
    validation_data = scaled_train.take(num_validation_sample)
    train_data = scaled_train.skip(num_validation_sample)

    # shuffle only the training samples (reshuffled on each epoch), then batch
    train_data = train_data.shuffle(BUFFER_SIZE)
    train_data = train_data.batch(BATCH_SIZE)

    # validation and test sets are each put into one batch holding every sample
    validation_data = validation_data.batch(num_validation_sample)
    test_data = test_data.batch(num_test_sample)

    # pull that single validation batch out as (inputs, targets) tensors
    validation_inputs, validation_targets = next(iter(validation_data))

    # build the list of layers and wrap it in a Sequential model
    sequential = keras_sequence(hidden_layer_size,depth,layer_activation)
    model = tf.keras.Sequential(sequential)

    # compile the model (optimizer, loss and metrics are module-level settings)
    model.compile(optimizer = optimizer, loss = loss, metrics = metrics)

    # fit the model for NUM_EPOCHS epochs (module-level setting)
    model.fit(train_data, epochs = NUM_EPOCHS, validation_data = (validation_inputs,validation_targets), verbose = 2)

    return model, test_data


In [3]:
# Baseline run: hidden layers of 200 units each.

# training settings that MNIST_Train reads as module-level names
optimizer = 'adam'
loss = 'sparse_categorical_crossentropy'
metrics = 'accuracy'
NUM_EPOCHS = 5

# data pipeline settings
validation_per = 0.1
BUFFER_SIZE = 10000
BATCH_SIZE = 100

# architecture settings
hidden_layer_size = 200
depth = 2
layer_activation = 'relu'

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2724 - accuracy: 0.9209 - val_loss: 0.1492 - val_accuracy: 0.9538
Epoch 2/5
540/540 - 2s - loss: 0.1053 - accuracy: 0.9685 - val_loss: 0.0872 - val_accuracy: 0.9725
Epoch 3/5
540/540 - 2s - loss: 0.0703 - accuracy: 0.9780 - val_loss: 0.0581 - val_accuracy: 0.9823
Epoch 4/5
540/540 - 2s - loss: 0.0536 - accuracy: 0.9835 - val_loss: 0.0509 - val_accuracy: 0.9837
Epoch 5/5
540/540 - 2s - loss: 0.0418 - accuracy: 0.9866 - val_loss: 0.0546 - val_accuracy: 0.9838


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e04a6c10>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [4]:
# doubling the hidden layer size increased accuracy by around 0.002
# training time was similar, and the loss in the early epochs was much lower
# we might be able to reduce the number of epochs and still achieve similar results.

In [5]:
# Experiment: one more hidden layer (depth 2 -> 3); other settings unchanged.
depth = 3

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2660 - accuracy: 0.9196 - val_loss: 0.1326 - val_accuracy: 0.9617
Epoch 2/5
540/540 - 3s - loss: 0.1022 - accuracy: 0.9682 - val_loss: 0.0883 - val_accuracy: 0.9742
Epoch 3/5
540/540 - 3s - loss: 0.0696 - accuracy: 0.9782 - val_loss: 0.0611 - val_accuracy: 0.9793
Epoch 4/5
540/540 - 3s - loss: 0.0537 - accuracy: 0.9829 - val_loss: 0.0587 - val_accuracy: 0.9815
Epoch 5/5
540/540 - 3s - loss: 0.0401 - accuracy: 0.9875 - val_loss: 0.0532 - val_accuracy: 0.9847


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e09432e0>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [6]:
# adjusting the depth by adding a layer (from 2 to 3) improved accuracy by 0.006 without a big hit to training time
# also improves the losses of early epochs by a good amount

In [7]:
# Experiment: a much deeper network with 10 hidden layers.
depth = 10

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 5s - loss: 0.3208 - accuracy: 0.9035 - val_loss: 0.1514 - val_accuracy: 0.9570
Epoch 2/5
540/540 - 4s - loss: 0.1358 - accuracy: 0.9614 - val_loss: 0.1142 - val_accuracy: 0.9692
Epoch 3/5
540/540 - 4s - loss: 0.0996 - accuracy: 0.9724 - val_loss: 0.0871 - val_accuracy: 0.9767
Epoch 4/5
540/540 - 4s - loss: 0.0851 - accuracy: 0.9768 - val_loss: 0.0867 - val_accuracy: 0.9780
Epoch 5/5
540/540 - 4s - loss: 0.0703 - accuracy: 0.9809 - val_loss: 0.0760 - val_accuracy: 0.9782


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e717e190>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [8]:
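# by adding additional depth, the accuracy went down instead of increasing; training accuracy dropped as well (0.9809 vs 0.9875 at depth 3), so this looks less like overfitting and more like the deeper network needing more than 5 epochs to train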

In [9]:
# Experiment: an intermediate depth of 5 hidden layers.
depth = 5

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 5s - loss: 0.2730 - accuracy: 0.9165 - val_loss: 0.1169 - val_accuracy: 0.9675
Epoch 2/5
540/540 - 3s - loss: 0.1097 - accuracy: 0.9667 - val_loss: 0.0857 - val_accuracy: 0.9738
Epoch 3/5
540/540 - 3s - loss: 0.0765 - accuracy: 0.9762 - val_loss: 0.0869 - val_accuracy: 0.9728
Epoch 4/5
540/540 - 3s - loss: 0.0585 - accuracy: 0.9815 - val_loss: 0.0527 - val_accuracy: 0.9847
Epoch 5/5
540/540 - 3s - loss: 0.0519 - accuracy: 0.9838 - val_loss: 0.0699 - val_accuracy: 0.9800


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2ec68a070>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [10]:
# Experiment: sigmoid activation in the hidden layers.
# Every other setting keeps whatever value the kernel currently holds.
layer_activation = 'sigmoid'

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.9286 - accuracy: 0.6876 - val_loss: 0.3647 - val_accuracy: 0.8947
Epoch 2/5
540/540 - 3s - loss: 0.2927 - accuracy: 0.9182 - val_loss: 0.2180 - val_accuracy: 0.9380
Epoch 3/5
540/540 - 3s - loss: 0.1973 - accuracy: 0.9449 - val_loss: 0.1542 - val_accuracy: 0.9582
Epoch 4/5
540/540 - 3s - loss: 0.1486 - accuracy: 0.9573 - val_loss: 0.1392 - val_accuracy: 0.9588
Epoch 5/5
540/540 - 3s - loss: 0.1197 - accuracy: 0.9651 - val_loss: 0.0954 - val_accuracy: 0.9727


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e5c37a60>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [11]:
# Experiment: tanh activation in the hidden layers.
# Every other setting keeps whatever value the kernel currently holds.
layer_activation = 'tanh'

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2810 - accuracy: 0.9143 - val_loss: 0.1549 - val_accuracy: 0.9547
Epoch 2/5
540/540 - 3s - loss: 0.1413 - accuracy: 0.9555 - val_loss: 0.1280 - val_accuracy: 0.9612
Epoch 3/5
540/540 - 3s - loss: 0.1000 - accuracy: 0.9698 - val_loss: 0.0967 - val_accuracy: 0.9702
Epoch 4/5
540/540 - 3s - loss: 0.0785 - accuracy: 0.9749 - val_loss: 0.0930 - val_accuracy: 0.9700
Epoch 5/5
540/540 - 3s - loss: 0.0602 - accuracy: 0.9810 - val_loss: 0.0696 - val_accuracy: 0.9790


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e6115460>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [12]:
# Experiment: very large training batches (10000 samples per batch).
# Every other setting keeps whatever value the kernel currently holds.
BATCH_SIZE = 10000

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
6/6 - 3s - loss: 1.6141 - accuracy: 0.5220 - val_loss: 0.8075 - val_accuracy: 0.7987
Epoch 2/5
6/6 - 2s - loss: 0.6455 - accuracy: 0.8289 - val_loss: 0.4667 - val_accuracy: 0.8690
Epoch 3/5
6/6 - 2s - loss: 0.4187 - accuracy: 0.8794 - val_loss: 0.3660 - val_accuracy: 0.8905
Epoch 4/5
6/6 - 2s - loss: 0.3427 - accuracy: 0.8983 - val_loss: 0.3169 - val_accuracy: 0.9052
Epoch 5/5
6/6 - 2s - loss: 0.3022 - accuracy: 0.9104 - val_loss: 0.2852 - val_accuracy: 0.9163


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e2e65f2eb0>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

In [13]:
# Experiment: very small training batches (10 samples per batch).
# Every other setting keeps whatever value the kernel currently holds.
BATCH_SIZE = 10

MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
5400/5400 - 11s - loss: 0.3056 - accuracy: 0.9081 - val_loss: 0.1940 - val_accuracy: 0.9475
Epoch 2/5
5400/5400 - 10s - loss: 0.1741 - accuracy: 0.9499 - val_loss: 0.1445 - val_accuracy: 0.9623
Epoch 3/5
5400/5400 - 10s - loss: 0.1339 - accuracy: 0.9613 - val_loss: 0.1455 - val_accuracy: 0.9607
Epoch 4/5
5400/5400 - 9s - loss: 0.1129 - accuracy: 0.9678 - val_loss: 0.1176 - val_accuracy: 0.9675
Epoch 5/5
5400/5400 - 9s - loss: 0.0987 - accuracy: 0.9712 - val_loss: 0.1078 - val_accuracy: 0.9720


(<tensorflow.python.keras.engine.sequential.Sequential at 0x1e28009e0d0>,
 <BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>)

## Test the model

In [14]:
# Final configuration: hidden_layer_size of 200 and depth of 3 looked best,
# with every other setting left at its baseline value.

name = 'mnist'

# training settings that MNIST_Train reads as module-level names
optimizer = 'adam'
loss = 'sparse_categorical_crossentropy'
metrics = 'accuracy'
NUM_EPOCHS = 5

# data pipeline settings
validation_per = 0.1
BUFFER_SIZE = 10000
BATCH_SIZE = 100

# architecture settings
hidden_layer_size = 200
depth = 3
layer_activation = 'relu'

# keep both results this time: the model and the batched test set are
# needed for the evaluation step
model, test_data = MNIST_Train(
    validation_per=validation_per,
    BUFFER_SIZE=BUFFER_SIZE,
    BATCH_SIZE=BATCH_SIZE,
    hidden_layer_size=hidden_layer_size,
    depth=depth,
    layer_activation=layer_activation,
)

Epoch 1/5
540/540 - 4s - loss: 0.2726 - accuracy: 0.9190 - val_loss: 0.1284 - val_accuracy: 0.9605
Epoch 2/5
540/540 - 2s - loss: 0.1027 - accuracy: 0.9685 - val_loss: 0.0900 - val_accuracy: 0.9703
Epoch 3/5
540/540 - 2s - loss: 0.0693 - accuracy: 0.9784 - val_loss: 0.0739 - val_accuracy: 0.9783
Epoch 4/5
540/540 - 2s - loss: 0.0535 - accuracy: 0.9831 - val_loss: 0.0475 - val_accuracy: 0.9843
Epoch 5/5
540/540 - 3s - loss: 0.0441 - accuracy: 0.9855 - val_loss: 0.0565 - val_accuracy: 0.9827


In [15]:
# evaluate the trained model on the batched test set; the result unpacks
# into the loss followed by the accuracy metric
test_loss, test_accuracy = model.evaluate(test_data)

# report the loss as-is and the accuracy as a percentage
report_template = 'Test loss: {0:.2f}. Test accuracy: {1:.2f}%'
print(report_template.format(test_loss, test_accuracy*100.))

Test loss: 0.08. Test accuracy: 97.55%


# The model has an accuracy of 97.55% on the held-out test set, which is our estimate of its accuracy in deployment