In [4]:
import mnist_loader
import numpy as np


training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

training_data = list(training_data)
validation_data = list(validation_data)
test_data = list(test_data)

train_inputs = []
train_labels = []


for i in training_data:
    train_inputs.append(i[0])
    x = list(i[1])
    y = x.index(1)
    train_labels.append(y)


test_inputs = []
test_labels = []

for i in test_data:
    test_inputs.append(i[0])
    test_labels.append(i[1])


In [18]:
# Initial Model

import tensorflow as tf
from tensorflow import keras

my_nn_model = keras.Sequential()
my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
my_nn_model.add(keras.layers.Dense(128, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(64, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

my_nn_model.summary()


my_nn_model.compile(loss="sparse_categorical_crossentropy",
              optimizer="sgd",
              metrics=["accuracy"])
# sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive


results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=10)

test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_15 (Flatten)        (None, 784)               0         
                                                                 
 dense_45 (Dense)            (None, 128)               100480    
                                                                 
 dense_46 (Dense)            (None, 64)                8256      
                                                                 
 dense_47 (Dense)            (None, 10)                650       
                                                                 
Total params: 109386 (427.29 KB)
Trainable params: 109386 (427.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.3969677686691284
Test

In [17]:
# Changing number of epochs
# Would expect overfitting as epochs increase

import tensorflow as tf
from tensorflow import keras

my_nn_model = keras.Sequential()
my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
my_nn_model.add(keras.layers.Dense(128, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(64, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

my_nn_model.summary()

my_nn_model.compile(loss="sparse_categorical_crossentropy",
            optimizer="sgd",
            metrics=["accuracy"])
# sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive


num_epochs = [10, 20, 50, 100, 250, 500]

for i in num_epochs:
    results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=i, verbose=False)

    test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
    print("Epochs trained:", i)
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_acc)

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_14 (Flatten)        (None, 784)               0         
                                                                 
 dense_42 (Dense)            (None, 128)               100480    
                                                                 
 dense_43 (Dense)            (None, 64)                8256      
                                                                 
 dense_44 (Dense)            (None, 10)                650       
                                                                 
Total params: 109386 (427.29 KB)
Trainable params: 109386 (427.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epochs trained: 10
Test Loss: 0.396740585565567
Test Accuracy: 0.8901000022888184
Epochs trained: 20
Test Loss: 0.2550284266471863
Test Accuracy:

In [25]:
# Changing number of hidden units in each layer

hiddenLayer_numNodes = [512, 256, 128, 64, 32]

for i in range(len(hiddenLayer_numNodes)):
    for j in range(i, len(hiddenLayer_numNodes)):
        print("Nodes in first hidden layer:", hiddenLayer_numNodes[i], " Nodes in second hidden layer", hiddenLayer_numNodes[j])

        my_nn_model = keras.Sequential()
        my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
        my_nn_model.add(keras.layers.Dense(hiddenLayer_numNodes[i], activation='sigmoid'))
        my_nn_model.add(keras.layers.Dense(hiddenLayer_numNodes[j], activation='sigmoid'))
        my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

        # my_nn_model.summary()

        my_nn_model.compile(loss="sparse_categorical_crossentropy",
            optimizer="sgd",
            metrics=["accuracy"])
        # sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive

        results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=10, verbose=False)

        test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
        print("Test Loss:", test_loss)
        print("Test Accuracy:", test_acc)


Nodes in first hidden layer: 512  Nodes in second hidden layer 512
Test Loss: 0.3577618896961212
Test Accuracy: 0.897599995136261
Nodes in first hidden layer: 512  Nodes in second hidden layer 256
Test Loss: 0.3565708100795746
Test Accuracy: 0.8988999724388123
Nodes in first hidden layer: 512  Nodes in second hidden layer 128
Test Loss: 0.36094585061073303
Test Accuracy: 0.8978000283241272
Nodes in first hidden layer: 512  Nodes in second hidden layer 64
Test Loss: 0.3756021559238434
Test Accuracy: 0.8955000042915344
Nodes in first hidden layer: 512  Nodes in second hidden layer 32
Test Loss: 0.3941355049610138
Test Accuracy: 0.894599974155426
Nodes in first hidden layer: 256  Nodes in second hidden layer 256
Test Loss: 0.3649265766143799
Test Accuracy: 0.8984000086784363
Nodes in first hidden layer: 256  Nodes in second hidden layer 128
Test Loss: 0.3679578900337219
Test Accuracy: 0.8966000080108643
Nodes in first hidden layer: 256  Nodes in second hidden layer 64
Test Loss: 0.3788999

In [19]:
# Changing the weight/bias initializations

import tensorflow as tf
from tensorflow import keras

bias_inits = [keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=2002), 
              keras.initializers.Zeros(), 
              keras.initializers.Ones(), 
              keras.initializers.GlorotNormal(seed=2002), 
              keras.initializers.HeNormal(seed=2002)]

weight_inits = [keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=2002), 
              keras.initializers.Zeros(), 
              keras.initializers.Ones(), 
              keras.initializers.GlorotNormal(seed=2002), 
              keras.initializers.HeNormal(seed=2002)]


for i in bias_inits:
    for j in weight_inits:
        print("Bias:", i)
        print("Kernel:", j)
        my_nn_model = keras.Sequential()
        my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
        my_nn_model.add(keras.layers.Dense(128, activation='sigmoid', kernel_initializer=j, bias_initializer=i))
        my_nn_model.add(keras.layers.Dense(64, activation='sigmoid', kernel_initializer=j, bias_initializer=i))
        my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

        # my_nn_model.summary()

        my_nn_model.compile(loss="sparse_categorical_crossentropy",
            optimizer="sgd",
            metrics=["accuracy"])
        # sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive

        results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=10, verbose=False)

        test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
        print("Test Loss:", test_loss)
        print("Test Accuracy:", test_acc)

Bias: <keras.src.initializers.initializers.RandomNormal object at 0x143f12050>
Kernel: <keras.src.initializers.initializers.RandomNormal object at 0x180aba450>
Test Loss: 0.4385152757167816
Test Accuracy: 0.8794000148773193
Bias: <keras.src.initializers.initializers.RandomNormal object at 0x143f12050>
Kernel: <keras.src.initializers.initializers.Zeros object at 0x180abaf90>
Test Loss: 1.7765247821807861
Test Accuracy: 0.295199990272522
Bias: <keras.src.initializers.initializers.RandomNormal object at 0x143f12050>
Kernel: <keras.src.initializers.initializers.Ones object at 0x180ab8490>
Test Loss: 2.3049395084381104
Test Accuracy: 0.11349999904632568
Bias: <keras.src.initializers.initializers.RandomNormal object at 0x143f12050>
Kernel: <keras.src.initializers.initializers.GlorotNormal object at 0x180abb210>
Test Loss: 0.3892819881439209
Test Accuracy: 0.8937000036239624
Bias: <keras.src.initializers.initializers.RandomNormal object at 0x143f12050>
Kernel: <keras.src.initializers.initiali

In [15]:
# Changing learning rates and optimizer

import tensorflow as tf
from tensorflow import keras

my_nn_model = keras.Sequential()
my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
my_nn_model.add(keras.layers.Dense(128, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(64, activation='sigmoid'))
my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

my_nn_model.summary()

__optimizer = []
learning_rates = [0.01, 0.001, 0.0001]
for i in learning_rates:
    __optimizer.append(tf.keras.optimizers.Adam(learning_rate=i))
    __optimizer.append(tf.keras.optimizers.SGD(learning_rate=i))


for i in __optimizer:
    my_nn_model.compile(loss="sparse_categorical_crossentropy",
                optimizer=i,
                metrics=["accuracy"])
    # sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive

    results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=10, verbose=False)

    test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
    print("Optimizer used:", i, ". Learning rate used:", i.learning_rate)
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_acc)

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_12 (Flatten)        (None, 784)               0         
                                                                 
 dense_36 (Dense)            (None, 128)               100480    
                                                                 
 dense_37 (Dense)            (None, 64)                8256      
                                                                 
 dense_38 (Dense)            (None, 10)                650       
                                                                 
Total params: 109386 (427.29 KB)
Trainable params: 109386 (427.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Optimizer used: <keras.src.optimizers.adam.Adam object at 0x1191e7e10> . Learning rate used: <tf.Variable 'learning_rate:0' shape=() dtype=float3

In [39]:
# Combining all the changes to the models above, find the model with the best test accuracy:

# Will save all data to a file too:

from itertools import product
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers.legacy import SGD



__optimizer = []
learning_rates = [0.01, 0.001, 0.0001]
for i in learning_rates:
    __optimizer.append(tf.keras.optimizers.Adam(learning_rate=i))
    __optimizer.append(tf.keras.optimizers.SGD(learning_rate=i))


bias_inits = [keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=2002), 
              keras.initializers.Zeros(), 
              keras.initializers.Ones(), 
              keras.initializers.GlorotNormal(seed=2002), 
              keras.initializers.HeNormal(seed=2002)]

weight_inits = [keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=2002), 
              keras.initializers.Zeros(), 
              keras.initializers.Ones(), 
              keras.initializers.GlorotNormal(seed=2002), 
              keras.initializers.HeNormal(seed=2002)]


num_epochs = [10, 20, 50, 100, 250] # 500 epochs was removed because it would take too long to run with all these other combinations


hiddenLayer_numNodes = [512, 256, 128, 64, 32]
hiddenLayer_numNodes_pairs = []
for i in range(len(hiddenLayer_numNodes)):
    for j in range(i, len(hiddenLayer_numNodes)):
        # print("Nodes in first hidden layer:", hiddenLayer_numNodes[i], " Nodes in second hidden layer", hiddenLayer_numNodes[j])
        hiddenLayer_numNodes_pairs.append((hiddenLayer_numNodes[i], hiddenLayer_numNodes[j])) # len = n(n+1) / 2


combinations = list(product(__optimizer, bias_inits, weight_inits, num_epochs, hiddenLayer_numNodes_pairs))
# Order of combination - optimizer, bias init, weight init, num epochs, hiddenLayer numNodes pairs

for i in combinations:
    my_nn_model = keras.Sequential()
    my_nn_model.add(keras.layers.Flatten(input_shape=(784, 1)))
    my_nn_model.add(keras.layers.Dense(i[4][0], activation='sigmoid', kernel_initializer=i[2], bias_initializer=i[1]))
    my_nn_model.add(keras.layers.Dense(i[4][1], activation='sigmoid', kernel_initializer=i[2], bias_initializer=i[1]))
    my_nn_model.add(keras.layers.Dense(10, activation='softmax'))

    my_nn_model.summary()


    my_nn_model.compile(loss="sparse_categorical_crossentropy",
                optimizer=i[0],
                metrics=["accuracy"])
    # sparse_categorical_crossentropy is used as the loss function since the classified outputs of the mode (0-9) are mutually exclusive

    print("Model training params:")
    print("\tNumEpochs: ", i[3])
    print("\tHidden Layer Nodes", i[4])
    print("\tBias Init:", i[1])
    print("\tKernel Init:", i[2])
    print("Optimizer ")
    print("\tOptimizer used:", i[0], ". Learning rate used:", i[0].learning_rate)

    results = my_nn_model.fit(np.array(train_inputs), np.array(train_labels), epochs=i[3], verbose=False)

    test_loss, test_acc = my_nn_model.evaluate(np.array(test_inputs), np.array(test_labels))
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_acc)

Model: "sequential_62"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_62 (Flatten)        (None, 784)               0         
                                                                 
 dense_186 (Dense)           (None, 512)               401920    
                                                                 
 dense_187 (Dense)           (None, 512)               262656    
                                                                 
 dense_188 (Dense)           (None, 10)                5130      
                                                                 
Total params: 669706 (2.55 MB)
Trainable params: 669706 (2.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Model training params:
	NumEpochs:  10
	Hidden Layer Nodes (512, 512)
	Bias Init: <keras.src.initializers.initializers.RandomNormal object at 0x18c24

KeyboardInterrupt: 