In [1]:
!pip install idx2numpy

Collecting idx2numpy
  Downloading idx2numpy-1.2.3.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-py3-none-any.whl size=7904 sha256=dc5bd6bc6f993ce19c8c259d69beb9396b2acc7af5dfe87fc891201f448c08b4
  Stored in directory: /root/.cache/pip/wheels/e0/f4/e7/643fc5f932ec2ff92997f43f007660feb23f948aa8486f1107
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3


In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import idx2numpy
from tensorflow.keras.optimizers import Adam

In [3]:
def load_mnist(data_dir=''):
    """Load the MNIST train/test splits from IDX files and scale the images.

    Args:
        data_dir: Directory that holds the four MNIST IDX files. The default
            (empty string) leaves the file names untouched, so they resolve
            against the current working directory exactly as before.

    Returns:
        ((train_images, train_labels), (test_images, test_labels)). The image
        arrays are converted to float32 and divided by 255; the label arrays
        are returned as read by idx2numpy.
    """
    import os  # local import so the function does not rely on another cell

    def read_idx(file_name):
        # os.path.join('', name) returns name unchanged, which keeps the
        # default call byte-compatible with the original hard-coded paths.
        return idx2numpy.convert_from_file(os.path.join(data_dir, file_name))

    # Load MNIST dataset from idx-ubyte files using the idx2numpy converter
    train_images = read_idx('train-images.idx3-ubyte')
    train_labels = read_idx('train-labels.idx1-ubyte')
    test_images = read_idx('t10k-images.idx3-ubyte')
    test_labels = read_idx('t10k-labels.idx1-ubyte')

    # Normalize pixel values to range [0,1] by dividing by 255
    train_images = train_images.astype('float32') / 255
    test_images = test_images.astype('float32') / 255

    return (train_images, train_labels), (test_images, test_labels)

# Load and prepare the MNIST dataset once at notebook level; the resulting
# arrays are reused by the FFNN and CNN training cells below.
(train_images, train_labels), (test_images, test_labels) = load_mnist()

In [7]:
# Clear any existing Keras session state before the feed-forward model is
# built, so models created in earlier cell executions do not linger.
tf.keras.backend.clear_session()

def create_ffnn(hidden_units=(128, 64)):
    """Build the (uncompiled) feed-forward classifier for 28x28 inputs.

    Args:
        hidden_units: Width of each hidden ReLU layer, in order. The default
            (128, 64) reproduces the original architecture, which was chosen
            for its fast run time.

    Returns:
        A Sequential model: Input(28, 28) -> Flatten -> one Dense(ReLU) layer
        per entry of hidden_units -> Dense(10, softmax).
    """
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape` to Flatten; Keras 3 emits a warning for the latter.
    stack = [
        layers.Input(shape=(28, 28)),
        layers.Flatten(),
    ]
    stack.extend(layers.Dense(units, activation='relu') for units in hidden_units)
    stack.append(layers.Dense(10, activation='softmax'))

    model = models.Sequential(stack)
    return model

# Lists to store the per-run test metrics aggregated in the summary below
test_accuracies = []
test_losses = []

# Run n times (currently set to 1)
for run in range(1):
    print(f"\nRun {run + 1}")
    print("-" * 20)

    # Reset Keras' global state at the start of every run so that repeated
    # runs do not accumulate state from the models of earlier iterations
    # (the CNN experiment loop does the same).
    tf.keras.backend.clear_session()

    # Create a fresh, untrained model for this run
    ffnn_model = create_ffnn()

    # Configure model with Adam optimizer
    # Learning rate 0.0005 showed best results in Table 1
    optimizer = Adam(learning_rate=0.0005)
    ffnn_model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

    # Train model for 10 epochs with 10% validation split
    ffnn_history = ffnn_model.fit(train_images, train_labels,
                                 epochs=10,
                                 validation_split=0.1,
                                 batch_size=32,
                                 verbose=1)

    # Evaluate the model on the test split; evaluate() yields the loss
    # followed by the compiled metric (accuracy)
    test_loss, test_acc = ffnn_model.evaluate(test_images, test_labels, verbose=0)
    test_accuracies.append(test_acc)
    test_losses.append(test_loss)

    print(f"Run {run + 1} Test accuracy: {test_acc:.4f}")
    print(f"Run {run + 1} Test loss: {test_loss:.4f}")

# Calculate and print statistics across all runs
print("\nFinal Results:")
print("-" * 20)
print(f"Average Test Accuracy: {np.mean(test_accuracies):.4f}")
print(f"Standard Deviation of Test Accuracy: {np.std(test_accuracies):.4f}")
print(f"Average Test Loss: {np.mean(test_losses):.4f}")
print(f"Individual Run Accuracies: {[f'{acc:.4f}' for acc in test_accuracies]}")


Run 1
--------------------
Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - accuracy: 0.8855 - loss: 0.3867 - val_accuracy: 0.9670 - val_loss: 0.1102
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9741 - loss: 0.0862 - val_accuracy: 0.9753 - val_loss: 0.0778
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9836 - loss: 0.0512 - val_accuracy: 0.9783 - val_loss: 0.0728
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 9ms/step - accuracy: 0.9888 - loss: 0.0333 - val_accuracy: 0.9797 - val_loss: 0.0740
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 10ms/step - accuracy: 0.9918 - loss: 0.0252 - val_accuracy: 0.9800 - val_loss: 0.0727
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 10ms/step - accuracy: 0.9943 - loss: 0.0174 - val_accuracy: 0.981

---
| Feed-Forward Neural Network | Nodes | Activation Function |
|----------|----------|----------|
| Hidden Layer 1 | 128 | ReLU |
| Hidden Layer 2 | 64 | ReLU |
| Output Layer | 10 | Softmax |

Data from Run 1
---
|Epoch|Training Accuracy|Training Loss|Validation Accuracy|Validation Loss
|-----|----|---|-|-|
| 1 |0.8681|0.4547|0.9600 |0.1332
|2|0.9654|0.1184 |0.9760 |0.0836
|3|0.9781|0.0697|0.9770 |0.0739
|4|0.9842|0.0517 |0.9763 |0.0871
|5|0.9874|0.0392|0.9785 |0.0788
|6|0.9898|0.0322 |0.9813 |0.0779
|7|0.9913|0.0257|0.9788 |0.0877
|8|0.9931|0.0204 |0.9808 |0.0822
|9|0.9944|0.0162|0.9803 |0.0932
|10|0.9953|0.0149|0.9813 |0.0848

---
|Run|Test Accuracy|Test Loss|
|-|-|-|
|1|0.9771|0.0876|
|2|0.9725|0.1199|
|3|0.9760|0.0985|
|4|0.9776|0.0929|
|5|0.9792|0.0889|
|Average|0.9765|0.0976|


---
|Run (n=5)|Hidden Layer Neurons (Layer 1:Layer 2)|Test Accuracy|Test Loss|Learning Rate|Time (Per Run)|
|-|-|-|-|-|-|
|Average|512:256|0.9795|0.0893|0.0005|3:19|
|Average|256:128|0.9778|0.0971|0.0005|1:55|
|Average|128:64|0.9774|0.0849|0.0005|1:31|
|Average|512:256|0.9786|0.1045|0.001|3:03|
|Average|256:128|0.9766|0.1050|0.001|1:47|
|Average|128:64|0.9740|0.1071|0.001|1:28|

In [8]:
# Reshape input data for CNN: append a single channel axis so each image
# becomes 28x28x1. Using -1 lets NumPy infer the number of samples instead of
# hard-coding 60000/10000, so a subset of the data reshapes correctly too.
train_images_reshaped = train_images.reshape((-1, 28, 28, 1))
test_images_reshaped = test_images.reshape((-1, 28, 28, 1))

def create_cnn():
    """Build the (uncompiled) convolutional classifier for 28x28x1 inputs.

    Returns:
        A Sequential model: Input(28, 28, 1) -> Conv2D(32, 3x3, ReLU) ->
        MaxPooling2D(2x2) -> Conv2D(64, 3x3, ReLU) -> MaxPooling2D(2x2) ->
        Flatten -> Dense(64, ReLU) -> Dense(10, softmax).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape` to the first Conv2D; Keras 3 warns about the latter.
        layers.Input(shape=(28, 28, 1)),
        # Using 32->64 feature maps with (3,3) filter size (for shortest run time)
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    return model

# Accumulators for the per-run test metrics
test_accuracies, test_losses = [], []

# Repeat the experiment n times (n is currently 1)
for run in range(1):
    print(f"\nRun {run + 1}\n{'-' * 20}")

    # Start every run from a clean Keras state
    tf.keras.backend.clear_session()

    # Build the network and compile it with the learning rate taken from the
    # best performing model
    cnn_model = create_cnn()
    optimizer = Adam(learning_rate=0.0005)
    cnn_model.compile(
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
        optimizer=optimizer,
    )

    # Fit for 10 epochs, holding out 10% of the training data for validation
    cnn_history = cnn_model.fit(
        train_images_reshaped,
        train_labels,
        batch_size=32,
        epochs=10,
        validation_split=0.1,
        verbose=1,
    )

    # Score the trained model on the test split and record both metrics
    test_loss, test_acc = cnn_model.evaluate(test_images_reshaped, test_labels, verbose=0)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

    print(
        f"Run {run + 1} Test accuracy: {test_acc:.4f}",
        f"Run {run + 1} Test loss: {test_loss:.4f}",
        sep="\n",
    )

# Summary statistics over all runs
print(
    "\nFinal Results:",
    "-" * 20,
    f"Average Test Accuracy: {np.mean(test_accuracies):.4f}",
    f"Standard Deviation of Test Accuracy: {np.std(test_accuracies):.4f}",
    f"Average Test Loss: {np.mean(test_losses):.4f}",
    f"Individual Run Accuracies: {list(map('{:.4f}'.format, test_accuracies))}",
    sep="\n",
)


Run 1
--------------------


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 30ms/step - accuracy: 0.8623 - loss: 0.4643 - val_accuracy: 0.9780 - val_loss: 0.0666
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 30ms/step - accuracy: 0.9779 - loss: 0.0717 - val_accuracy: 0.9855 - val_loss: 0.0503
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 29ms/step - accuracy: 0.9853 - loss: 0.0457 - val_accuracy: 0.9855 - val_loss: 0.0454
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 29ms/step - accuracy: 0.9900 - loss: 0.0330 - val_accuracy: 0.9872 - val_loss: 0.0464
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 29ms/step - accuracy: 0.9907 - loss: 0.0283 - val_accuracy: 0.9880 - val_loss: 0.0394
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 29ms/step - accuracy: 0.9929 - loss: 0.0221 - val_accuracy: 0.9883 - val_loss: 0.0399
Epoc

---
|Layer Type|Details|Activation Function|
|-----------------|----------|----------|
| Conv2D    | 32 filters (3x3) | ReLU |
| MaxPooling2D    | 2x2 window |  |
| Conv2D  | 64 filters (3x3) | ReLU |
| MaxPooling2D | 2x2 window |  |
| Flatten  | Flattens the feature maps into a 1D vector |  |
| Dense | 64 Nodes| ReLU |
| Dense (Output)      | 10 Nodes| Softmax |


---
Data from Run 1
---
|Epoch|Training Accuracy|Training Loss|Validation Accuracy|Validation Loss
|-----|----|---|-|-|
|1|0.8887|0.3525|0.9860 |0.0492
|2|0.9826|0.0566 |0.9865 |0.0465
|3|0.9890|0.0369|0.9893 |0.0387
|4|0.9913|0.0259 |0.9860 |0.0455
|5|0.9940|0.0183|0.9875 |0.0400
|6|0.9958|0.0130 |0.9888 |0.0411
|7|0.9966|0.0105|0.9900 |0.0374
|8|0.9973|0.0090 |0.9905 |0.0366
|9|0.9985|0.0050|0.9913 |0.0352
|10|0.9979|0.0060|0.9892 |0.0485



