# Supervised Learning Project

## Import Modules

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD, Adagrad, AdamW

from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import time

import warnings
# warnings.simplefilter(action='ignore')

## Loading MNIST Data

In [3]:
# Load data from keras (already split)
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
# Finding the shape of individual sample
x_train[0].shape

(28, 28)

## Preprocessing Data

### Normalize to range [0, 1]

In [5]:
x_train = x_train / 255.0
x_test = x_test / 255.0

### Reshape for CNN


In [6]:
x_train_cnn = x_train.reshape(len(x_train), 28, 28, 1)
x_test_cnn = x_test.reshape(len(x_test), 28, 28, 1)

print(x_train.shape, x_test.shape, x_train_cnn.shape, x_test_cnn.shape)

(60000, 28, 28) (10000, 28, 28) (60000, 28, 28, 1) (10000, 28, 28, 1)


### One-hot encode labels for ANN/CNN


In [7]:
y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

print(y_train_cat.shape, y_test_cat.shape)

(60000, 10) (10000, 10)


# Log Model Info

In [8]:
def log_model_info(model, history, x_test, y_test_cat):
    import datetime

    start_time = time.time()
    test_loss, test_acc = model.evaluate(x_test, y_test_cat, verbose=0,batch_size=128)
    end_time = time.time()

    print("\n=== Model Summary ===")
    model.summary()
    print("\nFinal Accuracy:", round(test_acc * 100, 2), "%")
    print("Accuracy in first 5 epochs:", history.history['accuracy'][:5])
    print("Number of parameters:", model.count_params())
    print("Average Train Time per Epoch: {:.2f}s".format(
        (history.history['val_loss'][-1] - history.history['val_loss'][0]) / len(history.history['loss'])
    ))
    print("Average Test Time:", round(end_time - start_time, 2), "s")
    print("Layers and Activations:")
    for layer in model.layers:
        print(f"{layer.name} - {layer.__class__.__name__} - Activation: {getattr(layer, 'activation', 'N/A')}")
    print("Optimizer Config:", model.optimizer.get_config())
    print("Timestamp:", datetime.datetime.now())


## Simple Models

### SVM

#### Convert from 2D to 1D vector for SVM

In [None]:
x_train_flat = x_train.reshape(-1, 28*28)
x_test_flat = x_test.reshape(-1, 28*28)

print(x_train_flat.shape, x_test_flat.shape)

(60000, 784) (10000, 784)


In [None]:
svm_model = svm.SVC()
svm_model.fit(x_train_flat[:10000], y_train[:10000])
svm_preds = svm_model.predict(x_test_flat)
print("SVM Accuracy:", accuracy_score(y_test, svm_preds))

SVM Accuracy: 0.9594


### ANN

In [None]:
def build_ann():
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

ann_model = build_ann()
ann_model.fit(x_train, y_train_cat, epochs=5, validation_split=0.2, batch_size=64)


  super().__init__(**kwargs)


Epoch 1/5
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.5512 - loss: 1.5644 - val_accuracy: 0.8832 - val_loss: 0.4538
Epoch 2/5
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8835 - loss: 0.4380 - val_accuracy: 0.9068 - val_loss: 0.3310
Epoch 3/5
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9050 - loss: 0.3392 - val_accuracy: 0.9153 - val_loss: 0.2940
Epoch 4/5
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9145 - loss: 0.3014 - val_accuracy: 0.9252 - val_loss: 0.2634
Epoch 5/5
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9243 - loss: 0.2689 - val_accuracy: 0.9302 - val_loss: 0.2459


<keras.src.callbacks.history.History at 0x7ed6a1646510>

In [None]:
def build_ann():
    model = Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

ann_model = build_ann()
ann_model.fit(x_train, y_train_cat, epochs=10, validation_split=0.1)


Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.6687 - loss: 1.1498 - val_accuracy: 0.9223 - val_loss: 0.2836
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9062 - loss: 0.3247 - val_accuracy: 0.9342 - val_loss: 0.2261
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9244 - loss: 0.2592 - val_accuracy: 0.9468 - val_loss: 0.1926
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9340 - loss: 0.2298 - val_accuracy: 0.9543 - val_loss: 0.1701
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9431 - loss: 0.1985 - val_accuracy: 0.9613 - val_loss: 0.1510
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9480 - loss: 0.1775 - val_accuracy: 0.9612 - val_loss: 0.1452
Epoch 7/10
[1m1

<keras.src.callbacks.history.History at 0x7ed6a26b0c90>

In [None]:
ann_model.summary()

## CNN Model

### Initial 

In [None]:
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Epochs = 5
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=5, batch_size=128, validation_split=0.1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.7657 - loss: 0.7932 - val_accuracy: 0.9732 - val_loss: 0.0985
Epoch 2/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9656 - loss: 0.1107 - val_accuracy: 0.9807 - val_loss: 0.0699
Epoch 3/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9776 - loss: 0.0696 - val_accuracy: 0.9822 - val_loss: 0.0682
Epoch 4/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9825 - loss: 0.0570 - val_accuracy: 0.9732 - val_loss: 0.0862
Epoch 5/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9858 - loss: 0.0454 - val_accuracy: 0.9852 - val_loss: 0.0547


In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.36 %
Accuracy in first 5 epochs: [0.8905370235443115, 0.9678703546524048, 0.9784629344940186, 0.9824444651603699, 0.9857777953147888]
Number of parameters: 1011466
Average Train Time per Epoch: -0.01s
Average Test Time: 0.74 s
Layers and Activations:
conv2d - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d - MaxPooling2D - Activation: N/A
conv2d_1 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_2 - Flatten - Activation: N/A
dense_6 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_7 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:40:53.366957


### Epochs

In [None]:
# Epochs = 10
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=10, batch_size=128, validation_split=0.1)

Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7619 - loss: 0.8247 - val_accuracy: 0.9690 - val_loss: 0.1190
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9647 - loss: 0.1196 - val_accuracy: 0.9797 - val_loss: 0.0772
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9769 - loss: 0.0742 - val_accuracy: 0.9783 - val_loss: 0.0733
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9827 - loss: 0.0567 - val_accuracy: 0.9840 - val_loss: 0.0675
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9860 - loss: 0.0432 - val_accuracy: 0.9842 - val_loss: 0.0606
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9894 - loss: 0.0346 - val_accuracy: 0.9832 - val_loss: 0.0609
Epoch 7/10
[1m422/422[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.66 %
Accuracy in first 5 epochs: [0.8836110830307007, 0.9672777652740479, 0.977648138999939, 0.983129620552063, 0.9857592582702637]
Number of parameters: 1011466
Average Train Time per Epoch: -0.01s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_2 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_1 - MaxPooling2D - Activation: N/A
conv2d_3 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_3 - Flatten - Activation: N/A
dense_8 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_9 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:41:18.920357


In [None]:
# Epochs = 15
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=15, batch_size=128, validation_split=0.1)

Epoch 1/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7563 - loss: 0.7947 - val_accuracy: 0.9687 - val_loss: 0.1055
Epoch 2/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9661 - loss: 0.1096 - val_accuracy: 0.9798 - val_loss: 0.0730
Epoch 3/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9789 - loss: 0.0697 - val_accuracy: 0.9812 - val_loss: 0.0662
Epoch 4/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9839 - loss: 0.0528 - val_accuracy: 0.9810 - val_loss: 0.0637
Epoch 5/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9864 - loss: 0.0433 - val_accuracy: 0.9865 - val_loss: 0.0509
Epoch 6/15
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9890 - loss: 0.0341 - val_accuracy: 0.9877 - val_loss: 0.0494
Epoch 7/15
[1m422/422[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.88 %
Accuracy in first 5 epochs: [0.8849444389343262, 0.9693148136138916, 0.9794814586639404, 0.9839259386062622, 0.9869444370269775]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_4 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_2 - MaxPooling2D - Activation: N/A
conv2d_5 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_4 - Flatten - Activation: N/A
dense_10 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_11 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:41:54.659487


In [None]:
# Epochs = 20
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=20, batch_size=128, validation_split=0.1)

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7582 - loss: 0.7911 - val_accuracy: 0.9708 - val_loss: 0.1044
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9681 - loss: 0.1047 - val_accuracy: 0.9767 - val_loss: 0.0784
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9784 - loss: 0.0724 - val_accuracy: 0.9843 - val_loss: 0.0558
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9855 - loss: 0.0481 - val_accuracy: 0.9857 - val_loss: 0.0550
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9869 - loss: 0.0417 - val_accuracy: 0.9850 - val_loss: 0.0549
Epoch 6/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9908 - loss: 0.0296 - val_accuracy: 0.9858 - val_loss: 0.0572
Epoch 7/20
[1m422/422[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs


=== Model Summary ===



Final Accuracy: 98.96 %
Accuracy in first 5 epochs: [0.886888861656189, 0.9707037210464478, 0.9797777533531189, 0.9845740795135498, 0.9871110916137695]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_6 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_3 - MaxPooling2D - Activation: N/A
conv2d_7 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_5 - Flatten - Activation: N/A
dense_12 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_13 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:42:41.867264


### Batch Size

In [None]:
# 18 Epochs - batch_size = 32
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8763 - loss: 0.4042 - val_accuracy: 0.9848 - val_loss: 0.0598
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9813 - loss: 0.0557 - val_accuracy: 0.9832 - val_loss: 0.0568
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9891 - loss: 0.0331 - val_accuracy: 0.9870 - val_loss: 0.0443
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9929 - loss: 0.0233 - val_accuracy: 0.9905 - val_loss: 0.0385
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9954 - loss: 0.0154 - val_accuracy: 0.9887 - val_loss: 0.0400
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9970 - loss: 0.0105 - val_accuracy: 0.9898 - val_loss: 0.0377
Epoch 7/18
[1m1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs


=== Model Summary ===



Final Accuracy: 99.11 %
Accuracy in first 5 epochs: [0.9440000057220459, 0.9830370545387268, 0.9894074201583862, 0.9926666617393494, 0.9952037334442139]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.51 s
Layers and Activations:
conv2d_8 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_4 - MaxPooling2D - Activation: N/A
conv2d_9 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_6 - Flatten - Activation: N/A
dense_14 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_15 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:44:58.810731


In [None]:
# 18 Epochs - batch_size = 64
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=64, validation_split=0.1)

Epoch 1/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.8140 - loss: 0.5763 - val_accuracy: 0.9777 - val_loss: 0.0745
Epoch 2/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9746 - loss: 0.0851 - val_accuracy: 0.9805 - val_loss: 0.0636
Epoch 3/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9840 - loss: 0.0530 - val_accuracy: 0.9852 - val_loss: 0.0513
Epoch 4/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9880 - loss: 0.0386 - val_accuracy: 0.9875 - val_loss: 0.0501
Epoch 5/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9908 - loss: 0.0291 - val_accuracy: 0.9882 - val_loss: 0.0458
Epoch 6/18
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9935 - loss: 0.0210 - val_accuracy: 0.9878 - val_loss: 0.0465
Epoch 7/18
[1m844/844[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs


=== Model Summary ===



Final Accuracy: 99.09 %
Accuracy in first 5 epochs: [0.9184073805809021, 0.9765926003456116, 0.9837037324905396, 0.9877963066101074, 0.9902777671813965]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.72 s
Layers and Activations:
conv2d_10 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_5 - MaxPooling2D - Activation: N/A
conv2d_11 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_7 - Flatten - Activation: N/A
dense_16 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_17 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:46:22.245184


In [None]:
# 18 Epochs - batch_size = 96
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=96, validation_split=0.1)

Epoch 1/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.7997 - loss: 0.6872 - val_accuracy: 0.9768 - val_loss: 0.0817
Epoch 2/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9714 - loss: 0.0943 - val_accuracy: 0.9822 - val_loss: 0.0647
Epoch 3/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9812 - loss: 0.0597 - val_accuracy: 0.9850 - val_loss: 0.0571
Epoch 4/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9856 - loss: 0.0448 - val_accuracy: 0.9820 - val_loss: 0.0615
Epoch 5/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9898 - loss: 0.0334 - val_accuracy: 0.9862 - val_loss: 0.0506
Epoch 6/18
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.9918 - loss: 0.0256 - val_accuracy: 0.9863 - val_loss: 0.0517
Epoch 7/18
[1m563/563[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs


=== Model Summary ===



Final Accuracy: 99.07 %
Accuracy in first 5 epochs: [0.9068333506584167, 0.9736481308937073, 0.9815185070037842, 0.9857407212257385, 0.9890184998512268]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.68 s
Layers and Activations:
conv2d_12 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_6 - MaxPooling2D - Activation: N/A
conv2d_13 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_8 - Flatten - Activation: N/A
dense_18 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_19 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:47:17.438933


In [None]:
# 18 Epochs - batch_size = 128
cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=128, validation_split=0.1)

Epoch 1/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.7784 - loss: 0.7497 - val_accuracy: 0.9670 - val_loss: 0.1078
Epoch 2/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9654 - loss: 0.1122 - val_accuracy: 0.9782 - val_loss: 0.0754
Epoch 3/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9794 - loss: 0.0672 - val_accuracy: 0.9827 - val_loss: 0.0639
Epoch 4/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9833 - loss: 0.0544 - val_accuracy: 0.9847 - val_loss: 0.0568
Epoch 5/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9865 - loss: 0.0438 - val_accuracy: 0.9813 - val_loss: 0.0677
Epoch 6/18
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9890 - loss: 0.0353 - val_accuracy: 0.9858 - val_loss: 0.0551
Epoch 7/18
[1m422/422[0m 

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs - batch_size = 32


=== Model Summary ===



Final Accuracy: 98.86 %
Accuracy in first 5 epochs: [0.89340740442276, 0.9690185189247131, 0.97901850938797, 0.9829074144363403, 0.9859259128570557]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.51 s
Layers and Activations:
conv2d_14 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_7 - MaxPooling2D - Activation: N/A
conv2d_15 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_9 - Flatten - Activation: N/A
dense_20 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_21 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:47:57.997035


### Learning Rate

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.1
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.1, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8708 - loss: 0.4337 - val_accuracy: 0.9287 - val_loss: 0.2394
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.9315 - loss: 0.2712 - val_accuracy: 0.9422 - val_loss: 0.2229
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9283 - loss: 0.3136 - val_accuracy: 0.1113 - val_loss: 2.3252
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.1034 - loss: 2.3087 - val_accuracy: 0.1050 - val_loss: 2.3140
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.1060 - loss: 2.3093 - val_accuracy: 0.1113 - val_loss: 2.3085
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.1073 - loss: 2.3084 - val_accuracy: 0.1050 - val_loss: 2.3149
Epoch 7/18
[

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 11.35 %
Accuracy in first 5 epochs: [0.9205926060676575, 0.9258703589439392, 0.9109073877334595, 0.1032777801156044, 0.10599999874830246]
Number of parameters: 1011466
Average Train Time per Epoch: 0.12s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_16 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_8 - MaxPooling2D - Activation: N/A
conv2d_17 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_10 - Flatten - Activation: N/A
dense_22 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_23 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.10000000149011612, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:50:32.548323


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8645 - loss: 0.4246 - val_accuracy: 0.9828 - val_loss: 0.0616
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9799 - loss: 0.0650 - val_accuracy: 0.9822 - val_loss: 0.0626
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9871 - loss: 0.0387 - val_accuracy: 0.9890 - val_loss: 0.0431
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9927 - loss: 0.0235 - val_accuracy: 0.9870 - val_loss: 0.0461
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9945 - loss: 0.0170 - val_accuracy: 0.9902 - val_loss: 0.0389
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9958 - loss: 0.0136 - val_accuracy: 0.9900 - val_loss: 0.0414
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.2 %
Accuracy in first 5 epochs: [0.9400185346603394, 0.9817407131195068, 0.9875555634498596, 0.9921851754188538, 0.9942222237586975]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.74 s
Layers and Activations:
conv2d_18 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_9 - MaxPooling2D - Activation: N/A
conv2d_19 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_11 - Flatten - Activation: N/A
dense_24 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_25 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:52:43.908371


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.05
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.05, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8973 - loss: 0.3324 - val_accuracy: 0.9818 - val_loss: 0.0629
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9827 - loss: 0.0567 - val_accuracy: 0.9875 - val_loss: 0.0508
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9879 - loss: 0.0381 - val_accuracy: 0.9858 - val_loss: 0.0595
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9923 - loss: 0.0246 - val_accuracy: 0.9873 - val_loss: 0.0617
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9931 - loss: 0.0197 - val_accuracy: 0.9863 - val_loss: 0.0677
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9931 - loss: 0.0231 - val_accuracy: 0.9885 - val_loss: 0.0586
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.03 %
Accuracy in first 5 epochs: [0.952833354473114, 0.9837222099304199, 0.9880740642547607, 0.9912407398223877, 0.9912962913513184]
Number of parameters: 1011466
Average Train Time per Epoch: 0.00s
Average Test Time: 0.76 s
Layers and Activations:
conv2d_20 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_10 - MaxPooling2D - Activation: N/A
conv2d_21 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_12 - Flatten - Activation: N/A
dense_26 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_27 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.05000000074505806, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:55:09.080332


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.001
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.7518 - loss: 0.8463 - val_accuracy: 0.9540 - val_loss: 0.1622
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9430 - loss: 0.1894 - val_accuracy: 0.9667 - val_loss: 0.1168
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9628 - loss: 0.1255 - val_accuracy: 0.9743 - val_loss: 0.0913
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9716 - loss: 0.0946 - val_accuracy: 0.9790 - val_loss: 0.0834
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9786 - loss: 0.0701 - val_accuracy: 0.9793 - val_loss: 0.0805
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9813 - loss: 0.0624 - val_accuracy: 0.9835 - val_loss: 0.0646
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.68 %
Accuracy in first 5 epochs: [0.8675000071525574, 0.9490185379981995, 0.9643518328666687, 0.9725925922393799, 0.9780185222625732]
Number of parameters: 1011466
Average Train Time per Epoch: -0.01s
Average Test Time: 0.51 s
Layers and Activations:
conv2d_22 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_11 - MaxPooling2D - Activation: N/A
conv2d_23 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_13 - Flatten - Activation: N/A
dense_28 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_29 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.0010000000474974513, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:57:27.463148


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.0001
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.0001, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.4376 - loss: 1.9846 - val_accuracy: 0.8917 - val_loss: 0.4497
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8706 - loss: 0.4664 - val_accuracy: 0.9218 - val_loss: 0.2848
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8978 - loss: 0.3444 - val_accuracy: 0.9280 - val_loss: 0.2481
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9106 - loss: 0.3072 - val_accuracy: 0.9388 - val_loss: 0.2210
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9194 - loss: 0.2750 - val_accuracy: 0.9437 - val_loss: 0.1982
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9281 - loss: 0.2475 - val_accuracy: 0.9493 - val_loss: 0.1870
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) # The highest accuracy is at 18 epochs - batch_size = 32 - Learning_rate = 0.01


=== Model Summary ===



Final Accuracy: 96.81 %
Accuracy in first 5 epochs: [0.648277759552002, 0.8813703656196594, 0.9024074077606201, 0.9129074215888977, 0.9214629530906677]
Number of parameters: 1011466
Average Train Time per Epoch: -0.02s
Average Test Time: 0.51 s
Layers and Activations:
conv2d_24 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_12 - MaxPooling2D - Activation: N/A
conv2d_25 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_14 - Flatten - Activation: N/A
dense_30 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_31 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 9.999999747378752e-05, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 04:59:32.480676


### Convolution Layers

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 1 Conv Layer
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8601 - loss: 0.4654 - val_accuracy: 0.9728 - val_loss: 0.0935
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9726 - loss: 0.0920 - val_accuracy: 0.9842 - val_loss: 0.0576
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9849 - loss: 0.0495 - val_accuracy: 0.9845 - val_loss: 0.0570
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9894 - loss: 0.0353 - val_accuracy: 0.9855 - val_loss: 0.0533
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9915 - loss: 0.0259 - val_accuracy: 0.9843 - val_loss: 0.0587
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9947 - loss: 0.0177 - val_accuracy: 0.9882 - val_loss: 0.0512
Epoch 7/18
[1m1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.86 %
Accuracy in first 5 epochs: [0.9242963194847107, 0.9751666784286499, 0.9840185046195984, 0.9885370135307312, 0.9912592768669128]
Number of parameters: 693962
Average Train Time per Epoch: -0.00s
Average Test Time: 0.62 s
Layers and Activations:
conv2d_26 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_13 - MaxPooling2D - Activation: N/A
flatten_15 - Flatten - Activation: N/A
dense_32 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_33 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:01:05.563352


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8724 - loss: 0.4105 - val_accuracy: 0.9807 - val_loss: 0.0672
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9824 - loss: 0.0583 - val_accuracy: 0.9862 - val_loss: 0.0528
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9890 - loss: 0.0331 - val_accuracy: 0.9880 - val_loss: 0.0454
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9923 - loss: 0.0235 - val_accuracy: 0.9893 - val_loss: 0.0387
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9945 - loss: 0.0171 - val_accuracy: 0.9910 - val_loss: 0.0385
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9964 - loss: 0.0120 - val_accuracy: 0.9898 - val_loss: 0.0496
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.24 %
Accuracy in first 5 epochs: [0.9423333406448364, 0.9832592606544495, 0.9886666536331177, 0.9918703436851501, 0.9944259524345398]
Number of parameters: 1011466
Average Train Time per Epoch: -0.00s
Average Test Time: 0.54 s
Layers and Activations:
conv2d_27 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_14 - MaxPooling2D - Activation: N/A
conv2d_28 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_16 - Flatten - Activation: N/A
dense_34 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_35 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:03:13.203989


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 3 Conv Layer
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.8641 - loss: 0.4122 - val_accuracy: 0.9795 - val_loss: 0.0724
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9832 - loss: 0.0551 - val_accuracy: 0.9865 - val_loss: 0.0460
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9900 - loss: 0.0313 - val_accuracy: 0.9885 - val_loss: 0.0393
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9927 - loss: 0.0218 - val_accuracy: 0.9897 - val_loss: 0.0412
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9947 - loss: 0.0166 - val_accuracy: 0.9875 - val_loss: 0.0482
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9957 - loss: 0.0128 - val_accuracy: 0.9902 - val_loss: 0.0445
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.15 %
Accuracy in first 5 epochs: [0.9430925846099854, 0.9836481213569641, 0.9898333549499512, 0.9923703670501709, 0.9945926070213318]
Number of parameters: 720714
Average Train Time per Epoch: -0.00s
Average Test Time: 1.42 s
Layers and Activations:
conv2d_29 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_15 - MaxPooling2D - Activation: N/A
conv2d_30 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
conv2d_31 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_17 - Flatten - Activation: N/A
dense_36 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_37 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8728 - loss: 0.4112 - val_accuracy: 0.9803 - val_loss: 0.0659
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0608 - val_accuracy: 0.9863 - val_loss: 0.0483
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9869 - loss: 0.0409 - val_accuracy: 0.9867 - val_loss: 0.0454
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9912 - loss: 0.0265 - val_accuracy: 0.9868 - val_loss: 0.0466
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9941 - loss: 0.0185 - val_accuracy: 0.9895 - val_loss: 0.0427
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9957 - loss: 0.0136 - val_accuracy: 0.9895 - val_loss: 0.0368
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat) ### Best Accuracy (18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense)


=== Model Summary ===



Final Accuracy: 99.08 %
Accuracy in first 5 epochs: [0.9403889179229736, 0.9818888902664185, 0.9875741004943848, 0.9913703799247742, 0.9938518404960632]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_32 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_16 - MaxPooling2D - Activation: N/A
conv2d_33 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_18 - Flatten - Activation: N/A
dense_38 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_39 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:08:00.545185


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Fewer Filters
def build_cnn():
    model = Sequential([
        Conv2D(16, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8689 - loss: 0.4154 - val_accuracy: 0.9753 - val_loss: 0.0907
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9808 - loss: 0.0643 - val_accuracy: 0.9850 - val_loss: 0.0525
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9874 - loss: 0.0417 - val_accuracy: 0.9888 - val_loss: 0.0470
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9923 - loss: 0.0266 - val_accuracy: 0.9852 - val_loss: 0.0504
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9934 - loss: 0.0217 - val_accuracy: 0.9870 - val_loss: 0.0490
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9950 - loss: 0.0156 - val_accuracy: 0.9887 - val_loss: 0.0484
Epoch 7/18
[1m1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.01 %
Accuracy in first 5 epochs: [0.9382777810096741, 0.9813148379325867, 0.9874814748764038, 0.9912592768669128, 0.9932777881622314]
Number of parameters: 501834
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_34 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_17 - MaxPooling2D - Activation: N/A
conv2d_35 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_19 - Flatten - Activation: N/A
dense_40 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_41 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:09:47.002777


### Activation Functions

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8718 - loss: 0.4129 - val_accuracy: 0.9800 - val_loss: 0.0667
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0650 - val_accuracy: 0.9828 - val_loss: 0.0547
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9887 - loss: 0.0390 - val_accuracy: 0.9882 - val_loss: 0.0506
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9915 - loss: 0.0260 - val_accuracy: 0.9882 - val_loss: 0.0440
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9934 - loss: 0.0194 - val_accuracy: 0.9885 - val_loss: 0.0473
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9952 - loss: 0.0148 - val_accuracy: 0.9900 - val_loss: 0.0475
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)  ## Best Accuracy (# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - relu)


=== Model Summary ===



Final Accuracy: 98.95 %
Accuracy in first 5 epochs: [0.9393518567085266, 0.9810000061988831, 0.9876111149787903, 0.9907962679862976, 0.9928518533706665]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.77 s
Layers and Activations:
conv2d_36 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_18 - MaxPooling2D - Activation: N/A
conv2d_37 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_20 - Flatten - Activation: N/A
dense_42 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_43 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:12:14.737167


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - sigmoid
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='sigmoid', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='sigmoid'),
        Flatten(),
        Dense(64, activation='sigmoid'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.1032 - loss: 2.3218 - val_accuracy: 0.0978 - val_loss: 2.3111
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.1060 - loss: 2.3121 - val_accuracy: 0.0978 - val_loss: 2.3050
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.2277 - loss: 2.0736 - val_accuracy: 0.9108 - val_loss: 0.3222
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8985 - loss: 0.3424 - val_accuracy: 0.9495 - val_loss: 0.1838
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9335 - loss: 0.2240 - val_accuracy: 0.9647 - val_loss: 0.1331
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9511 - loss: 0.1630 - val_accuracy: 0.9660 - val_loss: 0.1176
Epoch 7/18
[1m1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.23 %
Accuracy in first 5 epochs: [0.10572221875190735, 0.10577777773141861, 0.48172223567962646, 0.9103518724441528, 0.9388333559036255]
Number of parameters: 515146
Average Train Time per Epoch: -0.13s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_38 - Conv2D - Activation: <function sigmoid at 0x7ed6aeee36a0>
max_pooling2d_19 - MaxPooling2D - Activation: N/A
conv2d_39 - Conv2D - Activation: <function sigmoid at 0x7ed6aeee36a0>
flatten_21 - Flatten - Activation: N/A
dense_44 - Dense - Activation: <function sigmoid at 0x7ed6aeee36a0>
dense_45 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:14:30.4

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - tanh
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='tanh', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='tanh'),
        Flatten(),
        Dense(64, activation='tanh'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8768 - loss: 0.4117 - val_accuracy: 0.9692 - val_loss: 0.1056
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9689 - loss: 0.1051 - val_accuracy: 0.9792 - val_loss: 0.0738
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9827 - loss: 0.0617 - val_accuracy: 0.9828 - val_loss: 0.0598
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9873 - loss: 0.0457 - val_accuracy: 0.9878 - val_loss: 0.0490
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9911 - loss: 0.0335 - val_accuracy: 0.9885 - val_loss: 0.0484
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9934 - loss: 0.0238 - val_accuracy: 0.9865 - val_loss: 0.0482
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.76 %
Accuracy in first 5 epochs: [0.930055558681488, 0.9710184931755066, 0.9819814562797546, 0.9871296286582947, 0.9907407164573669]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.65 s
Layers and Activations:
conv2d_40 - Conv2D - Activation: <function tanh at 0x7ed6aeee3380>
max_pooling2d_20 - MaxPooling2D - Activation: N/A
conv2d_41 - Conv2D - Activation: <function tanh at 0x7ed6aeee3380>
flatten_22 - Flatten - Activation: N/A
dense_46 - Dense - Activation: <function tanh at 0x7ed6aeee3380>
dense_47 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:17:05.455550


In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - softplus
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='softplus', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='softplus'),
        Flatten(),
        Dense(64, activation='softplus'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.1810 - loss: 2.1651 - val_accuracy: 0.9302 - val_loss: 0.2386
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9316 - loss: 0.2253 - val_accuracy: 0.9685 - val_loss: 0.1064
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9633 - loss: 0.1178 - val_accuracy: 0.9742 - val_loss: 0.0883
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9744 - loss: 0.0833 - val_accuracy: 0.9837 - val_loss: 0.0660
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9798 - loss: 0.0649 - val_accuracy: 0.9840 - val_loss: 0.0622
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9832 - loss: 0.0521 - val_accuracy: 0.9833 - val_loss: 0.0661
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.41 %
Accuracy in first 5 epochs: [0.41729629039764404, 0.9427592754364014, 0.9665370583534241, 0.975518524646759, 0.9794074296951294]
Number of parameters: 515146
Average Train Time per Epoch: -0.01s
Average Test Time: 0.77 s
Layers and Activations:
conv2d_42 - Conv2D - Activation: <function softplus at 0x7ed6aeee2de0>
max_pooling2d_21 - MaxPooling2D - Activation: N/A
conv2d_43 - Conv2D - Activation: <function softplus at 0x7ed6aeee2de0>
flatten_23 - Flatten - Activation: N/A
dense_48 - Dense - Activation: <function softplus at 0x7ed6aeee2de0>
dense_49 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:19:23.9

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.8694 - loss: 0.4130 - val_accuracy: 0.9800 - val_loss: 0.0741
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9797 - loss: 0.0671 - val_accuracy: 0.9877 - val_loss: 0.0512
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9883 - loss: 0.0380 - val_accuracy: 0.9902 - val_loss: 0.0413
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9920 - loss: 0.0268 - val_accuracy: 0.9882 - val_loss: 0.0439
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9945 - loss: 0.0173 - val_accuracy: 0.9890 - val_loss: 0.0407
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9950 - loss: 0.0142 - val_accuracy: 0.9900 - val_loss: 0.0422
Epoch 7/18
[

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.05 %
Accuracy in first 5 epochs: [0.9395925998687744, 0.9820370078086853, 0.9879074096679688, 0.991611123085022, 0.9935370087623596]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_44 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_22 - MaxPooling2D - Activation: N/A
conv2d_45 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_24 - Flatten - Activation: N/A
dense_50 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_51 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:22:00.730970


### Fully Connected Layers

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer - 4 Fully connected Layers 128 - 96 - 64 - 32  - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(96, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4ms/step - accuracy: 0.2919 - loss: 2.5821 - val_accuracy: 0.6013 - val_loss: 1.0618
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.6625 - loss: 0.9753 - val_accuracy: 0.8730 - val_loss: 0.4197
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8689 - loss: 0.4267 - val_accuracy: 0.9170 - val_loss: 0.2809
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9066 - loss: 0.3090 - val_accuracy: 0.9185 - val_loss: 0.2746
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9008 - loss: 0.3148 - val_accuracy: 0.9263 - val_loss: 0.2522
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9215 - loss: 0.2505 - val_accuracy: 0.9310 - val_loss: 0.2252
Epoch 7/18


In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 95.29 %
Accuracy in first 5 epochs: [0.4050000011920929, 0.7361851930618286, 0.8822036981582642, 0.9048518538475037, 0.9063147902488708]
Number of parameters: 1031178
Average Train Time per Epoch: -0.05s
Average Test Time: 0.91 s
Layers and Activations:
conv2d_10 - Conv2D - Activation: <function relu at 0x7fc1dd536020>
max_pooling2d_5 - MaxPooling2D - Activation: N/A
conv2d_11 - Conv2D - Activation: <function relu at 0x7fc1dd536020>
flatten_5 - Flatten - Activation: N/A
dense_25 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_26 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_27 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_28 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_29 - Dense - Activation: <function softmax at 0x7fc1dcb6b6a0>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer - 3 Fully connected Layers 64 , 32 , 16 - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.7906 - loss: 0.6139 - val_accuracy: 0.9787 - val_loss: 0.0691
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9790 - loss: 0.0677 - val_accuracy: 0.9875 - val_loss: 0.0461
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9875 - loss: 0.0424 - val_accuracy: 0.9877 - val_loss: 0.0504
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9909 - loss: 0.0283 - val_accuracy: 0.9880 - val_loss: 0.0455
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9929 - loss: 0.0213 - val_accuracy: 0.9892 - val_loss: 0.0484
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9951 - loss: 0.0156 - val_accuracy: 0.9885 - val_loss: 0.0464
Epoch 7/18


In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.87 %
Accuracy in first 5 epochs: [0.9138148427009583, 0.9797777533531189, 0.9872962832450867, 0.990574061870575, 0.9927963018417358]
Number of parameters: 517274
Average Train Time per Epoch: -0.00s
Average Test Time: 0.65 s
Layers and Activations:
conv2d_16 - Conv2D - Activation: <function relu at 0x7fc1dd536020>
max_pooling2d_8 - MaxPooling2D - Activation: N/A
conv2d_17 - Conv2D - Activation: <function relu at 0x7fc1dd536020>
flatten_8 - Flatten - Activation: N/A
dense_38 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_39 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_40 - Dense - Activation: <function relu at 0x7fc1dd536020>
dense_41 - Dense - Activation: <function softmax at 0x7fc1dcb6b6a0>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_facto

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer - 2 Fully connected Layers 96 , 32  - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(96, activation='relu'),
        Dense(32, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8493 - loss: 0.4683 - val_accuracy: 0.9733 - val_loss: 0.0863
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0627 - val_accuracy: 0.9860 - val_loss: 0.0480
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9882 - loss: 0.0379 - val_accuracy: 0.9882 - val_loss: 0.0457
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9919 - loss: 0.0259 - val_accuracy: 0.9895 - val_loss: 0.0442
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9944 - loss: 0.0157 - val_accuracy: 0.9880 - val_loss: 0.0495
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9958 - loss: 0.0139 - val_accuracy: 0.9895 - val_loss: 0.0466
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.04 %
Accuracy in first 5 epochs: [0.9330740571022034, 0.9816111326217651, 0.9879074096679688, 0.991518497467041, 0.9944444298744202]
Number of parameters: 765770
Average Train Time per Epoch: -0.00s
Average Test Time: 1.37 s
Layers and Activations:
conv2d_48 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_24 - MaxPooling2D - Activation: N/A
conv2d_49 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_26 - Flatten - Activation: N/A
dense_55 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_56 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_57 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9,

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 1 Fully connected layer 64 - relu
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8615 - loss: 0.4313 - val_accuracy: 0.9785 - val_loss: 0.0714
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9783 - loss: 0.0689 - val_accuracy: 0.9847 - val_loss: 0.0551
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9870 - loss: 0.0415 - val_accuracy: 0.9863 - val_loss: 0.0538
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9915 - loss: 0.0261 - val_accuracy: 0.9868 - val_loss: 0.0481
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9936 - loss: 0.0204 - val_accuracy: 0.9890 - val_loss: 0.0426
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9955 - loss: 0.0140 - val_accuracy: 0.9882 - val_loss: 0.0470
Epoch 7/18
[

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.08 %
Accuracy in first 5 epochs: [0.9376296401023865, 0.9807222485542297, 0.9878148436546326, 0.9905925989151001, 0.9933518767356873]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.52 s
Layers and Activations:
conv2d_52 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_26 - MaxPooling2D - Activation: N/A
conv2d_53 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_28 - Flatten - Activation: N/A
dense_62 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_63 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:31:50.010737


### Optimizers

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 1 Fully connected layer 64 - relu - optimizer : Adam
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, (3,3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8796 - loss: 0.3857 - val_accuracy: 0.9793 - val_loss: 0.0703
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9715 - loss: 0.0942 - val_accuracy: 0.9807 - val_loss: 0.0698
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9759 - loss: 0.0815 - val_accuracy: 0.9778 - val_loss: 0.0821
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9788 - loss: 0.0691 - val_accuracy: 0.9827 - val_loss: 0.0675
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9818 - loss: 0.0622 - val_accuracy: 0.9818 - val_loss: 0.0701
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9808 - loss: 0.0639 - val_accuracy: 0.9805 - val_loss: 0.0786
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 97.27 %
Accuracy in first 5 epochs: [0.9421296119689941, 0.9708518385887146, 0.9752963185310364, 0.9761296510696411, 0.9790370464324951]
Number of parameters: 515146
Average Train Time per Epoch: 0.00s
Average Test Time: 0.76 s
Layers and Activations:
conv2d_54 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_27 - MaxPooling2D - Activation: N/A
conv2d_55 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_29 - Flatten - Activation: N/A
dense_64 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_65 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'adam', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
Timestamp: 2

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 1 Fully connected layer 64 - relu - optimizer : Adagrad

def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, (3,3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = Adagrad(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8106 - loss: 0.5994 - val_accuracy: 0.9667 - val_loss: 0.1141
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9627 - loss: 0.1259 - val_accuracy: 0.9793 - val_loss: 0.0777
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9762 - loss: 0.0794 - val_accuracy: 0.9823 - val_loss: 0.0634
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9812 - loss: 0.0605 - val_accuracy: 0.9835 - val_loss: 0.0616
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9852 - loss: 0.0469 - val_accuracy: 0.9848 - val_loss: 0.0582
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9882 - loss: 0.0395 - val_accuracy: 0.9875 - val_loss: 0.0486
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 98.66 %
Accuracy in first 5 epochs: [0.9045925736427307, 0.9673703908920288, 0.9777592420578003, 0.981925904750824, 0.984666645526886]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.77 s
Layers and Activations:
conv2d_56 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_28 - MaxPooling2D - Activation: N/A
conv2d_57 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_30 - Flatten - Activation: N/A
dense_66 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_67 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'adagrad', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'initial_accumulator_value': 0.1, 'epsilon': 1e-07}
Timestamp: 2025-05-19 05:3

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 1 Fully connected layer 64 - relu - optimizer : AdamW

def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, (3,3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = AdamW(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.9029 - loss: 0.3196 - val_accuracy: 0.9832 - val_loss: 0.0606
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9728 - loss: 0.0883 - val_accuracy: 0.9815 - val_loss: 0.0702
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9794 - loss: 0.0732 - val_accuracy: 0.9812 - val_loss: 0.0702
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9823 - loss: 0.0598 - val_accuracy: 0.9752 - val_loss: 0.1025
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9835 - loss: 0.0549 - val_accuracy: 0.9802 - val_loss: 0.0816
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9835 - loss: 0.0590 - val_accuracy: 0.9810 - val_loss: 0.0775
Epoch 7/18
[

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 97.99 %
Accuracy in first 5 epochs: [0.951962947845459, 0.9740555286407471, 0.9785555601119995, 0.9807592630386353, 0.9820185303688049]
Number of parameters: 515146
Average Train Time per Epoch: 0.00s
Average Test Time: 0.51 s
Layers and Activations:
conv2d_58 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_29 - MaxPooling2D - Activation: N/A
conv2d_59 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_31 - Flatten - Activation: N/A
dense_68 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_69 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'adamw', 'learning_rate': 0.009999999776482582, 'weight_decay': 0.004, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False}
Timestamp: 

### Dropout

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 2 Fully connected layer 64 - 10 - relu - optimizer : SGD - dropout half
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.8001 - loss: 0.6060 - val_accuracy: 0.9790 - val_loss: 0.0729
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9539 - loss: 0.1575 - val_accuracy: 0.9848 - val_loss: 0.0530
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9673 - loss: 0.1051 - val_accuracy: 0.9895 - val_loss: 0.0390
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9758 - loss: 0.0797 - val_accuracy: 0.9885 - val_loss: 0.0418
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9802 - loss: 0.0677 - val_accuracy: 0.9900 - val_loss: 0.0359
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9811 - loss: 0.0596 - val_accuracy: 0.9893 - val_loss: 0.0422
Epoch 7/18
[1m

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.17 %
Accuracy in first 5 epochs: [0.8917962908744812, 0.9577777981758118, 0.9691481590270996, 0.9756296277046204, 0.9792777895927429]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.74 s
Layers and Activations:
conv2d_60 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_30 - MaxPooling2D - Activation: N/A
conv2d_61 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_32 - Flatten - Activation: N/A
dense_70 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dropout - Dropout - Activation: N/A
dense_71 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestam

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 2 Fully connected layer 64 - 10 - relu - optimizer : SGD - dropout 25%
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.25),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.8324 - loss: 0.5409 - val_accuracy: 0.9807 - val_loss: 0.0699
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9716 - loss: 0.0968 - val_accuracy: 0.9868 - val_loss: 0.0504
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0643 - val_accuracy: 0.9883 - val_loss: 0.0441
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9855 - loss: 0.0474 - val_accuracy: 0.9897 - val_loss: 0.0437
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9874 - loss: 0.0391 - val_accuracy: 0.9888 - val_loss: 0.0425
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9900 - loss: 0.0310 - val_accuracy: 0.9903 - val_loss: 0.0367
Epoch 7/18


In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.17 %
Accuracy in first 5 epochs: [0.9176111221313477, 0.9728703498840332, 0.9814629554748535, 0.9845370650291443, 0.9871851801872253]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_62 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_31 - MaxPooling2D - Activation: N/A
conv2d_63 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_33 - Flatten - Activation: N/A
dense_72 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dropout_1 - Dropout - Activation: N/A
dense_73 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timest

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 2 Fully connected layer 64 - 10 - relu - optimizer : SGD - dropout 10%
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.1),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8593 - loss: 0.4366 - val_accuracy: 0.9832 - val_loss: 0.0634
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9785 - loss: 0.0718 - val_accuracy: 0.9835 - val_loss: 0.0624
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9857 - loss: 0.0481 - val_accuracy: 0.9885 - val_loss: 0.0398
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9887 - loss: 0.0355 - val_accuracy: 0.9878 - val_loss: 0.0460
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9911 - loss: 0.0274 - val_accuracy: 0.9905 - val_loss: 0.0412
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9937 - loss: 0.0208 - val_accuracy: 0.9903 - val_loss: 0.0391
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.13 %
Accuracy in first 5 epochs: [0.931851863861084, 0.9786666631698608, 0.9851666688919067, 0.9886851906776428, 0.9907777905464172]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 0.73 s
Layers and Activations:
conv2d_64 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_32 - MaxPooling2D - Activation: N/A
conv2d_65 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_34 - Flatten - Activation: N/A
dense_74 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dropout_2 - Dropout - Activation: N/A
dense_75 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timesta

In [None]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense - 2 Fully connected layer 64 - 10 - relu - optimizer : SGD - no dropout
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1)

Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8566 - loss: 0.4397 - val_accuracy: 0.9832 - val_loss: 0.0609
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9810 - loss: 0.0617 - val_accuracy: 0.9867 - val_loss: 0.0473
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9881 - loss: 0.0383 - val_accuracy: 0.9878 - val_loss: 0.0447
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9914 - loss: 0.0262 - val_accuracy: 0.9888 - val_loss: 0.0409
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9933 - loss: 0.0202 - val_accuracy: 0.9903 - val_loss: 0.0408
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9960 - loss: 0.0129 - val_accuracy: 0.9903 - val_loss: 0.0402
Epoch 7/18
[1

In [None]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.11 %
Accuracy in first 5 epochs: [0.937333345413208, 0.9822962880134583, 0.9881296157836914, 0.991018533706665, 0.9931296110153198]
Number of parameters: 515146
Average Train Time per Epoch: 0.00s
Average Test Time: 0.5 s
Layers and Activations:
conv2d_66 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
max_pooling2d_33 - MaxPooling2D - Activation: N/A
conv2d_67 - Conv2D - Activation: <function relu at 0x7ed6af6b5580>
flatten_35 - Flatten - Activation: N/A
dense_76 - Dense - Activation: <function relu at 0x7ed6af6b5580>
dense_77 - Dense - Activation: <function softmax at 0x7ed6aeee2c00>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 05:49:38.998362


## **Best Model**

In [9]:
# 18 Epochs - batch_size = 32 - learning_rate = 0.01 - 2 Conv Layer Small dense (32,64) - 1 FC layer 64 - relu - optimizer : SGD - 25% dropout - shuffle
def build_cnn():
    model = Sequential([
        Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
        MaxPooling2D(pool_size=(2,2),strides=(2,2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.25),
        Dense(10, activation='softmax')
    ])
    optimizer = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

cnn_model = build_cnn()
history = cnn_model.fit(x_train_cnn, y_train_cat, epochs=18, batch_size=32, validation_split=0.1, shuffle = True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8319 - loss: 0.5167 - val_accuracy: 0.9782 - val_loss: 0.0775
Epoch 2/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9700 - loss: 0.0989 - val_accuracy: 0.9867 - val_loss: 0.0505
Epoch 3/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9798 - loss: 0.0683 - val_accuracy: 0.9855 - val_loss: 0.0500
Epoch 4/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9843 - loss: 0.0487 - val_accuracy: 0.9892 - val_loss: 0.0379
Epoch 5/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 0.9877 - loss: 0.0409 - val_accuracy: 0.9905 - val_loss: 0.0407
Epoch 6/18
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9895 - loss: 0.0323 - val_accuracy: 0.9895 - val_loss: 0.0391
Epoch 7/18
[

In [10]:
log_model_info(cnn_model, history, x_test_cnn, y_test_cat)


=== Model Summary ===



Final Accuracy: 99.16 %
Accuracy in first 5 epochs: [0.9163888692855835, 0.9713703989982605, 0.9802407622337341, 0.9839259386062622, 0.987074077129364]
Number of parameters: 515146
Average Train Time per Epoch: -0.00s
Average Test Time: 1.68 s
Layers and Activations:
conv2d - Conv2D - Activation: <function relu at 0x7f2ec6f7be20>
max_pooling2d - MaxPooling2D - Activation: N/A
conv2d_1 - Conv2D - Activation: <function relu at 0x7f2ec6f7be20>
flatten - Flatten - Activation: N/A
dense - Dense - Activation: <function relu at 0x7f2ec6f7be20>
dropout - Dropout - Activation: N/A
dense_1 - Dense - Activation: <function softmax at 0x7f2ec65e94e0>
Optimizer Config: {'name': 'SGD', 'learning_rate': 0.009999999776482582, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.9, 'nesterov': False}
Timestamp: 2025-05-19 1