In [1]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Downloading tensorflow_io_gcs_filesystem-0.37.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting wheel<1.0,>=0.23.0 (from astunparse>=1.6.0->tensorflow

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.utils import to_categorical
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

In [3]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
## Data Nomralization (0-1)
x_train = x_train.astype('float32') / 255.0
x_test  = x_test.astype('float32') / 255.0

In [5]:
x_train.shape

(60000, 28, 28)

##**SVM Model**

In [6]:
x_train_svm = x_train.reshape(-1,28*28).astype('float32') #make ot 1D
x_test_svm = x_test.reshape(-1,28*28).astype('float32')
x_train_sub = x_train_svm[:15000] #25% of the dataset for speed
x_test_sub = x_test_svm[:2500]
y_train_sub = y_train[:15000]
y_test_sub = y_test[:2500]
SVM_model = SVC()
svm_model = SVC(kernel='rbf', C=5)
svm_model.fit(x_train_sub, y_train_sub)
y_pred = svm_model.predict(x_test_sub)

# Metrics
print("Accuracy:", accuracy_score(y_test_sub, y_pred))

Accuracy: 0.9548


In [7]:
# Reshape to (28, 28, 1) => Adds a channel dimension at the end so the shape that represensts grayscale
import numpy as np
x_train = np.expand_dims(x_train, -1)
x_test  = np.expand_dims(x_test, -1)

In [8]:
x_train.shape, x_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [9]:
# mapping the labels as an array of 1 and 0's where 1 represnets the index of the correct class
from tensorflow.keras.utils import to_categorical
y_train_cat = to_categorical(y_train, 10)
y_test_cat  = to_categorical(y_test, 10)


In [10]:
y_train[0]

np.uint8(5)

In [11]:
y_train_cat[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])

## Models Configuration and Settings builder function

In [12]:
# ANN model builder
def build_ann(hidden_units, optimizer, lr, activation="relu"):
    model = Sequential(
        [
            Flatten(input_shape=(28, 28, 1)),
            Dense(hidden_units, activation=activation),
            Dense(10, activation="softmax"),
        ]
    )
    model.compile(
        optimizer=optimizer(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [13]:
# CNN model builder
def build_cnn(filters, lr, optimizer, activation="relu", use_dropout=False,  dropout_value=0.2,use_pooling=True):
    model = Sequential()
    model.add(Conv2D(filters, (3, 3), activation=activation, input_shape=(28, 28, 1)))
    if use_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    if use_dropout:
        model.add(Dropout(dropout_value))
    model.add(Flatten())
    model.add(Dense(128, activation=activation))
    model.add(Dense(10, activation="softmax"))
    model.compile(
        optimizer=optimizer(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

## ANN Model training for each different parameters




In [23]:
# ANN model 1 => 64 neuron unit / SGD optimizer / 0.01 LR
ann_model1 = build_ann(64, SGD, 0.01)
start_time = time.time()

ann_model1.fit(
    x_train, y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7223 - loss: 1.0435 - val_accuracy: 0.8956 - val_loss: 0.3720
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8940 - loss: 0.3716 - val_accuracy: 0.9109 - val_loss: 0.3129
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9066 - loss: 0.3233 - val_accuracy: 0.9211 - val_loss: 0.2858
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9163 - loss: 0.2929 - val_accuracy: 0.9228 - val_loss: 0.2695
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9233 - loss: 0.2750 - val_accuracy: 0.9286 - val_loss: 0.2508
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9284 - loss: 0.2530 - val_accuracy: 0.9313 - val_loss: 0.2380
Epoch 7/10
[1m1

In [24]:
val_loss, val_accuracy = ann_model1.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9336 - loss: 0.2219


In [25]:
# ANN model 2 => 128 neuron unit / SGD optimizer / 0.01 LR
ann_model2 = build_ann(128, SGD, 0.01)
start_time = time.time()

ann_model2.fit(
    x_train, y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.7283 - loss: 1.0345 - val_accuracy: 0.9036 - val_loss: 0.3586
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9007 - loss: 0.3539 - val_accuracy: 0.9140 - val_loss: 0.2978
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9179 - loss: 0.2923 - val_accuracy: 0.9267 - val_loss: 0.2641
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9248 - loss: 0.2667 - val_accuracy: 0.9329 - val_loss: 0.2421
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9299 - loss: 0.2457 - val_accuracy: 0.9357 - val_loss: 0.2247
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9368 - loss: 0.2247 - val_accuracy: 0.9388 - val_loss: 0.2116
Epoch 7/10
[1m1

In [26]:
val_loss, val_accuracy = ann_model2.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9436 - loss: 0.1931


In [27]:
# ANN model 3 => 128 neuron unit / Adam optimizer / 0.001 LR
ann_model3 = build_ann(128, Adam, 0.001)
start_time = time.time()

ann_model3.fit(
    x_train, y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8807 - loss: 0.4290 - val_accuracy: 0.9602 - val_loss: 0.1341
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9651 - loss: 0.1185 - val_accuracy: 0.9683 - val_loss: 0.1071
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9763 - loss: 0.0782 - val_accuracy: 0.9755 - val_loss: 0.0818
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9821 - loss: 0.0585 - val_accuracy: 0.9753 - val_loss: 0.0807
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9867 - loss: 0.0437 - val_accuracy: 0.9759 - val_loss: 0.0803
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9903 - loss: 0.0325 - val_accuracy: 0.9765 - val_loss: 0.0813
Epoch 7/10
[1m1

In [28]:
val_loss, val_accuracy = ann_model3.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9724 - loss: 0.1067


In [29]:
# ANN model 4 => 256 neuron unit / RMSprop optimizer / 0.001 LR
ann_model4 = build_ann(256, RMSprop, 0.001)
start_time = time.time()

ann_model4.fit(
    x_train, y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8936 - loss: 0.3692 - val_accuracy: 0.9654 - val_loss: 0.1142
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9694 - loss: 0.1033 - val_accuracy: 0.9728 - val_loss: 0.0933
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9789 - loss: 0.0682 - val_accuracy: 0.9735 - val_loss: 0.0878
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9845 - loss: 0.0528 - val_accuracy: 0.9789 - val_loss: 0.0743
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9883 - loss: 0.0421 - val_accuracy: 0.9789 - val_loss: 0.0722
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9907 - loss: 0.0319 - val_accuracy: 0.9801 - val_loss: 0.0737
Epoch 7/10
[1m1

In [30]:
val_loss, val_accuracy = ann_model4.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9736 - loss: 0.1268


In [31]:
# ANN model 5 => 128 neuron unit / SGD optimizer / 0.1 LR
ann_model5 = build_ann(128, SGD, 0.1)
start_time = time.time()

ann_model5.fit(
    x_train, y_train_cat,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.8698 - loss: 0.4580 - val_accuracy: 0.9515 - val_loss: 0.1651
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9567 - loss: 0.1515 - val_accuracy: 0.9645 - val_loss: 0.1171
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9693 - loss: 0.1050 - val_accuracy: 0.9670 - val_loss: 0.1060
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9772 - loss: 0.0798 - val_accuracy: 0.9741 - val_loss: 0.0859
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9832 - loss: 0.0628 - val_accuracy: 0.9785 - val_loss: 0.0734
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9849 - loss: 0.0517 - val_accuracy: 0.9770 - val_loss: 0.0756
Epoch 7/10
[1m1

In [32]:
val_loss, val_accuracy = ann_model5.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9707 - loss: 0.0932


### ann_model4 was the best with accuracy of 97.66% and loss of 0.1047

# CNN Model training for each different parameters


In [33]:
# testing with 10 epochs
cnn_model = build_cnn(16, 0.01, SGD, use_dropout=False)
start_time = time.time()

cnn_model.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/10:.2f} seconds")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.7050 - loss: 1.1401 - val_accuracy: 0.9170 - val_loss: 0.2836
Epoch 2/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.9180 - loss: 0.2802 - val_accuracy: 0.9334 - val_loss: 0.2290
Epoch 3/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9336 - loss: 0.2289 - val_accuracy: 0.9470 - val_loss: 0.1852
Epoch 4/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9445 - loss: 0.1910 - val_accuracy: 0.9527 - val_loss: 0.1623
Epoch 5/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9518 - loss: 0.1657 - val_accuracy: 0.9565 - val_loss: 0.1437
Epoch 6/10
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9558 - loss: 0.1480 - val_accuracy: 0.9619 - val_loss: 0.1275
Epoch 7/10
[1m9

In [34]:
val_loss, val_accuracy = cnn_model.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9657 - loss: 0.1128


In [35]:
# testing with 15 epochs
cnn_model2 = build_cnn(16, 0.01, SGD, use_dropout=False)
start_time = time.time()

cnn_model2.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.6764 - loss: 1.1994 - val_accuracy: 0.9127 - val_loss: 0.3063
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.9170 - loss: 0.2928 - val_accuracy: 0.9359 - val_loss: 0.2275
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9333 - loss: 0.2296 - val_accuracy: 0.9432 - val_loss: 0.1961
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.9434 - loss: 0.1948 - val_accuracy: 0.9486 - val_loss: 0.1795
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9500 - loss: 0.1730 - val_accuracy: 0.9565 - val_loss: 0.1503
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9570 - loss: 0.1469 - val_accuracy: 0.9595 - val_loss: 0.1345
Epoch 7/15
[1m9

In [36]:
val_loss, val_accuracy = cnn_model2.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9716 - loss: 0.0959


In [37]:
# testing with 5 epochs
cnn_model3 = build_cnn(16, 0.01, SGD, use_dropout=False)
start_time = time.time()

cnn_model3.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=5,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/5:.2f} seconds")

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.6840 - loss: 1.1520 - val_accuracy: 0.9142 - val_loss: 0.2972
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 16ms/step - accuracy: 0.9148 - loss: 0.2911 - val_accuracy: 0.9301 - val_loss: 0.2400
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9304 - loss: 0.2384 - val_accuracy: 0.9431 - val_loss: 0.2036
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9409 - loss: 0.1989 - val_accuracy: 0.9505 - val_loss: 0.1698
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9507 - loss: 0.1692 - val_accuracy: 0.9542 - val_loss: 0.1550

Average time per epoch: 14.02 seconds


In [38]:
val_loss, val_accuracy = cnn_model3.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9475 - loss: 0.1783


#found that 15 epochs is better and now trying different Learning rates(0.001,0.1)

In [39]:
cnn_model4 = build_cnn(16, 0.001, SGD, use_dropout=False)
start_time = time.time()

cnn_model4.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.2530 - loss: 2.2091 - val_accuracy: 0.7103 - val_loss: 1.7272
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.7360 - loss: 1.4583 - val_accuracy: 0.8333 - val_loss: 0.7612
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.8358 - loss: 0.6974 - val_accuracy: 0.8729 - val_loss: 0.5010
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.8696 - loss: 0.4984 - val_accuracy: 0.8895 - val_loss: 0.4129
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.8824 - loss: 0.4205 - val_accuracy: 0.8977 - val_loss: 0.3696
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.8915 - loss: 0.3821 - val_accuracy: 0.9037 - val_loss: 0.3435
Epoch 7/15
[1m9

In [40]:
val_loss, val_accuracy = cnn_model4.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9164 - loss: 0.2876


In [41]:
cnn_model5 = build_cnn(16, 0.1, SGD, use_dropout=False)
start_time = time.time()

cnn_model5.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.8326 - loss: 0.5459 - val_accuracy: 0.9637 - val_loss: 0.1149
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9671 - loss: 0.1101 - val_accuracy: 0.9664 - val_loss: 0.1020
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9785 - loss: 0.0724 - val_accuracy: 0.9804 - val_loss: 0.0590
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 15ms/step - accuracy: 0.9841 - loss: 0.0524 - val_accuracy: 0.9800 - val_loss: 0.0594
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9880 - loss: 0.0420 - val_accuracy: 0.9803 - val_loss: 0.0616
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 14ms/step - accuracy: 0.9903 - loss: 0.0325 - val_accuracy: 0.9842 - val_loss: 0.0537
Epoch 7/15
[1m9

In [42]:
val_loss, val_accuracy = cnn_model5.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9833 - loss: 0.0568


#we notice that lr=0.1 is better for the model accuracy

## now we try different Conv2D layers

In [43]:
# fitting with 2 Conv2D and 1 FC with the best parameters from the previous models (15 epoch, 0.1 LR)
cnn_model6 = Sequential()
cnn_model6.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model6.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model6.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model6.add(Flatten())
cnn_model6.add(Dense(10, activation="softmax"))
cnn_model6.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model6.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 40ms/step - accuracy: 0.8331 - loss: 0.5514 - val_accuracy: 0.9698 - val_loss: 0.0971
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 39ms/step - accuracy: 0.9700 - loss: 0.0976 - val_accuracy: 0.9803 - val_loss: 0.0635
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 39ms/step - accuracy: 0.9789 - loss: 0.0666 - val_accuracy: 0.9817 - val_loss: 0.0606
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 39ms/step - accuracy: 0.9826 - loss: 0.0554 - val_accuracy: 0.9807 - val_loss: 0.0582
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 39ms/step - accuracy: 0.9862 - loss: 0.0447 - val_accuracy: 0.9832 - val_loss: 0.0532
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 39ms/step - accuracy: 0.9880 - loss: 0.0377 - val_accuracy: 0.9867 - val_loss: 0.0434
Epoch 7/15
[1m9

In [44]:
val_loss, val_accuracy = cnn_model6.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9793 - loss: 0.0715


In [45]:
# fitting with 2 Conv2D and 2 FC with the best parameters from the previous models (15 epoch, 0.1 LR)
cnn_model7 = Sequential()
cnn_model7.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model7.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model7.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model7.add(Flatten())
cnn_model7.add(Dense(128, activation="relu"))
cnn_model7.add(Dense(10, activation="softmax"))
cnn_model7.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model7.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 44ms/step - accuracy: 0.8368 - loss: 0.5232 - val_accuracy: 0.9697 - val_loss: 0.0941
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.9780 - loss: 0.0724 - val_accuracy: 0.9774 - val_loss: 0.0732
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.9867 - loss: 0.0416 - val_accuracy: 0.9847 - val_loss: 0.0462
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.9912 - loss: 0.0300 - val_accuracy: 0.9869 - val_loss: 0.0398
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 43ms/step - accuracy: 0.9936 - loss: 0.0206 - val_accuracy: 0.9861 - val_loss: 0.0443
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.9951 - loss: 0.0164 - val_accuracy: 0.9884 - val_loss: 0.0384
Epoch 7/15
[1m9

In [46]:
val_loss, val_accuracy = cnn_model7.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9878 - loss: 0.0583


In [47]:
# fitting with 2 Conv2D and 3 FC with the best parameters from the previous models (15 epoch, 0.1 LR)
import time
cnn_model8 = Sequential()
cnn_model8.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model8.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model8.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model8.add(Flatten())
cnn_model8.add(Dense(256, activation="relu"))
cnn_model8.add(Dense(128, activation="relu"))
cnn_model8.add(Dense(10, activation="softmax"))
cnn_model8.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model8.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")


Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 51ms/step - accuracy: 0.8125 - loss: 0.5982 - val_accuracy: 0.9692 - val_loss: 0.0911
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 51ms/step - accuracy: 0.9790 - loss: 0.0682 - val_accuracy: 0.9831 - val_loss: 0.0530
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9871 - loss: 0.0413 - val_accuracy: 0.9843 - val_loss: 0.0458
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9920 - loss: 0.0257 - val_accuracy: 0.9857 - val_loss: 0.0468
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9937 - loss: 0.0200 - val_accuracy: 0.9850 - val_loss: 0.0485
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9959 - loss: 0.0126 - val_accuracy: 0.9875 - val_loss: 0.0406
Epoch 7/15
[1m9

In [48]:
val_loss, val_accuracy = cnn_model8.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9872 - loss: 0.0596


## Observation
We reached the highest accuracy (98.73%) so far with the following parameters :
*   2 Conv2D layers
*   3 FC layers
*   15 epoch
*   0.1 LR
*   64 Batch Size

* `cnn_model8`

--------------

### Now we try for different batch sizes:

*   128
*   192



In [49]:
# fitting with 128 batch size with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D and 3 FC )
cnn_model9 = Sequential()
cnn_model9.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model9.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model9.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model9.add(Flatten())
cnn_model9.add(Dense(256, activation="relu"))
cnn_model9.add(Dense(128, activation="relu"))
cnn_model9.add(Dense(10, activation="softmax"))
cnn_model9.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )


start_time = time.time()

cnn_model9.fit(
    x_train, y_train_cat,
    batch_size=128,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 99ms/step - accuracy: 0.7557 - loss: 0.7706 - val_accuracy: 0.9319 - val_loss: 0.2072
Epoch 2/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 98ms/step - accuracy: 0.9712 - loss: 0.0953 - val_accuracy: 0.9751 - val_loss: 0.0711
Epoch 3/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 98ms/step - accuracy: 0.9841 - loss: 0.0518 - val_accuracy: 0.9826 - val_loss: 0.0514
Epoch 4/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 98ms/step - accuracy: 0.9878 - loss: 0.0384 - val_accuracy: 0.9854 - val_loss: 0.0435
Epoch 5/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 98ms/step - accuracy: 0.9912 - loss: 0.0281 - val_accuracy: 0.9849 - val_loss: 0.0460
Epoch 6/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 99ms/step - accuracy: 0.9936 - loss: 0.0197 - val_accuracy: 0.9878 - val_loss: 0.0409
Epoch 7/15
[1m4

In [50]:
val_loss, val_accuracy = cnn_model9.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9855 - loss: 0.0592


In [51]:
# fitting with 192 batch size with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D and 3 FC )
cnn_model10 = Sequential()
cnn_model10.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model10.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model10.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model10.add(Flatten())
cnn_model10.add(Dense(256, activation="relu"))
cnn_model10.add(Dense(128, activation="relu"))
cnn_model10.add(Dense(10, activation="softmax"))
cnn_model10.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model10.fit(
    x_train, y_train_cat,
    batch_size=192,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 139ms/step - accuracy: 0.7276 - loss: 0.8831 - val_accuracy: 0.9499 - val_loss: 0.1558
Epoch 2/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 137ms/step - accuracy: 0.9568 - loss: 0.1374 - val_accuracy: 0.9732 - val_loss: 0.0874
Epoch 3/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 138ms/step - accuracy: 0.9741 - loss: 0.0843 - val_accuracy: 0.9738 - val_loss: 0.0805
Epoch 4/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 137ms/step - accuracy: 0.9813 - loss: 0.0592 - val_accuracy: 0.9805 - val_loss: 0.0600
Epoch 5/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 138ms/step - accuracy: 0.9864 - loss: 0.0452 - val_accuracy: 0.9810 - val_loss: 0.0574
Epoch 6/15
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 137ms/step - accuracy: 0.9875 - loss: 0.0392 - val_accuracy: 0.9861 - val_loss: 0.0434
Epoch 7/15

In [52]:
val_loss, val_accuracy = cnn_model10.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9852 - loss: 0.0493


## Observation :
We notice that when we raise the batch size the accuracy gets slightly lower so we will stick to the lowest batch size and the best parameters from the previous models:

* 2 Conv2D layers

* 3 FC layers

* 15 epoch

* 0.1 LR

* 64 Batch Size

---

### Now we try with different activation functions:

* sigmoid
* tanh
* leakyRelu



In [53]:
# fitting with sigmoid activation function with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size)
cnn_model11 = Sequential()
cnn_model11.add(Conv2D(16, (3, 3), activation='sigmoid', input_shape=(28, 28, 1)))
cnn_model11.add(Conv2D(16, (3, 3), activation='sigmoid', input_shape=(28, 28, 1)))
cnn_model11.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model11.add(Flatten())
cnn_model11.add(Dense(256, activation="sigmoid"))
cnn_model11.add(Dense(128, activation="sigmoid"))
cnn_model11.add(Dense(10, activation="softmax"))
cnn_model11.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model11.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 57ms/step - accuracy: 0.1047 - loss: 2.3180 - val_accuracy: 0.1010 - val_loss: 2.3230
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.1072 - loss: 2.3070 - val_accuracy: 0.1135 - val_loss: 2.3019
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.2100 - loss: 2.1705 - val_accuracy: 0.8284 - val_loss: 0.6261
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 55ms/step - accuracy: 0.8537 - loss: 0.5155 - val_accuracy: 0.9012 - val_loss: 0.3410
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.9006 - loss: 0.3321 - val_accuracy: 0.9044 - val_loss: 0.3195
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 55ms/step - accuracy: 0.9172 - loss: 0.2766 - val_accuracy: 0.9300 - val_loss: 0.2459
Epoch 7/15
[1m9

In [54]:
val_loss, val_accuracy = cnn_model11.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9627 - loss: 0.1250


In [55]:
# fitting with tanh activation function with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size)
cnn_model11 = Sequential()
cnn_model11.add(Conv2D(16, (3, 3), activation='tanh', input_shape=(28, 28, 1)))
cnn_model11.add(Conv2D(16, (3, 3), activation='tanh', input_shape=(28, 28, 1)))
cnn_model11.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model11.add(Flatten())
cnn_model11.add(Dense(256, activation="tanh"))
cnn_model11.add(Dense(128, activation="tanh"))
cnn_model11.add(Dense(10, activation="softmax"))
cnn_model11.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model11.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.8806 - loss: 0.3901 - val_accuracy: 0.9657 - val_loss: 0.1228
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 55ms/step - accuracy: 0.9761 - loss: 0.0832 - val_accuracy: 0.9664 - val_loss: 0.1018
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.9862 - loss: 0.0482 - val_accuracy: 0.9852 - val_loss: 0.0414
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.9914 - loss: 0.0314 - val_accuracy: 0.9862 - val_loss: 0.0411
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.9939 - loss: 0.0233 - val_accuracy: 0.9861 - val_loss: 0.0378
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 56ms/step - accuracy: 0.9963 - loss: 0.0154 - val_accuracy: 0.9869 - val_loss: 0.0372
Epoch 7/15
[1m9

In [56]:
val_loss, val_accuracy = cnn_model11.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9866 - loss: 0.0408


In [57]:
# fitting with relu6 activation function with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size)
cnn_model13 = Sequential()
cnn_model13.add(Conv2D(16, (3, 3), activation='relu6', input_shape=(28, 28, 1)))
cnn_model13.add(Conv2D(16, (3, 3), activation='relu6', input_shape=(28, 28, 1)))
cnn_model13.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model13.add(Flatten())
cnn_model13.add(Dense(256, activation="relu6"))
cnn_model13.add(Dense(128, activation="relu6"))
cnn_model13.add(Dense(10, activation="softmax"))
cnn_model13.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model13.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 52ms/step - accuracy: 0.8276 - loss: 0.5390 - val_accuracy: 0.9740 - val_loss: 0.0742
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 52ms/step - accuracy: 0.9790 - loss: 0.0686 - val_accuracy: 0.9808 - val_loss: 0.0609
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 51ms/step - accuracy: 0.9871 - loss: 0.0403 - val_accuracy: 0.9859 - val_loss: 0.0417
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9929 - loss: 0.0240 - val_accuracy: 0.9885 - val_loss: 0.0371
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9951 - loss: 0.0160 - val_accuracy: 0.9854 - val_loss: 0.0456
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9964 - loss: 0.0115 - val_accuracy: 0.9874 - val_loss: 0.0407
Epoch 7/15
[1m9

In [58]:
val_loss, val_accuracy = cnn_model13.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9876 - loss: 0.0471


## Observation:
we notice that `sigmoid` has lowered the accuracy with around 1% percent, and `tanh` lowered it with a slight amount same as `relu6`, so we stick to the same parameters from the previous phase:

* 2 Conv2D layers

* 3 FC layers

* 15 epoch

* 0.1 LR

* 64 Batch Size

* relu activation function

---


### Now we try with different optimizers:
* adam
* RMSprop

In [59]:
# fitting with adam optimizer with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size, relu activation)
cnn_model14 = Sequential()
cnn_model14.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model14.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model14.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model14.add(Flatten())
cnn_model14.add(Dense(256, activation="relu"))
cnn_model14.add(Dense(128, activation="relu"))
cnn_model14.add(Dense(10, activation="softmax"))
cnn_model14.compile(
optimizer = Adam(learning_rate=0.1),
loss="categorical_crossentropy",
metrics = ["accuracy"],
    )

start_time = time.time()

cnn_model14.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 63ms/step - accuracy: 0.1015 - loss: 4.1179 - val_accuracy: 0.1009 - val_loss: 2.3075
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 62ms/step - accuracy: 0.1052 - loss: 2.3095 - val_accuracy: 0.1135 - val_loss: 2.3060
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 62ms/step - accuracy: 0.1017 - loss: 2.3101 - val_accuracy: 0.0958 - val_loss: 2.3064
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 62ms/step - accuracy: 0.1023 - loss: 2.3103 - val_accuracy: 0.1032 - val_loss: 2.3067
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 62ms/step - accuracy: 0.1018 - loss: 2.3103 - val_accuracy: 0.1135 - val_loss: 2.3056
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 62ms/step - accuracy: 0.1065 - loss: 2.3101 - val_accuracy: 0.0974 - val_loss: 2.3069
Epoch 7/15
[1m9

In [60]:
val_loss, val_accuracy = cnn_model14.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.1160 - loss: 2.3038


In [61]:
# fitting with RMSprop optimizer with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size, relu activation)
cnn_model15 = Sequential()
cnn_model15.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model15.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model15.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model15.add(Flatten())
cnn_model15.add(Dense(256, activation="relu"))
cnn_model15.add(Dense(128, activation="relu"))
cnn_model15.add(Dense(10, activation="softmax"))
cnn_model15.compile(
optimizer = RMSprop(learning_rate=0.1),
loss="categorical_crossentropy",
metrics = ["accuracy"],
    )

start_time = time.time()

cnn_model15.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 57ms/step - accuracy: 0.1060 - loss: 95.7635 - val_accuracy: 0.1009 - val_loss: 2.3197
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 57ms/step - accuracy: 0.1044 - loss: 2.3104 - val_accuracy: 0.1032 - val_loss: 2.3205
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.1016 - loss: 2.3117 - val_accuracy: 0.1028 - val_loss: 2.3149
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.1035 - loss: 2.3109 - val_accuracy: 0.0982 - val_loss: 2.3083
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.1051 - loss: 2.3109 - val_accuracy: 0.1010 - val_loss: 2.3308
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 56ms/step - accuracy: 0.1007 - loss: 2.3116 - val_accuracy: 0.1135 - val_loss: 2.3194
Epoch 7/15
[1m

In [62]:
val_loss, val_accuracy = cnn_model15.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.0941 - loss: 2.3224


## Observation:
we noticed that the 2 different optimizers has lowered the accuracy to 1% which is not acceptable so we will get back to SGD optimizer:

* 2 Conv2D layers

* 3 FC layers

* 15 epoch

* 0.1 LR

* 64 Batch Size

* relu activation function

* SGD Optimizer

---


### Now we try with dropouts in different places:

* Dropout(0.3) after first FC

* Dropout(0.5) after first FC

* Dropout(0.3) after last CNN

In [63]:
# fitting with dropout of (0.3) after the first FC layer with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size, relu activation function, SGD optimizer,)
cnn_model16 = Sequential()
cnn_model16.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model16.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model16.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model16.add(Flatten())
cnn_model16.add(Dense(256, activation="relu"))
cnn_model16.add(Dropout(0.3))
cnn_model16.add(Dense(128, activation="relu"))
cnn_model16.add(Dense(10, activation="softmax"))
cnn_model16.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model16.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 51ms/step - accuracy: 0.8026 - loss: 0.6181 - val_accuracy: 0.9737 - val_loss: 0.0823
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9702 - loss: 0.0962 - val_accuracy: 0.9843 - val_loss: 0.0503
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9812 - loss: 0.0642 - val_accuracy: 0.9846 - val_loss: 0.0438
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9860 - loss: 0.0462 - val_accuracy: 0.9874 - val_loss: 0.0385
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9889 - loss: 0.0352 - val_accuracy: 0.9887 - val_loss: 0.0332
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9901 - loss: 0.0314 - val_accuracy: 0.9895 - val_loss: 0.0318
Epoch 7/15
[1m9

In [64]:
val_loss, val_accuracy = cnn_model16.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9884 - loss: 0.0446


In [65]:
# fitting with dropout of (0.5) after the first FC layer with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size, relu activation function, SGD optimizer,)
cnn_model17 = Sequential()
cnn_model17.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model17.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model17.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model17.add(Flatten())
cnn_model17.add(Dense(256, activation="relu"))
cnn_model17.add(Dropout(0.5))
cnn_model17.add(Dense(128, activation="relu"))
cnn_model17.add(Dense(10, activation="softmax"))
cnn_model17.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model17.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 52ms/step - accuracy: 0.7791 - loss: 0.6716 - val_accuracy: 0.9689 - val_loss: 0.0959
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9634 - loss: 0.1219 - val_accuracy: 0.9826 - val_loss: 0.0513
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 51ms/step - accuracy: 0.9748 - loss: 0.0837 - val_accuracy: 0.9843 - val_loss: 0.0469
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 51ms/step - accuracy: 0.9810 - loss: 0.0641 - val_accuracy: 0.9738 - val_loss: 0.0762
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 51ms/step - accuracy: 0.9846 - loss: 0.0506 - val_accuracy: 0.9895 - val_loss: 0.0351
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 50ms/step - accuracy: 0.9865 - loss: 0.0424 - val_accuracy: 0.9882 - val_loss: 0.0352
Epoch 7/15
[1m9

In [66]:
val_loss, val_accuracy = cnn_model17.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9898 - loss: 0.0371


In [67]:
# fitting with dropout of (0.3) after the last CNN layer with the best parameters from the previous models (15 epoch, 0.1 LR, 2 Conv2D, 3 FC, 64 batch size, relu activation function, SGD optimizer,)

cnn_model18 = Sequential()
cnn_model18.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model18.add(Conv2D(16, (3, 3), activation='relu', input_shape=(28, 28, 1)))
cnn_model18.add(Dropout(0.3))
cnn_model18.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
cnn_model18.add(Flatten())
cnn_model18.add(Dense(256, activation="relu"))
cnn_model18.add(Dense(128, activation="relu"))
cnn_model18.add(Dense(10, activation="softmax"))
cnn_model18.compile(
optimizer=SGD(learning_rate=0.1),
loss="categorical_crossentropy",
metrics=["accuracy"],
    )

start_time = time.time()

cnn_model18.fit(
    x_train, y_train_cat,
    batch_size=64,
    epochs=15,
    validation_data=(x_test, y_test_cat),
    shuffle=True
)

total_time = time.time() - start_time
print(f"\nAverage time per epoch: {total_time/15:.2f} seconds")

Epoch 1/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 59ms/step - accuracy: 0.8348 - loss: 0.5254 - val_accuracy: 0.9733 - val_loss: 0.0903
Epoch 2/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - accuracy: 0.9760 - loss: 0.0772 - val_accuracy: 0.9846 - val_loss: 0.0528
Epoch 3/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - accuracy: 0.9850 - loss: 0.0486 - val_accuracy: 0.9823 - val_loss: 0.0550
Epoch 4/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - accuracy: 0.9896 - loss: 0.0326 - val_accuracy: 0.9846 - val_loss: 0.0459
Epoch 5/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - accuracy: 0.9925 - loss: 0.0236 - val_accuracy: 0.9856 - val_loss: 0.0398
Epoch 6/15
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 58ms/step - accuracy: 0.9941 - loss: 0.0184 - val_accuracy: 0.9877 - val_loss: 0.0374
Epoch 7/15
[1m9

In [68]:
val_loss, val_accuracy = cnn_model18.evaluate(x_test, y_test_cat)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9870 - loss: 0.0484
