<a href="https://colab.research.google.com/github/TisSeferi/proj1_hci/blob/main/proj1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Loading MNIST

In [1]:
from keras.datasets import mnist
import numpy as np

# Load the MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the images.
train_images = (train_images / 255)
test_images = (test_images / 255)

# Reshape the images.
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


## Baseline CNN Model

In [2]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

first_filter = 8
second_filter = 16
third_filter = 32
filter_size = 3
pool_size = 2

baseline_model = Sequential([
    Conv2D(first_filter, filter_size, input_shape=(28, 28, 1), padding="same", activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Conv2D(second_filter, filter_size, padding="same", activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Conv2D(third_filter, filter_size, padding="same", activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

baseline_model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

history_baseline = baseline_model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1
)

test_loss, test_acc = baseline_model.evaluate(test_images, test_labels)
print(f"Baseline Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.8790 - loss: 0.3833 - val_accuracy: 0.9818 - val_loss: 0.0592
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9815 - loss: 0.0575 - val_accuracy: 0.9882 - val_loss: 0.0451
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9890 - loss: 0.0379 - val_accuracy: 0.9892 - val_loss: 0.0434
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9900 - loss: 0.0318 - val_accuracy: 0.9902 - val_loss: 0.0362
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9918 - loss: 0.0250 - val_accuracy: 0.9900 - val_loss: 0.0429
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9868 - loss: 0.0416
Baseline Test Accuracy: 0.9885, Test Loss: 0.0360


## Tuning Hyperparameters

In [3]:
# Training with 10 epochs
history_epochs10 = baseline_model.fit(
    train_images,
    train_labels,
    epochs=10,
    validation_split=0.1
)
test_loss, test_acc = baseline_model.evaluate(test_images, test_labels)
print(f"Epochs=10 - Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9940 - loss: 0.0199 - val_accuracy: 0.9915 - val_loss: 0.0384
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9957 - loss: 0.0142 - val_accuracy: 0.9913 - val_loss: 0.0385
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9961 - loss: 0.0134 - val_accuracy: 0.9898 - val_loss: 0.0510
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9969 - loss: 0.0105 - val_accuracy: 0.9885 - val_loss: 0.0632
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9961 - loss: 0.0122 - val_accuracy: 0.9920 - val_loss: 0.0397
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9969 - loss: 0.0095 - val_accuracy: 0.9918 - val_loss: 0.0491
Epoch 7/10
[1m1

In [4]:
# Batch size = 64
history_batch64 = baseline_model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=5,
    validation_split=0.1
)
test_loss, test_acc = baseline_model.evaluate(test_images, test_labels)
print(f"Batch=64 - Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

# Batch size = 128
history_batch128 = baseline_model.fit(
    train_images,
    train_labels,
    batch_size=128,
    epochs=5,
    validation_split=0.1
)
test_loss, test_acc = baseline_model.evaluate(test_images, test_labels)
print(f"Batch=128 - Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Epoch 1/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9986 - loss: 0.0044 - val_accuracy: 0.9917 - val_loss: 0.0631
Epoch 2/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9992 - loss: 0.0028 - val_accuracy: 0.9915 - val_loss: 0.0824
Epoch 3/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9992 - loss: 0.0021 - val_accuracy: 0.9920 - val_loss: 0.0658
Epoch 4/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9992 - loss: 0.0019 - val_accuracy: 0.9918 - val_loss: 0.0715
Epoch 5/5
[1m844/844[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.9995 - loss: 0.0014 - val_accuracy: 0.9912 - val_loss: 0.0780
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9900 - loss: 0.0769
Batch=64 - Test Accuracy: 0.9922, Test Loss: 0.0567
Epoch 1/5
[1m422/422[0m [32m━━━

In [5]:
# Filters = 16, 32, 64
filters_model = Sequential([
    Conv2D(16, filter_size, input_shape=(28, 28, 1), padding="same", activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Conv2D(32, filter_size, padding="same", activation='relu'),
    MaxPooling2D(pool_size=pool_size),
    Conv2D(64, filter_size, padding="same", activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

filters_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

history_filters = filters_model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1
)

test_loss, test_acc = filters_model.evaluate(test_images, test_labels)
print(f"Filters=16-32-64 - Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - accuracy: 0.8979 - loss: 0.3289 - val_accuracy: 0.9850 - val_loss: 0.0531
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9840 - loss: 0.0503 - val_accuracy: 0.9892 - val_loss: 0.0408
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9897 - loss: 0.0310 - val_accuracy: 0.9903 - val_loss: 0.0329
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9930 - loss: 0.0229 - val_accuracy: 0.9908 - val_loss: 0.0305
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9953 - loss: 0.0169 - val_accuracy: 0.9908 - val_loss: 0.0343
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9895 - loss: 0.0359
Filters=16-32-64 - Test Accuracy: 0.9913, Test Loss: 0.0291


## Different Optimizers

In [6]:
def make_cnn():
    model = Sequential([
        Conv2D(8, 3, input_shape=(28, 28, 1), padding="same", activation='relu'),
        MaxPooling2D(pool_size=2),
        Conv2D(16, 3, padding="same", activation='relu'),
        MaxPooling2D(pool_size=2),
        Conv2D(32, 3, padding="same", activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dense(10, activation='softmax'),
    ])
    return model

# Adam version
adam_model = make_cnn()
adam_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
adam_model.fit(train_images, train_labels, epochs=5, validation_split=0.1)
print("Adam Test:", adam_model.evaluate(test_images, test_labels))

# SGD version
sgd_model = make_cnn()
sgd_model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
sgd_model.fit(train_images, train_labels, epochs=5, validation_split=0.1)
print("SGD Test:", sgd_model.evaluate(test_images, test_labels))


Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - accuracy: 0.8793 - loss: 0.3956 - val_accuracy: 0.9822 - val_loss: 0.0662
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9805 - loss: 0.0612 - val_accuracy: 0.9850 - val_loss: 0.0560
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9873 - loss: 0.0419 - val_accuracy: 0.9890 - val_loss: 0.0362
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9918 - loss: 0.0276 - val_accuracy: 0.9883 - val_loss: 0.0350
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9928 - loss: 0.0236 - val_accuracy: 0.9892 - val_loss: 0.0366
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9845 - loss: 0.0452
Adam Test: [0.03307317569851875, 0.9890999794006348]
Epoch 1/5
[1m1688/168

## Regularization Methods

In [7]:
from keras.regularizers import l2
from keras.layers import Dropout

reg_model = Sequential([
    Conv2D(first_filter, filter_size, input_shape=(28, 28, 1), padding="same", activation='relu', kernel_regularizer=l2(0.001)),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Conv2D(second_filter, filter_size, padding="same", activation='relu', kernel_regularizer=l2(0.001)),
    MaxPooling2D(pool_size=pool_size),
    Conv2D(third_filter, filter_size, padding="same", activation='relu', kernel_regularizer=l2(0.001)),
    Flatten(),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

reg_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

history_reg = reg_model.fit(
    train_images,
    train_labels,
    epochs=5,
    validation_split=0.1
)

test_loss, test_acc = reg_model.evaluate(test_images, test_labels)
print(f"Test Accuracy with Dropout + L2: {test_acc:.4f}, Test Loss: {test_loss:.4f}")


Epoch 1/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 6ms/step - accuracy: 0.7804 - loss: 0.7934 - val_accuracy: 0.9777 - val_loss: 0.2012
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9470 - loss: 0.3060 - val_accuracy: 0.9825 - val_loss: 0.1730
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9578 - loss: 0.2546 - val_accuracy: 0.9848 - val_loss: 0.1565
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9622 - loss: 0.2370 - val_accuracy: 0.9832 - val_loss: 0.1539
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9659 - loss: 0.2188 - val_accuracy: 0.9852 - val_loss: 0.1518
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9847 - loss: 0.1489
Test Accuracy with Dropout + L2: 0.9868, Test Loss: 0.1421


In [8]:
#Model definition function for clarity
def build_final_model():
    model = Sequential([
        Conv2D(16, filter_size, input_shape=(28, 28, 1), padding="same", activation='relu'),
        MaxPooling2D(pool_size=pool_size),
        Dropout(0.25),
        Conv2D(32, filter_size, padding="same", activation='relu'),
        MaxPooling2D(pool_size=pool_size),
        Conv2D(64, filter_size, padding="same", activation='relu'),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

final_model = build_final_model()
final_model.fit(train_images, train_labels, epochs=10, batch_size=128, validation_split=0.1)
test_loss, test_acc = final_model.evaluate(test_images, test_labels)
print(f"10 Epochs - Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.7117 - loss: 0.8603 - val_accuracy: 0.9770 - val_loss: 0.0807
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9422 - loss: 0.1950 - val_accuracy: 0.9837 - val_loss: 0.0595
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9614 - loss: 0.1323 - val_accuracy: 0.9870 - val_loss: 0.0481
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9693 - loss: 0.1051 - val_accuracy: 0.9885 - val_loss: 0.0437
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9723 - loss: 0.0933 - val_accuracy: 0.9892 - val_loss: 0.0414
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9771 - loss: 0.0798 - val_accuracy: 0.9883 - val_loss: 0.0416
Epoch 7/10
[1m422/422[0m

## Final Model Summary

In [9]:
final_model.summary()

## Saved SGD Model

In [10]:
final_model.save("cnn_mnist_model.keras")

## Resizing MNIST for Pretrained Models

In [16]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import numpy as np

# Load the MNIST dataset
(train_images_64, train_labels_64), (test_images_64, test_labels_64) = mnist.load_data()

# expand new axis, channel axis
train_images_64 = np.expand_dims(train_images_64, axis=-1)
train_images_64 = np.repeat(train_images_64, 3, axis=-1)
train_images_64 = train_images_64.astype('float32') / 255.0
train_images_64 = tf.image.resize(train_images_64, (64, 64))
train_labels_64 = to_categorical(train_labels_64, num_classes=10)

test_images_64 = np.expand_dims(test_images_64, axis=-1)
test_images_64 = np.repeat(test_images_64, 3, axis=-1)
test_images_64 = test_images_64.astype('float32') / 255.0
test_images_64 = tf.image.resize(test_images_64, (64, 64))
test_labels_64 = to_categorical(test_labels_64, num_classes=10)



print("Training set:", train_images_64.shape)
print("Testing set:", test_images_64.shape)

Training set: (60000, 64, 64, 3)
Testing set: (10000, 64, 64, 3)


## Testing ResNet50

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

input = tf.keras.Input(shape=(64, 64, 3))
resnet_base = ResNet50(weights='imagenet', include_top=False, input_tensor=input)

# Freeze most of the ResNet50 layers
for layer in resnet_base.layers[:-10]:  # last 10 layers retrained
    layer.trainable = False

gap = layers.GlobalAveragePooling2D()(resnet_base.output)
output = layers.Dense(10, activation='softmax')(gap)

resnet_model = models.Model(inputs=input, outputs=output)

resnet_model.compile(
          loss  = tf.keras.losses.CategoricalCrossentropy(),
          metrics = [tf.keras.metrics.CategoricalAccuracy()],
          optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4))

resnet_model.fit(
    train_images_64,
    train_labels_64,
    batch_size=128,
    epochs=10,
    validation_split=0.1
)

resnet_test_loss, resnet_test_acc = resnet_model.evaluate(test_images_64, test_labels_64)
print(f"ResNet50 (64x64x3) - Test Accuracy: {resnet_test_acc:.4f}, Test Loss: {resnet_test_loss:.4f}")

Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 64ms/step - categorical_accuracy: 0.8664 - loss: 0.4358 - val_categorical_accuracy: 0.9170 - val_loss: 0.2519
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 43ms/step - categorical_accuracy: 0.9766 - loss: 0.0728 - val_categorical_accuracy: 0.9785 - val_loss: 0.0660
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 40ms/step - categorical_accuracy: 0.9834 - loss: 0.0510 - val_categorical_accuracy: 0.9838 - val_loss: 0.0507
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 40ms/step - categorical_accuracy: 0.9883 - loss: 0.0367 - val_categorical_accuracy: 0.9785 - val_loss: 0.0708
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 39ms/step - categorical_accuracy: 0.9900 - loss: 0.0300 - val_categorical_accuracy: 0.9788 - val_loss: 0.0703
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [None]:
resnet_base.summary()  # check which layers are frozen
print(f"Trainable layers: {len([l for l in resnet_base.layers if l.trainable])}")

Trainable layers: 10


## Testing MobileNetV3

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras import layers, models

# Input
input = tf.keras.Input(shape=(64, 64, 3))

# Load MobileNetV3 (pretrained on ImageNet), without top classifier
mobilenet_base = MobileNetV3Small(
    weights="imagenet",
    include_top=False,
    input_tensor=input
)

# Freeze most layers, retrain only the last 10
for layer in mobilenet_base.layers[:-10]:
    layer.trainable = False

# Add custom classifier
gap = layers.GlobalAveragePooling2D()(mobilenet_base.output)
fc1 = layers.Dense(128, activation='relu')(gap)
dropout = layers.Dropout(0.5)(fc1)
output = layers.Dense(10, activation='softmax')(dropout)

mobilenet_model = models.Model(inputs=input, outputs=output)

# Compile
mobilenet_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()]
)

# Train
history_mobilenet = mobilenet_model.fit(
    train_images_64,
    train_labels_64,
    epochs=10,
    batch_size=128,
    validation_split=0.1
)

# Evaluate
mobilenet_test_loss, mobilenet_test_acc = mobilenet_model.evaluate(test_images_64, test_labels_64)
print(f"MobileNetV3-Small (64x64x3) - Test Accuracy: {mobilenet_test_acc:.4f}, Test Loss: {mobilenet_test_loss:.4f}")


## Testing VGG16

In [17]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Load VGG16 base
vgg_base = VGG16(weights="imagenet", include_top=False, input_shape=(64, 64, 3))

# Freeze most layers, fine-tune last few
for layer in vgg_base.layers[:-4]:
    layer.trainable = False

# Add classifier
vgg_model = models.Sequential([
    vgg_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

# Compile
vgg_model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

# Train
history_vgg = vgg_model.fit(
    train_images_64, train_labels_64,
    batch_size=128,
    epochs=5,
    validation_split=0.1
)

# Evaluate
vgg_test_loss, vgg_test_acc = vgg_model.evaluate(test_images_64, test_labels_64)
print(f"VGG16 (64x64x3) - Test Accuracy: {vgg_test_acc:.4f}, Test Loss: {vgg_test_loss:.4f}")


Epoch 1/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 106ms/step - accuracy: 0.8939 - loss: 0.3311 - val_accuracy: 0.9895 - val_loss: 0.0324
Epoch 2/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 86ms/step - accuracy: 0.9901 - loss: 0.0348 - val_accuracy: 0.9898 - val_loss: 0.0368
Epoch 3/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 87ms/step - accuracy: 0.9920 - loss: 0.0264 - val_accuracy: 0.9932 - val_loss: 0.0274
Epoch 4/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 88ms/step - accuracy: 0.9942 - loss: 0.0190 - val_accuracy: 0.9913 - val_loss: 0.0361
Epoch 5/5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 88ms/step - accuracy: 0.9936 - loss: 0.0206 - val_accuracy: 0.9945 - val_loss: 0.0224
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 24ms/step - accuracy: 0.9889 - loss: 0.0365
VGG16 (64x64x3) - Test Accuracy: 0.9920, Test Loss: 0.0272


In [22]:
from google.colab.patches import cv2_imshow
import cv2
import numpy as np
import tensorflow as tf

# 1. Load the pre-trained model
model = tf.keras.models.load_model('cnn_mnist_model.keras')

# 2. Load the video file
video_path = 'digits_video.mp4'
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video.")
else:
    print("Video loaded successfully. Processing frames...")
    # 3. Loop through the video frames
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break # Break the loop if the video has ended

        # Process the Frame
        output_frame = frame.copy()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for c in contours:
            if cv2.contourArea(c) > 50:
                (x, y, w, h) = cv2.boundingRect(c)


                roi = thresh[y:y + h, x:x + w]
                roi_resized = cv2.resize(roi, (28, 28), interpolation=cv2.INTER_AREA)
                roi_normalized = roi_resized.astype('float32') / 255.0
                roi_input = np.reshape(roi_normalized, (1, 28, 28, 1)) # Shape for the model

                # Get the model's prediction
                prediction = model.predict(roi_input, verbose=0)
                probability = np.max(prediction)
                predicted_digit = np.argmax(prediction)


                if probability > 0.85:
                    cv2.rectangle(output_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    label = f"Digit: {predicted_digit} ({probability:.2f})"
                    cv2.putText(output_frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display the frame in the Colab output
        cv2_imshow(output_frame)

    # Release resources
    cap.release()
    print("Finished processing video.")

Video loaded successfully. Processing frames...
Finished processing video.
