# CNN Experiments with MNIST Dataset
In this notebook, we explore the MNIST dataset with CNN models by performing various experiments:
- Changing the number of filters in Conv2D layers.
- Adding or removing layers.
- Trying different optimizers and learning rates.
- Modifying the number of epochs.
- Adding dropout layers to prevent overfitting.

In [1]:
import gzip
import struct
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

# Helper function to load MNIST images
def load_mnist_images(file_path):
    """Load an IDX3 image file and return its pixels scaled to [0, 1].

    Args:
        file_path: Path to an IDX3-ubyte image file. A path ending in
            ``.gz`` is decompressed transparently with ``gzip``.

    Returns:
        A float64 array of shape ``(num_images, rows, cols)`` holding the
        raw uint8 pixel values divided by 255.0.

    Raises:
        ValueError: If the magic number is not 2051 (the IDX3 image marker)
            or the payload size does not match the header dimensions.
    """
    opener = gzip.open if str(file_path).endswith('.gz') else open
    with opener(file_path, 'rb') as f:
        magic, num_images, rows, cols = struct.unpack('>IIII', f.read(16))
        # Reject label files / arbitrary binaries instead of misparsing them.
        if magic != 2051:
            raise ValueError(
                f'{file_path}: expected IDX3 image magic 2051, got {magic}'
            )
        # Read through the file object (not np.fromfile) so gzip streams work too.
        payload = f.read()

    expected_size = num_images * rows * cols
    if len(payload) != expected_size:
        raise ValueError(
            f'{file_path}: header declares {expected_size} pixel bytes, '
            f'found {len(payload)}'
        )

    data = np.frombuffer(payload, dtype=np.uint8).reshape(num_images, rows, cols)
    # The division allocates a fresh, writable float64 array.
    return data / 255.0

# Helper function to load MNIST labels
def load_mnist_labels(file_path):
    """Load an IDX1 label file and return the labels as a uint8 vector.

    Args:
        file_path: Path to an IDX1-ubyte label file. A path ending in
            ``.gz`` is decompressed transparently with ``gzip``.

    Returns:
        A writable 1-D uint8 array with one entry per label.

    Raises:
        ValueError: If the magic number is not 2049 (the IDX1 label marker)
            or the number of label bytes differs from the header count.
    """
    opener = gzip.open if str(file_path).endswith('.gz') else open
    with opener(file_path, 'rb') as f:
        magic, num_labels = struct.unpack('>II', f.read(8))
        # Reject image files / arbitrary binaries instead of misparsing them.
        if magic != 2049:
            raise ValueError(
                f'{file_path}: expected IDX1 label magic 2049, got {magic}'
            )
        payload = f.read()

    # The header count was previously parsed but never checked; a truncated
    # download would have loaded silently with too few labels.
    if len(payload) != num_labels:
        raise ValueError(
            f'{file_path}: header declares {num_labels} labels, '
            f'found {len(payload)}'
        )

    # frombuffer yields a read-only view; copy so callers get a writable array.
    return np.frombuffer(payload, dtype=np.uint8).copy()

# Locations of the four IDX files (update these to your local paths)
train_images_path = 'train-images.idx3-ubyte'
train_labels_path = 'train-labels.idx1-ubyte'
test_images_path = 't10k-images.idx3-ubyte'
test_labels_path = 't10k-labels.idx1-ubyte'

# Read the training and test splits from disk
train_images = load_mnist_images(train_images_path)
train_labels = load_mnist_labels(train_labels_path)
test_images = load_mnist_images(test_images_path)
test_labels = load_mnist_labels(test_labels_path)

# Append a trailing channel axis: (N, rows, cols) -> (N, rows, cols, 1)
train_images = np.expand_dims(train_images, axis=-1)
test_images = np.expand_dims(test_images, axis=-1)

# Sanity check: report the shape of every array
print(train_images.shape, train_labels.shape, test_images.shape, test_labels.shape)

(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)


## Base Model

In [2]:
# Define the base CNN model (64/128/128 convolution filters)
def create_cnn_model_more_filters():
    """Build the uncompiled base CNN used by the base-model and optimizer cells.

    Architecture: three 3x3 ReLU convolutions (64, 128, 128 filters) with
    2x2 max-pooling after the first two, then Flatten, a 128-unit ReLU
    dense layer, and a 10-way softmax output.

    Returns:
        A ``Sequential`` model expecting inputs of shape (28, 28, 1).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D makes Keras emit a UserWarning.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax')
    ])
    return model

# Build the base network and configure the optimizer, loss and metric
model = create_cnn_model_more_filters()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=10,
)

# Score the trained network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy with more filters: {test_acc}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 13ms/step - accuracy: 0.9155 - loss: 0.2659 - val_accuracy: 0.9865 - val_loss: 0.0421
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 13ms/step - accuracy: 0.9880 - loss: 0.0403 - val_accuracy: 0.9878 - val_loss: 0.0385
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9908 - loss: 0.0284 - val_accuracy: 0.9919 - val_loss: 0.0277
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9939 - loss: 0.0189 - val_accuracy: 0.9885 - val_loss: 0.0398
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 13ms/step - accuracy: 0.9946 - loss: 0.0157 - val_accuracy: 0.9916 - val_loss: 0.0288
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9966 - loss: 0.0113 - val_accuracy: 0.9925 - val_loss: 0.0285
Epoc

## Experiment 1: Changing number of filters

In [5]:
# Define a CNN model with 512 filters in every convolutional layer
def create_cnn_model_extreme_filters():
    """Build the uncompiled "extreme filters" CNN for Experiment 1.

    Architecture: three 3x3 ReLU convolutions with 512 filters each and
    2x2 max-pooling after the first two, then Flatten, a 15-unit ReLU
    dense layer, and a 10-way softmax output.

    Returns:
        A ``Sequential`` model expecting inputs of shape (28, 28, 1).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D makes Keras emit a UserWarning.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(512, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(512, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(512, (3, 3), activation='relu'),
        layers.Flatten(),
        # NOTE(review): this hidden layer is 15 units wide while the base
        # model uses 128, so this experiment varies more than the filter
        # count - confirm whether that is intended.
        layers.Dense(15, activation='relu'),
        # One output unit per digit class (0-9). The previous 15-way softmax
        # carried five outputs that no label can ever select.
        layers.Dense(10, activation='softmax')
    ])
    return model

# Build the 512-filter network and configure the optimizer, loss and metric
model = create_cnn_model_extreme_filters()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=10,
)

# Score the trained network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy with more filters: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m428s[0m 227ms/step - accuracy: 0.8522 - loss: 0.4775 - val_accuracy: 0.9863 - val_loss: 0.0479
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 228ms/step - accuracy: 0.9863 - loss: 0.0487 - val_accuracy: 0.9877 - val_loss: 0.0414
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m411s[0m 219ms/step - accuracy: 0.9918 - loss: 0.0289 - val_accuracy: 0.9886 - val_loss: 0.0396
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m414s[0m 221ms/step - accuracy: 0.9943 - loss: 0.0194 - val_accuracy: 0.9915 - val_loss: 0.0299
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m412s[0m 220ms/step - accuracy: 0.9955 - loss: 0.0136 - val_accuracy: 0.9879 - val_loss: 0.0489
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m408s[0m 218ms/step - accuracy: 0.9959 - loss: 0.0150 - val_accuracy: 0.9898 - val_loss:

## Experiment 2: Adding Dropout Layers

In [3]:
# Define a CNN model with dropout layers
def create_cnn_model_with_dropout():
    """Build the uncompiled dropout-regularized CNN for Experiments 2 and 5.

    Architecture: two 3x3 ReLU convolutions (32, 64 filters), each followed
    by 2x2 max-pooling and Dropout(0.25); then Flatten, a 128-unit ReLU
    dense layer, Dropout(0.5), and a 10-way softmax output.

    Returns:
        A ``Sequential`` model expecting inputs of shape (28, 28, 1).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D makes Keras emit a UserWarning.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(10, activation='softmax')
    ])
    return model

# Build the dropout network and configure the optimizer, loss and metric
model = create_cnn_model_with_dropout()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for 10 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=10,
)

# Score the trained network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy with dropout layers: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.8385 - loss: 0.4993 - val_accuracy: 0.9818 - val_loss: 0.0550
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.9682 - loss: 0.1049 - val_accuracy: 0.9885 - val_loss: 0.0350
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.9772 - loss: 0.0775 - val_accuracy: 0.9911 - val_loss: 0.0262
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9806 - loss: 0.0634 - val_accuracy: 0.9917 - val_loss: 0.0277
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.9822 - loss: 0.0605 - val_accuracy: 0.9907 - val_loss: 0.0264
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.9844 - loss: 0.0496 - val_accuracy: 0.9919 - val_loss: 0.0233
Epoch 7/10

## Experiment 3: Trying SGD optimizer

In [6]:
# Train the base architecture with plain SGD (learning rate 0.01)
model = create_cnn_model_more_filters()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    metrics=['accuracy'],
)

# Fit for 10 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=10,
)

# Score the trained network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy with SGD optimizer: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.6857 - loss: 1.0646 - val_accuracy: 0.9583 - val_loss: 0.1365
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9577 - loss: 0.1410 - val_accuracy: 0.9759 - val_loss: 0.0804
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 12ms/step - accuracy: 0.9721 - loss: 0.0903 - val_accuracy: 0.9809 - val_loss: 0.0632
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 12ms/step - accuracy: 0.9779 - loss: 0.0697 - val_accuracy: 0.9830 - val_loss: 0.0525
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 13ms/step - accuracy: 0.9832 - loss: 0.0558 - val_accuracy: 0.9838 - val_loss: 0.0499
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.9851 - loss: 0.0481 - val_accuracy: 0.9855 - val_loss: 0.0464
Epoc

## Experiment 4: Trying RMSprop Optimizer

In [7]:
# Train the base architecture with the RMSprop optimizer (learning rate 0.001)
model = create_cnn_model_more_filters()
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# The test split is passed as validation data for per-epoch monitoring.
history = model.fit(train_images, train_labels, epochs=10, validation_data=(test_images, test_labels))

# Evaluate the model
test_loss, test_acc = model.evaluate(test_images, test_labels)
# The message previously said "SGD optimizer" (copy-paste slip), which
# mislabeled this experiment's result.
print(f'Test accuracy with RMSprop optimizer: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 14ms/step - accuracy: 0.9148 - loss: 0.2639 - val_accuracy: 0.9884 - val_loss: 0.0342
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.9882 - loss: 0.0384 - val_accuracy: 0.9899 - val_loss: 0.0362
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 14ms/step - accuracy: 0.9925 - loss: 0.0253 - val_accuracy: 0.9902 - val_loss: 0.0362
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.9945 - loss: 0.0183 - val_accuracy: 0.9915 - val_loss: 0.0339
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 13ms/step - accuracy: 0.9957 - loss: 0.0145 - val_accuracy: 0.9923 - val_loss: 0.0318
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 13ms/step - accuracy: 0.9973 - loss: 0.0093 - val_accuracy: 0.9911 - val_loss: 0.0407
Epoc

## Experiment 5: Trying SGD Optimizer with Dropout Layer

In [4]:
# Train the dropout architecture with plain SGD (learning rate 0.01)
model = create_cnn_model_with_dropout()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    metrics=['accuracy'],
)

# Fit for 10 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=10,
)

# Score the trained network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy with SGD optimizer: {test_acc}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.5268 - loss: 1.3730 - val_accuracy: 0.9423 - val_loss: 0.2051
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 6ms/step - accuracy: 0.8932 - loss: 0.3430 - val_accuracy: 0.9630 - val_loss: 0.1262
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.9290 - loss: 0.2378 - val_accuracy: 0.9698 - val_loss: 0.1033
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9384 - loss: 0.2044 - val_accuracy: 0.9738 - val_loss: 0.0796
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9499 - loss: 0.1697 - val_accuracy: 0.9760 - val_loss: 0.0724
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 6ms/step - accuracy: 0.9569 - loss: 0.1426 - val_accuracy: 0.9793 - val_loss: 0.0628
Epoch 7/10

## Experiment 6: Weak CNN Model Architecture
This model deliberately uses fewer layers and very few filters to produce a low-performing baseline. General ways to weaken a model include:

1. Reduce Model Complexity:

    Use fewer layers and filters.

    Avoid deeper architectures.

2. Poor Training Configuration:
    
    Use a high learning rate or no learning rate tuning.
    
    Use a non-optimal optimizer.

3. Minimal Training:
    
    Train the model for very few epochs.

4. No Regularization:
    
    Avoid techniques like dropout or L2 regularization that help generalization.

5. Poor Input Representation:
    
    Skip or degrade input normalization (optional; the other changes are usually enough to weaken the model).

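Here is a simple weak model configuration:

- **One convolutional layer** with only 4 filters.
- **No pooling layer:** the convolution output is flattened directly, which reduces the feature-extraction power.
- **Small dense layer:** only a small dense layer feeds the prediction layer.

Note: the cell below applies only strategies 1, 3 and 4 from the list above. It keeps the default Adam optimizer and the normalized inputs.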


In [9]:
# Define a weak CNN model
def create_very_weak_cnn_model():
    """Build the uncompiled, deliberately under-powered CNN for Experiment 6.

    Architecture: a single 3x3 ReLU convolution with 4 filters, flattened
    directly (no pooling), a 10-unit ReLU dense layer, and a 10-way softmax
    output.

    Returns:
        A ``Sequential`` model expecting inputs of shape (28, 28, 1).
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Conv2D makes Keras emit a UserWarning.
        layers.Input(shape=(28, 28, 1)),
        layers.Conv2D(4, (3, 3), activation='relu'),  # Only 4 filters
        layers.Flatten(),  # Directly flatten without pooling
        layers.Dense(10, activation='relu'),  # Only 10 neurons in dense layer
        layers.Dense(10, activation='softmax')  # Output layer
    ])
    return model

# Build the weak network and configure the optimizer, loss and metric
model = create_very_weak_cnn_model()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit for only 3 epochs; the test split is passed as validation data for per-epoch monitoring
history = model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=3,
)

# Score the weak network on the test split
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy of weak model: {test_acc}')


Epoch 1/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.7653 - loss: 0.7419 - val_accuracy: 0.9212 - val_loss: 0.2782
Epoch 2/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9266 - loss: 0.2581 - val_accuracy: 0.9451 - val_loss: 0.1878
Epoch 3/3
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9513 - loss: 0.1681 - val_accuracy: 0.9608 - val_loss: 0.1393
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9549 - loss: 0.1608
Test accuracy of weak model: 0.9607999920845032
