# Convolutional Neural Network (CNN)

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.datasets import mnist

# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to [0,1]
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Reshape for CNN input (28x28x1)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Build CNN model
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

# Compile and train model
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 19ms/step - accuracy: 0.9138 - loss: 0.2803 - val_accuracy: 0.9823 - val_loss: 0.0530
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 20ms/step - accuracy: 0.9860 - loss: 0.0420 - val_accuracy: 0.9905 - val_loss: 0.0297
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 19ms/step - accuracy: 0.9923 - loss: 0.0264 - val_accuracy: 0.9903 - val_loss: 0.0298
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 19ms/step - accuracy: 0.9937 - loss: 0.0199 - val_accuracy: 0.9897 - val_loss: 0.0313
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 20ms/step - accuracy: 0.9953 - loss: 0.0139 - val_accuracy: 0.9889 - val_loss: 0.0374
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9853 - loss: 0.0464
Test accuracy: 0.9889


### Add Batch Normalization

In [2]:
from tensorflow.keras.layers import BatchNormalization

# Build improved CNN model with Batch Normalization
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(10, activation='softmax')
])

# Compile and train model
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 36ms/step - accuracy: 0.9483 - loss: 0.1704 - val_accuracy: 0.9863 - val_loss: 0.0403
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 34ms/step - accuracy: 0.9876 - loss: 0.0402 - val_accuracy: 0.9874 - val_loss: 0.0410
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 37ms/step - accuracy: 0.9895 - loss: 0.0311 - val_accuracy: 0.9874 - val_loss: 0.0408
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 34ms/step - accuracy: 0.9934 - loss: 0.0206 - val_accuracy: 0.9867 - val_loss: 0.0429
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 34ms/step - accuracy: 0.9946 - loss: 0.0175 - val_accuracy: 0.9907 - val_loss: 0.0307
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9884 - loss: 0.0398
Test accuracy: 0.9907


### Add Dropout for Regularization

In [3]:
from tensorflow.keras.layers import Dropout

# Build CNN model with Dropout
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),  # Dropout added

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),  # Dropout added

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),  # Dropout added
    Dense(10, activation='softmax')
])

# Compile and train model
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 37ms/step - accuracy: 0.8727 - loss: 0.4229 - val_accuracy: 0.9842 - val_loss: 0.0510
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 43ms/step - accuracy: 0.9716 - loss: 0.0970 - val_accuracy: 0.9870 - val_loss: 0.0386
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 37ms/step - accuracy: 0.9763 - loss: 0.0759 - val_accuracy: 0.9886 - val_loss: 0.0348
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 35ms/step - accuracy: 0.9802 - loss: 0.0663 - val_accuracy: 0.9881 - val_loss: 0.0354
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 35ms/step - accuracy: 0.9820 - loss: 0.0589 - val_accuracy: 0.9906 - val_loss: 0.0281
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9881 - loss: 0.0336 
Test accuracy: 0.9906


### Use Data Augmentation & Learning Rate Scheduler

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import layers, models

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values (0-1 range)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Reshape for CNN input (28x28x1)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Convert labels to categorical (one-hot encoding)
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)

# Fit the generator
datagen.fit(x_train)

# Define CNN model
cnn_model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    
    layers.Conv2D(64, (3, 3), activation='relu'),
    
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# **Compile the model**
cnn_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Define learning rate scheduler function
def lr_schedule(epoch):
    initial_lr = 0.001  # Initial learning rate
    decay_factor = 0.5  # Reduce learning rate by 50%
    drop_every = 2  # Drop learning rate every 2 epochs
    
    new_lr = initial_lr * (decay_factor ** (epoch // drop_every))
    print(f"Epoch {epoch+1}: Learning Rate = {new_lr:.6f}")
    return new_lr

# Create LearningRateScheduler callback
lr_scheduler = LearningRateScheduler(lr_schedule)

# Train the model with augmented data and learning rate scheduler
cnn_model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=5,
    validation_data=(x_test, y_test),
    callbacks=[lr_scheduler]  # Add learning rate scheduler
)

# Evaluate model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")


Epoch 1: Learning Rate = 0.001000
Epoch 1/5


  self._warn_if_super_not_called()


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 33ms/step - accuracy: 0.8084 - loss: 0.5758 - val_accuracy: 0.9848 - val_loss: 0.0459 - learning_rate: 0.0010
Epoch 2: Learning Rate = 0.001000
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 35ms/step - accuracy: 0.9689 - loss: 0.0963 - val_accuracy: 0.9887 - val_loss: 0.0350 - learning_rate: 0.0010
Epoch 3: Learning Rate = 0.000500
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 33ms/step - accuracy: 0.9815 - loss: 0.0584 - val_accuracy: 0.9916 - val_loss: 0.0262 - learning_rate: 5.0000e-04
Epoch 4: Learning Rate = 0.000500
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 33ms/step - accuracy: 0.9838 - loss: 0.0531 - val_accuracy: 0.9918 - val_loss: 0.0261 - learning_rate: 5.0000e-04
Epoch 5: Learning Rate = 0.000250
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 33ms/step - accuracy: 0.9870 - 

### Try Different Optimizers - SGD

In [7]:
##SDG final
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values (0-1 range)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Reshape for CNN input (28x28x1)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)


# Build CNN model with Dropout and Batch Normalization
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax')
])


# Compile and train the model with SGD optimizer
cnn_model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=10, validation_data=(x_test, y_test))

# Learning rate reduction when validation loss stops improving
#lr_scheduler = ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.5, verbose=1)

# Train the model with data augmentation and learning rate scheduler
#cnn_model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=10, validation_data=(x_test, y_test), callbacks=[lr_scheduler])

# Evaluate model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Final Test Accuracy: {test_acc:.4f}")
#print(f"Final Test Loss: {test_loss:.4f}")

# Compile and train model
#cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
#cnn_model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))

# Evaluate model
#test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
#print(f"Test accuracy: {test_acc:.4f}")

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 50ms/step - accuracy: 0.7816 - loss: 0.6926 - val_accuracy: 0.9777 - val_loss: 0.0717
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 45ms/step - accuracy: 0.9281 - loss: 0.2309 - val_accuracy: 0.9879 - val_loss: 0.0364
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 44ms/step - accuracy: 0.9442 - loss: 0.1816 - val_accuracy: 0.9892 - val_loss: 0.0308
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 52ms/step - accuracy: 0.9523 - loss: 0.1553 - val_accuracy: 0.9923 - val_loss: 0.0240
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 54ms/step - accuracy: 0.9571 - loss: 0.1412 - val_accuracy: 0.9909 - val_loss: 0.0282
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 57ms/step - accuracy: 0.9600 - loss: 0.1293 - val_accuracy: 0.9913 - val_loss: 0.0240
Ep

### Optimized CNN Model

In [6]:
# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize the pixel values (0-1 range)
x_train, x_test = x_train / 255.0, x_test / 255.0

# Reshape for CNN input (28x28x1)
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Define optimized CNN model
cnn_model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Conv2D(64, (3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2,2),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax')
])

# Use SGD optimizer with momentum
optimizer = SGD(learning_rate=0.01, momentum=0.9)

# Compile model
cnn_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)
datagen.fit(x_train)

# Learning Rate Scheduler
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', patience=3, factor=0.5, verbose=1)

# Train the model
cnn_model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=15, validation_data=(x_test, y_test), callbacks=[lr_scheduler])

# Evaluate the model
test_loss, test_acc = cnn_model.evaluate(x_test, y_test)
print(f"Optimized Model Test Accuracy: {test_acc:.4f}")


Epoch 1/15
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 52ms/step - accuracy: 0.7797 - loss: 0.7085 - val_accuracy: 0.9866 - val_loss: 0.0427 - learning_rate: 0.0100
Epoch 2/15
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 54ms/step - accuracy: 0.9289 - loss: 0.2274 - val_accuracy: 0.9868 - val_loss: 0.0373 - learning_rate: 0.0100
Epoch 3/15
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 59ms/step - accuracy: 0.9438 - loss: 0.1858 - val_accuracy: 0.9887 - val_loss: 0.0338 - learning_rate: 0.0100
Epoch 4/15
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 54ms/step - accuracy: 0.9506 - loss: 0.1607 - val_accuracy: 0.9877 - val_loss: 0.0401 - learning_rate: 0.0100
Epoch 5/15
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 50ms/step - accuracy: 0.9548 - loss: 0.1508 - val_accuracy: 0.9910 - val_loss: 0.0259 - learning_rate: 0.0100
Epoch 6/15
[1m1875/1875[0m [32m━━━━━━━━━━━━