In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Load and preprocess data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [5]:
# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1
)
datagen.fit(X_train)

In [6]:
# Model Architecture
def build_model():
    model = models.Sequential()
    model.add(layers.Input(shape=(28, 28, 1)))

    # Conv Block 1
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.25))

    # Conv Block 2
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.25))

    # Dense Block
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10, activation='softmax'))

    return model

model = build_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [7]:
# Callbacks
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3),
    EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
]

from sklearn.model_selection import train_test_split

# Manually split into train and validation sets
X_train_new, X_val, y_train_new, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

history = model.fit(X_train_new, y_train_new,
                    validation_data=(X_val, y_val),
                    batch_size=64,
                    epochs=20,
                    callbacks=callbacks,
                    verbose=2)


Epoch 1/20
844/844 - 28s - 33ms/step - accuracy: 0.9599 - loss: 0.4422 - val_accuracy: 0.9775 - val_loss: 0.2267 - learning_rate: 1.0000e-03
Epoch 2/20
844/844 - 9s - 11ms/step - accuracy: 0.9806 - loss: 0.1848 - val_accuracy: 0.9765 - val_loss: 0.1886 - learning_rate: 1.0000e-03
Epoch 3/20
844/844 - 10s - 12ms/step - accuracy: 0.9832 - loss: 0.1731 - val_accuracy: 0.9812 - val_loss: 0.1802 - learning_rate: 1.0000e-03
Epoch 4/20
844/844 - 11s - 13ms/step - accuracy: 0.9831 - loss: 0.1821 - val_accuracy: 0.9907 - val_loss: 0.1571 - learning_rate: 1.0000e-03
Epoch 5/20
844/844 - 9s - 11ms/step - accuracy: 0.9857 - loss: 0.1624 - val_accuracy: 0.9865 - val_loss: 0.1515 - learning_rate: 1.0000e-03
Epoch 6/20
844/844 - 9s - 11ms/step - accuracy: 0.9861 - loss: 0.1622 - val_accuracy: 0.9887 - val_loss: 0.1569 - learning_rate: 1.0000e-03
Epoch 7/20
844/844 - 10s - 12ms/step - accuracy: 0.9872 - loss: 0.1541 - val_accuracy: 0.9877 - val_loss: 0.1646 - learning_rate: 1.0000e-03
Epoch 8/20
844/8

In [8]:
# Evaluation
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")

Test Accuracy: 0.9945


# MNIST Digit Classification
**Started with the Basics:**

Understood how to load the MNIST dataset, normalize pixel values (0 to 1), and reshape images into the correct format for CNNs (28, 28, 1).

**One-Hot Encoding:**

Learned that label vectors must be converted to one-hot encoding for use with categorical_crossentropy loss in multi-class classification.

**Simple Neural Networks (DNN):**

Began with fully connected dense layers using ReLU and softmax. Achieved ~98% accuracy but noticed overfitting and poor generalization.

**Introduced CNN Architecture:**

Switched to convolutional neural networks which extract spatial features better.

Used Conv2D, MaxPooling2D, and Flatten layers

Accuracy improved significantly (~99.1%)

**Added Regularization Techniques:**

Dropout layers were added after convolution and dense layers to reduce overfitting.

L2 Regularization was used in the dense layer to penalize large weights.

**Used Batch Normalization:**

Added after most layers to stabilize and accelerate training by normalizing activations.

Helped the model converge faster and improved accuracy.

**Applied Data Augmentation:**

Used ImageDataGenerator to introduce variability (rotations, shifts, zooms) into training data.

Improved robustness and generalization of the model (~99.4–99.5% accuracy).

**Used Learning Rate Scheduling:**

Added ReduceLROnPlateau callback to reduce learning rate when validation loss plateaued.

Helped in fine-tuning the model in later epochs.

**Implemented Early Stopping:**

Prevented overfitting by stopping training when validation loss stopped improving.

**Final Testing and Evaluation:**

Evaluated the trained model on the test set using model.evaluate()

Achieved high accuracy (~99.4%) without using ensembles or transfer learning.