Library Imports

In [None]:
import numpy as np
import seaborn as sns
import os, gdown
from matplotlib import pyplot as plt
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import confusion_matrix
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam


NumPy Random Seed

In [None]:
# Seed the pseudo-random number generators so results are repeatable across runs.
# NumPy's seed alone does not cover TensorFlow (weight initialisation, dropout),
# so Keras' helper is also called: it seeds Python's `random`, NumPy and TensorFlow.
SEED = 20
np.random.seed(SEED)
keras.utils.set_random_seed(SEED)

Downloading & Loading the SVHN Dataset

In [None]:

# ensure cache dir exists
cache_dir = "/tmp/svhn_data"
os.makedirs(cache_dir, exist_ok=True)

# Google Drive sources for the train and test .mat files, keyed by local file name
svhn_sources = {
    "train_32x32.mat": "https://drive.google.com/uc?id=1FSwCLJzPzL4ZmihRgjIkh3DImRwM0UHb",
    "test_32x32.mat": "https://drive.google.com/uc?id=10E7i5m1HaGWZkte4P8WMouNoFj0dFHmv",
}

# Download only what is missing so re-running the notebook reuses the cached files
for file_name, url in svhn_sources.items():
    target_path = os.path.join(cache_dir, file_name)
    if os.path.exists(target_path):
        continue
    # gdown.download returns None when the download did not succeed
    if gdown.download(url, target_path, quiet=False) is None:
        raise RuntimeError(f"Failed to download {file_name} from {url}")

# load into memory
train_raw = loadmat(os.path.join(cache_dir, "train_32x32.mat"))
test_raw  = loadmat(os.path.join(cache_dir, "test_32x32.mat"))


Load Images & Labels

In [None]:
# Pull the arrays out of the loaded .mat dictionaries:
# 'X' holds the image data (copied into fresh NumPy arrays), 'y' the matching labels.
train_images, train_labels = np.array(train_raw['X']), train_raw['y']
test_images, test_labels = np.array(test_raw['X']), test_raw['y']

In [None]:
def _samples_first(images):
    """Return a view of `images` with its last axis moved to the front."""
    return np.moveaxis(images, source=-1, destination=0)


# The .mat layout is (height, width, channels, num_images) while Keras expects
# (num_images, height, width, channels), so the sample axis is brought to the front.
train_images = _samples_first(train_images)  # (N, 32, 32, 3)
test_images = _samples_first(test_images)    # (M, 32, 32, 3)

# Sanity check: shapes should now read (num_samples, height, width, channels)
print("Train images shape:", train_images.shape)
print("Test  images shape:", test_images.shape)

In [None]:
# Display one arbitrarily chosen training sample together with its label
sample_index = 13529

plt.imshow(train_images[sample_index])
plt.show()

print('Label: ', train_labels[sample_index])

Data Type Conversion

In [None]:
# Cast image arrays to float32: Keras computes in float32 by default, so float64
# would only double the memory footprint (this notebook is already memory-limited)
# without improving the precision the network actually sees.
train_images = train_images.astype('float32')
test_images  = test_images.astype('float32')

# Cast label arrays to int64 so they’re recognized as integer classes by ML tools
train_labels = train_labels.astype('int64')
test_labels  = test_labels.astype('int64')

Normalize Pixel Values

In [None]:
# Report the raw intensity range first; it is expected to be 0–255
print(f"Min: {train_images.min()}, Max: {train_images.max()}")

# Rescale both splits in place to [0, 1], which helps the network train faster
for pixel_array in (train_images, test_images):
    pixel_array /= 255.0

Remap 10 Labels to 0

In [None]:
"""
In the SVHN dataset, the digit “0” is encoded as label 10.
We need to convert these to 0 so that labels run 0–9 as usual.
"""

# Overwrite every label equal to 10 with 0, in place, for both the train and test labels
for label_array in (train_labels, test_labels):
    label_array[label_array == 10] = 0


One-Hot Encode Labels

In [None]:
# Create the one-hot encoder and learn the set of classes from the training labels
lb = LabelBinarizer()
lb.fit(train_labels)

# Encode both splits with the same fitted encoder so their columns line up
train_labels = lb.transform(train_labels)
test_labels = lb.transform(test_labels)

Split into Train & Validation Sets

In [None]:
# Only the first 4000 samples are used: that was the most that fit in memory
# in the GitHub workspace this notebook was developed in.
subset_images = train_images[:4000]
subset_labels = train_labels[:4000]

# Hold out 15% of the subset for validation; the fixed seed keeps the split reproducible
X_train, X_val, y_train, y_val = train_test_split(
    subset_images,
    subset_labels,
    test_size=0.15,
    random_state=22,
)

Data Augmentation Setup

In [None]:
# Augmentation settings applied on the fly to every training batch
augmentation_settings = {
    "rotation_range": 8,          # random rotations of up to ±8°
    "zoom_range": [0.95, 1.05],   # random zoom between 95% and 105%
    "height_shift_range": 0.10,   # vertical shifts of up to 10% of the image height
    "shear_range": 0.15,          # shear intensity of up to 0.15
}

datagen = ImageDataGenerator(**augmentation_settings)

In [None]:
# Learning rates recorded during the most recent training run, one entry per epoch
lrs = []

class LRTLogger(keras.callbacks.Callback):
    """Keras callback that records the optimizer's learning rate after every epoch.

    Values are appended to the module-level `lrs` list. The list is emptied when
    training starts so that, after a `fit` call, `lrs` has exactly one entry per
    epoch of that run and lines up with `history.history[...]`, even if the
    training cell is executed more than once.
    """

    def on_train_begin(self, logs=None):
        # Clear in place so existing references to `lrs` stay valid
        lrs.clear()

    def on_epoch_end(self, epoch, logs=None):
        # grab the current learning rate from the optimizer and append it
        lr = float(keras.backend.get_value(self.model.optimizer.learning_rate))
        lrs.append(lr)

Define the Auxiliary CNN Model

In [None]:
# Reset any existing Keras state (clear old layers, optimizers, etc.)
keras.backend.clear_session()

# Auxiliary model: same architecture as the main CNN, used only for the
# learning-rate sweep that follows.
aux_model = keras.Sequential([
    # Block 1: two 32-filter conv layers, then downsample
    Conv2D(32, (3, 3), padding='same', activation='relu',
           input_shape=(32, 32, 3)),
    BatchNormalization(),
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Block 2: two 64-filter conv layers, then downsample
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Block 3: two 128-filter conv layers, then downsample
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Global pooling replaces Flatten to cut activations drastically
    keras.layers.GlobalAveragePooling2D(),

    # Smaller dense head to further reduce parameters
    Dense(32, activation='relu'),
    Dropout(0.4),

    # Final classification layer (10 classes, softmax)
    Dense(10, activation='softmax')
])

# Learning rate schedule: 1e-4 × 10^(epoch/10), i.e. the LR grows tenfold every 10 epochs
lr_schedule = keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-4 * 10**(epoch / 10)
)

# Adam optimizer with AMSGrad variant, starting at the schedule's initial rate
optimizer = Adam(learning_rate=1e-4, amsgrad=True)

# Compile once with categorical crossentropy and the accuracy metric.
# (The schedule, optimizer and compile call were previously repeated verbatim;
# the duplicate has been removed.)
aux_model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

Train Auxiliary Model for LR Tuning

In [None]:
# Callback that records the learning rate actually in effect after each epoch
lr_logger = LRTLogger()

# Augmented training batches produced on the fly by the ImageDataGenerator
aux_train_batches = datagen.flow(X_train, y_train, batch_size=128)

# 30 epochs sweep the learning rate through the schedule; the schedule callback
# sets the LR and the logger records it, while the held-out split is used for validation.
history = aux_model.fit(
    aux_train_batches,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=[lr_schedule, lr_logger],
)

Plot LR vs Loss graph

In [None]:
# Training loss of the LR sweep, one value per epoch
sweep_loss = history.history['loss']

# Loss against learning rate, with the learning rate on a logarithmic x axis
fig, ax = plt.subplots()
ax.semilogx(lrs, sweep_loss)

# Axis limits: x spans the swept learning rates, y runs from 0 to the largest loss
ax.axis([min(lrs), max(lrs), 0, max(sweep_loss)])
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Training Loss')
ax.set_title('Learning Rate vs. Loss')
plt.show()

Define the Main CNN Model

In [None]:
# Clear any previous Keras state
keras.backend.clear_session()

# Build the sequential CNN
model = keras.Sequential([
    # Conv block 1: 32 filters
    Conv2D(32, (3, 3), padding='same', activation='relu',
           input_shape=(32, 32, 3)),
    BatchNormalization(),
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Conv block 2: 64 filters
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Conv block 3: 128 filters
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.3),

    # Reduce spatial dims via global average pooling
    keras.layers.GlobalAveragePooling2D(),

    # Dense head with 32 units
    Dense(32, activation='relu'),
    Dropout(0.4),

    # Output layer: 10 classes with softmax
    Dense(10, activation='softmax')
])

# Callback: stop training if val_loss doesn't improve for 8 epochs.
# restore_best_weights=True puts the best-val_loss weights back into `model`,
# so the evaluation and prediction cells (which use the in-memory model) do not
# run on weights from up to 8 epochs past the best one.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=8, restore_best_weights=True
)

# Optimizer: Adam with a higher learning rate
optimizer = Adam(learning_rate=1e-3, amsgrad=True)

# Callback: save only the best model to disk
model_checkpoint = keras.callbacks.ModelCheckpoint(
    "best_cnn.keras", save_best_only=True
)

# Compile the model with categorical crossentropy and accuracy metric
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


Showing the model

In [None]:
# Print a layer-by-layer summary table of the main model
model.summary()

Train the Main Model for Final Predictions

In [None]:
# Augmented training batches for the main model
main_train_batches = datagen.flow(X_train, y_train, batch_size=128)

# Train for at most 60 epochs, validating on the held-out split each epoch;
# the callbacks handle early stopping and checkpointing of the best model.
history = model.fit(
    main_train_batches,
    epochs=60,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, model_checkpoint],
)

Extract Training & Validation data

In [None]:
# Per-epoch metrics recorded by Keras during the main training run
epoch_metrics = history.history

# Accuracy curves
train_acc, val_acc = epoch_metrics['accuracy'], epoch_metrics['val_accuracy']

# Loss curves
train_loss, val_loss = epoch_metrics['loss'], epoch_metrics['val_loss']

Plot the Training and Validation Accuracies and Losses

In [None]:
# Side-by-side panels: accuracy on the left, loss on the right, both against epoch
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(20, 10))

acc_ax.plot(train_acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend()
acc_ax.set_title('Epochs vs. Training and Validation Accuracy')

loss_ax.plot(train_loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend()
loss_ax.set_title('Epochs vs. Training and Validation Loss')

plt.show()

Evaluate Model on Test Set

In [None]:
# Score the model on the test split; the small batch size (16) keeps memory usage low
test_metrics = model.evaluate(
    x=test_images,
    y=test_labels,
    batch_size=16,
    verbose=0,
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here)
test_loss, test_acc = test_metrics

print(f"Test accuracy: {test_acc:.4f}")
print(f"Test loss:     {test_loss:.4f}")




Generate Predictions & Decode Labels

In [None]:
# Predict class probabilities for the training images and decode them to integer
# labels. The encoder already knows its classes; the second positional argument of
# LabelBinarizer.inverse_transform is `threshold`, so `lb.classes_` is not passed.
y_pred = lb.inverse_transform(model.predict(X_train))

# Convert the one-hot training labels back to integers as well.
# The ndim guard makes this cell safe to re-run: once decoded, y_train is 1-D
# and must not be passed through the encoder again.
if y_train.ndim == 2:
    y_train = lb.inverse_transform(y_train)

Plot Confusion Matrix

In [None]:
# Confusion matrix of true vs. predicted labels on the training set,
# with rows and columns ordered by the encoder's classes
matrix = confusion_matrix(y_train, y_pred, labels=lb.classes_)

# Render the counts as an annotated heatmap
fig, ax = plt.subplots(figsize=(14, 12))
sns.heatmap(matrix, annot=True, cmap='Greens', fmt='d', ax=ax)
ax.set_title('Confusion Matrix for Training Dataset')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

In [None]:
# Silence NumPy floating-point warnings (divide, overflow, underflow, invalid)
# so they do not clutter the plots below. Note this setting is global for the kernel.

np.seterr(all='ignore')

Build an Activation Model

In [None]:
# Collect every convolutional layer of the trained model, with its name and output tensor
conv_layers   = [l for l in model.layers if isinstance(l, Conv2D)]
layer_names   = [l.name for l in conv_layers]    # used as plot titles
layer_outputs = [l.output for l in conv_layers]

# Grab the input tensor from the very first layer
input_tensor = model.layers[0].input

# Build a model that maps an input image to the activations of every conv layer.
# `Model` is not imported in this notebook, so it is referenced through `keras`.
activation_model = keras.Model(inputs=input_tensor, outputs=layer_outputs)



Function that plots the convolutional filters

In [None]:
# Define a function which will plot the convolutional filters

def plot_convolutional_filters(img, images_per_row=9):
    """Plot the activation maps of every convolutional layer for a single image.

    One figure is drawn per conv layer; each figure is a grid holding one
    normalized activation map per filter of that layer.

    Parameters
    ----------
    img : array of shape (height, width, channels)
        A single image; a batch dimension is added internally.
    images_per_row : int, optional
        Number of activation maps shown per grid row (default 9).

    Notes
    -----
    Relies on the notebook-level `activation_model` and `conv_layers`.
    Feature maps are assumed to be square, since a single `size` is used for
    both spatial dimensions.
    """
    # Add a batch dimension: (height, width, channels) → (1, h, w, c)
    img = np.expand_dims(img, axis=0)

    # Run the activation model to get feature maps from each conv layer
    activations = activation_model.predict(img)
    # A model with a single output returns a bare array instead of a list
    if not isinstance(activations, (list, tuple)):
        activations = [activations]

    # Titles come from the conv layers the activation model was built from
    layer_names = [layer.name for layer in conv_layers]

    for layer_name, layer_activation in zip(layer_names, activations):
        # Total number of filters in this layer
        n_features = layer_activation.shape[-1]
        # Spatial size of each feature map (height = width)
        size = layer_activation.shape[1]
        # Rows needed to show every filter (ceiling division, so a partially
        # filled last row is kept instead of dropping the trailing filters)
        n_rows = -(-n_features // images_per_row)

        # Prepare a grid to hold all the filter images; unused cells stay zero
        display_grid = np.zeros((size * n_rows, images_per_row * size))

        for feature_idx in range(n_features):
            grid_row, grid_col = divmod(feature_idx, images_per_row)

            # Copy so the normalization below does not modify `activations`
            channel_image = layer_activation[0, :, :, feature_idx].copy()

            # Normalize the activation for better contrast:
            # zero mean, unit variance, then map to a 0–255 range centred on 128
            channel_image -= channel_image.mean()
            channel_image /= (channel_image.std() + 1e-5)
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')

            # Place the processed image into the display grid
            display_grid[
                grid_row * size : (grid_row + 1) * size,
                grid_col * size : (grid_col + 1) * size
            ] = channel_image

        # Plot the grid for this layer
        scale = 1.0 / size
        plt.figure(figsize=(
            scale * display_grid.shape[1],
            scale * display_grid.shape[0]
        ))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='plasma')
        plt.show()

In [None]:
print("X_train.shape:", X_train.shape)

# Training image whose activations will be visualized
img = X_train[41]

# Show the chosen image without axis ticks
fig, ax = plt.subplots()
ax.imshow(img)
ax.axis("off")
plt.show()


Plotting with the convolutional filters

In [None]:
# Draw the conv-layer activation grids for the image selected in `img`
plot_convolutional_filters(img)
