In [None]:
import time
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import Adam
# Number of target classes; also sizes the softmax output of the models below.
NUM_CLASSES = 10

# Fetch the CIFAR-10 train/test split.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast the images to float32 and divide by 255.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

# One-hot encoded copies of the labels; the raw label arrays are kept as well.
y_train_encoded, y_test_encoded = (
    to_categorical(labels, NUM_CLASSES) for labels in (y_train, y_test)
)

print(f"Training data shape: {x_train.shape}")

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
170498071/170498071 ━━━━━━━━━━━━━━━━━━━━ 6s 0us/step
Training data shape: (50000, 32, 32, 3)


In [None]:
class EpochMetricsCallback(Callback):
    """Keras callback that prints a per-epoch table and records the metrics.

    Attributes (all (re)initialised in ``on_train_begin``):
        epoch_times: list with the duration of every finished epoch, in seconds.
        metrics_history: list with one dict per finished epoch holding the keys
            ``epoch``, ``loss``, ``accuracy``, ``val_loss``, ``val_accuracy``
            and ``time``. A metric that Keras did not report is stored as ``None``.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded state and print the table header."""
        self.epoch_times = []
        self.metrics_history = []
        print("\nEpoch | Train Loss | Train Acc | Time (s)")
        print("------|------------|-----------|---------")

    def on_epoch_begin(self, epoch, logs=None):
        """Store the start timestamp of the epoch that is about to run."""
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        """Record the finished epoch's metrics and print one table row.

        Args:
            epoch: epoch index as passed by Keras; printed as ``epoch + 1``.
            logs: metrics dict supplied by Keras. May be ``None`` or lack keys
                (e.g. no validation data), which is tolerated.
        """
        epoch_time = time.time() - self.epoch_start_time
        self.epoch_times.append(epoch_time)

        # The hook's declared default is logs=None, so guard before calling .get.
        logs = logs or {}
        record = {
            'epoch': epoch + 1,
            'loss': logs.get('loss'),
            'accuracy': logs.get('accuracy'),
            'val_loss': logs.get('val_loss'),
            'val_accuracy': logs.get('val_accuracy'),
            'time': epoch_time,
        }
        self.metrics_history.append(record)

        # A missing metric cannot be formatted with ':.4f'; print it as nan instead
        # of raising in the middle of training.
        missing = float('nan')
        train_loss = missing if record['loss'] is None else record['loss']
        train_acc = missing if record['accuracy'] is None else record['accuracy']

        print(f"{epoch+1:5d} | {train_loss:.4f}     | {train_acc:.4f}    | "
              f" {epoch_time:.2f}")

In [11]:
#1a.
# Baseline CNN: two Conv(3x3, 'same') + MaxPool(2x2) stages, then a 128-unit
# dense layer and a softmax output over NUM_CLASSES.
model = Sequential([
    # Declare the input shape with an explicit Input layer rather than the
    # `input_shape=` keyword on the first layer, which Keras 3 warns against.
    Input(shape=x_train.shape[1:]),
    Conv2D(32, (3,3), activation='relu', padding="same"),
    MaxPooling2D((2,2)),

    Conv2D(64, (3,3), activation='relu', padding="same"),
    MaxPooling2D((2,2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(NUM_CLASSES, activation='softmax')
])

optimizer = Adam(learning_rate=0.001)
# The one-hot targets (y_*_encoded) pair with the categorical cross-entropy loss.
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

print("Total params:", model.count_params())

EPOCHS = 300
BATCH_SIZE = 128
VAL_SPLIT = 0.2

metrics_callback = EpochMetricsCallback()

# verbose=0 silences the built-in progress bar; the callback prints one row per epoch.
total_train_start = time.time()
history = model.fit(
    x_train, y_train_encoded,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VAL_SPLIT,
    verbose=0,
    callbacks=[metrics_callback]
)
total_train_time = time.time() - total_train_start
print(f"Total Training Time: {total_train_time:.2f} seconds")

# Evaluate on the held-out test split.
test_loss, test_acc = model.evaluate(x_test, y_test_encoded, verbose=0)
print(f"Final Test Accuracy: {test_acc:.4f}")

Total params: 545098

Epoch | Train Loss | Train Acc | Time (s)
------|------------|-----------|---------


KeyboardInterrupt: 

In [9]:

#1b
# Deeper CNN: three Conv(3x3, default padding) + MaxPool(2x2) stages with
# 32/64/128 filters, then a 128-unit dense layer and a softmax output.
model = Sequential([
    # Declare the input shape with an explicit Input layer rather than the
    # `input_shape=` keyword on the first layer, which Keras 3 warns against.
    Input(shape=x_train.shape[1:]),

    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D((2,2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(NUM_CLASSES, activation='softmax')
])
print("Total params:", model.count_params())
# This cell trains on the raw label arrays (y_train / y_test, not the one-hot
# copies), hence the sparse variant of the cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])



epochs = 300
batch_size = 64

metrics_callback = EpochMetricsCallback()

print("\nStarting training...\n")

start = time.time()
history = model.fit(
    x_train, y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    callbacks=[metrics_callback],
    verbose=0   # built-in progress output off; EpochMetricsCallback prints each epoch
)
end = time.time()

print("\nTotal training time (seconds):", end - start)

# Evaluate on the held-out test split.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print("Final test accuracy:", test_acc)


Total params: 160202

Starting training...


Epoch | Train Loss | Train Acc | Time (s)
------|------------|-----------|---------


KeyboardInterrupt: 

In [10]:
#2a.
def residual_block(x, filters, downsample=False):
    """Apply two 3x3 Conv-BatchNorm layers with an additive skip connection.

    Args:
        x: input tensor; the size of its last axis is compared with ``filters``.
        filters: number of filters used by every convolution in the block.
        downsample: if True, the first 3x3 convolution and the 1x1 projection
            on the skip path use stride 2; otherwise stride 1.

    Returns:
        The tensor obtained by adding the main path and the skip path and
        applying a ReLU activation.
    """
    first_stride = 2 if downsample else 1
    skip = x

    # Main path: Conv -> BN -> ReLU -> Conv -> BN.
    out = layers.Conv2D(filters, kernel_size=3, strides=first_stride, padding='same')(x)
    out = layers.BatchNormalization()(out)
    out = layers.Activation('relu')(out)
    out = layers.Conv2D(filters, kernel_size=3, padding='same')(out)
    out = layers.BatchNormalization()(out)

    # Skip path: an identity unless the block downsamples or the last-axis
    # size differs from `filters`; then project with a 1x1 Conv + BN.
    needs_projection = downsample or skip.shape[-1] != filters
    if needs_projection:
        skip = layers.Conv2D(filters, kernel_size=1, strides=first_stride)(skip)
        skip = layers.BatchNormalization()(skip)

    merged = layers.Add()([out, skip])
    return layers.Activation('relu')(merged)

def build_resnet10(input_shape=(32, 32, 3), num_classes=10):
    """Build the residual network: a conv stem, ten residual blocks, and a classifier head.

    Args:
        input_shape: shape passed to the ``Input`` layer.
        num_classes: number of units of the final softmax ``Dense`` layer.

    Returns:
        An uncompiled ``models.Model`` mapping the input to class probabilities.
    """
    # One row per group: (filters, number of blocks, first block downsamples?).
    stage_specs = (
        (32, 3, False),
        (64, 3, True),
        (128, 2, True),
        (256, 2, True),
    )

    inputs = layers.Input(shape=input_shape)

    # Stem: Conv -> BN -> ReLU with 32 filters.
    features = layers.Conv2D(32, kernel_size=3, padding='same')(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.Activation('relu')(features)

    # Residual groups; only the first block of a group may downsample.
    for filters, num_blocks, downsample_first in stage_specs:
        for block_index in range(num_blocks):
            features = residual_block(
                features,
                filters,
                downsample=downsample_first and block_index == 0,
            )

    # Head: global average pooling followed by the softmax classifier.
    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    return models.Model(inputs, outputs)

# Build the residual network and compile it for the raw (non one-hot) labels.
model = build_resnet10()
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)
print("Total params:", model.count_params())

EPOCHS = 300
BATCH_SIZE = 64
metrics_callback = EpochMetricsCallback()

print("\nStarting ResNet-10 training...\n")

# Time the whole fit() call; the callback prints the per-epoch rows.
start = time.time()
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=0,
    callbacks=[metrics_callback],
    validation_split=0.2,
)
end = time.time()

print(f"\nTotal training time (seconds): {end - start}")

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Final test accuracy: {test_acc}")

Total params: 2897930

Starting ResNet-10 training...


Epoch | Train Loss | Train Acc | Time (s)
------|------------|-----------|---------


KeyboardInterrupt: 

In [12]:
#2b. Weight Decay
import tensorflow as tf
from tensorflow.keras import layers, regularizers
import time

# ---- ResNet-10 with L2 Weight Decay ----
def build_resnet10_weight_decay(input_shape=(32,32,3), num_classes=10, wd=0.001):
    """Build the residual network with an L2 kernel regularizer on every convolution.

    Args:
        input_shape: shape passed to the ``Input`` layer.
        num_classes: number of units of the final softmax ``Dense`` layer.
        wd: factor passed to ``regularizers.l2`` for each ``Conv2D`` kernel.
            The final ``Dense`` layer is created without a regularizer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """

    def l2_conv(filters, kernel_size, **conv_kwargs):
        # Each convolution gets its own l2(wd) regularizer instance.
        return layers.Conv2D(filters, kernel_size,
                             kernel_regularizer=regularizers.l2(wd),
                             **conv_kwargs)

    def residual_block_wd(x, filters, downsample=False):
        first_stride = 2 if downsample else 1
        skip = x

        # Main path: Conv -> BN -> ReLU -> Conv -> BN.
        out = l2_conv(filters, 3, strides=first_stride, padding='same')(x)
        out = layers.BatchNormalization()(out)
        out = layers.ReLU()(out)
        out = l2_conv(filters, 3, padding='same')(out)
        out = layers.BatchNormalization()(out)

        # Project the skip path with a 1x1 Conv + BN when shapes would not match.
        needs_projection = downsample or skip.shape[-1] != filters
        if needs_projection:
            skip = l2_conv(filters, 1, strides=first_stride)(skip)
            skip = layers.BatchNormalization()(skip)

        merged = layers.Add()([out, skip])
        return layers.ReLU()(merged)

    # 10 blocks: 3 → 3 → 2 → 2, as (filters, blocks, first block downsamples?).
    stage_specs = (
        (32, 3, False),
        (64, 3, True),
        (128, 2, True),
        (256, 2, True),
    )

    inputs = layers.Input(shape=input_shape)

    # Stem: regularized Conv -> BN -> ReLU.
    features = l2_conv(32, 3, padding='same')(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    for filters, num_blocks, downsample_first in stage_specs:
        for block_index in range(num_blocks):
            features = residual_block_wd(
                features,
                filters,
                downsample=downsample_first and block_index == 0,
            )

    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    return tf.keras.Model(inputs, outputs)



# ---- Train Weight Decay Version ----
# Build the L2-regularized network with its default arguments and compile it.
model_wd = build_resnet10_weight_decay()

model_wd.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

print("\nTraining ResNet-10 + Weight Decay λ=0.001\n")

metrics_wd = EpochMetricsCallback()

# Time the whole fit() call; the callback prints the per-epoch rows.
start = time.time()

history_wd = model_wd.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=300,
    verbose=0,
    callbacks=[metrics_wd],
    validation_split=0.2,
)

end = time.time()
print(f"Total training time (Weight Decay): {end - start}")

# Score the trained model on the test split.
wd_test_loss, wd_test_acc = model_wd.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Final Test Accuracy (Weight Decay): {wd_test_acc}")


Training ResNet-10 + Weight Decay λ=0.001


Epoch | Train Loss | Train Acc | Time (s)
------|------------|-----------|---------
    1 | 2.7969     | 0.5186    |  37.42
    2 | 1.6103     | 0.6774    |  7.11
    3 | 1.3016     | 0.7285    |  7.55
    4 | 1.1725     | 0.7531    |  7.45
    5 | 1.1103     | 0.7700    |  7.41
    6 | 1.0679     | 0.7841    |  7.55
    7 | 1.0308     | 0.7942    |  7.47
    8 | 0.9923     | 0.8065    |  7.44
    9 | 0.9615     | 0.8183    |  7.45
   10 | 0.9457     | 0.8206    |  7.47
   11 | 0.9107     | 0.8336    |  7.41
   12 | 0.8866     | 0.8404    |  7.40
   13 | 0.8687     | 0.8459    |  7.29
   14 | 0.8429     | 0.8555    |  7.47
   15 | 0.8344     | 0.8590    |  7.39
   16 | 0.8135     | 0.8653    |  7.48
   17 | 0.7983     | 0.8687    |  7.39
   18 | 0.7862     | 0.8743    |  7.41
   19 | 0.7722     | 0.8773    |  7.33
   20 | 0.7613     | 0.8800    |  7.38
   21 | 0.7489     | 0.8858    |  7.38
   22 | 0.7408     | 0.8881    |  7.43
   23 | 0.72

In [None]:
#2b. Dropout
def build_resnet10_dropout(input_shape=(32,32,3), num_classes=10, p=0.3):
    """Build the residual network with a Dropout layer at the end of every block.

    Args:
        input_shape: shape passed to the ``Input`` layer.
        num_classes: number of units of the final softmax ``Dense`` layer.
        p: rate passed to each ``layers.Dropout``.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """

    def residual_block_do(x, filters, downsample=False):
        first_stride = 2 if downsample else 1
        skip = x

        # Main path: Conv -> BN -> ReLU -> Conv -> BN.
        out = layers.Conv2D(filters, 3, strides=first_stride, padding='same')(x)
        out = layers.BatchNormalization()(out)
        out = layers.ReLU()(out)
        out = layers.Conv2D(filters, 3, padding='same')(out)
        out = layers.BatchNormalization()(out)

        # Project the skip path with a 1x1 Conv + BN when shapes would not match.
        needs_projection = downsample or skip.shape[-1] != filters
        if needs_projection:
            skip = layers.Conv2D(filters, 1, strides=first_stride)(skip)
            skip = layers.BatchNormalization()(skip)

        merged = layers.Add()([out, skip])
        activated = layers.ReLU()(merged)

        # Dropout is applied to the block output, after the add and the ReLU.
        return layers.Dropout(p)(activated)

    # 10 blocks: 3 → 3 → 2 → 2, as (filters, blocks, first block downsamples?).
    stage_specs = (
        (32, 3, False),
        (64, 3, True),
        (128, 2, True),
        (256, 2, True),
    )

    inputs = layers.Input(shape=input_shape)

    # Stem: Conv -> BN -> ReLU (no dropout here).
    features = layers.Conv2D(32, 3, padding='same')(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.ReLU()(features)

    for filters, num_blocks, downsample_first in stage_specs:
        for block_index in range(num_blocks):
            features = residual_block_do(
                features,
                filters,
                downsample=downsample_first and block_index == 0,
            )

    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation='softmax')(pooled)

    return tf.keras.Model(inputs, outputs)


# ---- Train Dropout Version ----
# Build the dropout network with its default arguments and compile it.
model_do = build_resnet10_dropout()

model_do.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

print("\nTraining ResNet-10 + Dropout p=0.3\n")

metrics_do = EpochMetricsCallback()

# Time the whole fit() call; the callback prints the per-epoch rows.
start = time.time()

history_do = model_do.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=300,
    verbose=0,
    callbacks=[metrics_do],
    validation_split=0.2,
)

end = time.time()
print(f"Total training time (Dropout): {end - start}")

# Score the trained model on the test split.
do_test_loss, do_test_acc = model_do.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Final Test Accuracy (Dropout): {do_test_acc}")