In [7]:
import tensorflow as tf
from keras import layers, models, callbacks
import numpy as np
import matplotlib.pyplot as plt


In [8]:
# Load the MNIST train/test splits bundled with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Give every image an explicit single-channel axis, (N, 28, 28, 1), which is
# the input shape build_cnn() declares, then convert to float32 and divide
# by 255.0.
x_train = x_train.reshape((-1, 28, 28, 1))
x_train = x_train.astype('float32') / 255.0
x_test = x_test.reshape((-1, 28, 28, 1))
x_test = x_test.astype('float32') / 255.0

# One-hot encode the integer labels over 10 classes; the categorical
# cross-entropy loss used for training consumes targets in this form.
y_train_oh = tf.keras.utils.to_categorical(y_train, 10)
y_test_oh = tf.keras.utils.to_categorical(y_test, 10)


In [9]:
def residual_block(x, filters, dropout_rate=0.3):
    """Apply a two-convolution residual unit followed by ReLU and dropout.

    Main path: Conv2D(3x3, ReLU) -> BatchNormalization -> Conv2D(3x3) ->
    BatchNormalization. The result is added to the skip path, passed through
    a ReLU activation and finally through Dropout. Every convolution uses an
    L2 kernel regularizer with factor 0.001.

    Args:
        x: Input tensor; the size of its last axis is compared to ``filters``.
        filters: Number of output channels of each convolution in the block.
        dropout_rate: Rate handed to the closing ``Dropout`` layer.

    Returns:
        The output tensor of the closing ``Dropout`` layer.
    """
    l2_penalty = tf.keras.regularizers.l2(0.001)

    # Skip path: identity when the channel count already matches; otherwise a
    # 1x1 convolution projects it to `filters` channels so Add() is valid.
    skip = x
    if skip.shape[-1] != filters:
        skip = layers.Conv2D(
            filters,
            kernel_size=1,
            padding='same',
            kernel_regularizer=l2_penalty,
        )(skip)

    # Main path: the first convolution carries the ReLU, the second is linear
    # because the non-linearity is applied after the addition.
    main = layers.Conv2D(
        filters,
        kernel_size=3,
        padding='same',
        activation='relu',
        kernel_regularizer=l2_penalty,
    )(x)
    main = layers.BatchNormalization()(main)
    main = layers.Conv2D(
        filters,
        kernel_size=3,
        padding='same',
        kernel_regularizer=l2_penalty,
    )(main)
    main = layers.BatchNormalization()(main)

    merged = layers.Add()([main, skip])
    merged = layers.Activation('relu')(merged)
    return layers.Dropout(dropout_rate)(merged)


def build_cnn():
    """Build the (uncompiled) residual CNN classifier for 28x28x1 inputs.

    Architecture: a 32-filter stem convolution with batch norm and pooling,
    three residual stages (32, 64, 64 filters) each followed by max pooling,
    then a 64-unit dense head with batch norm and dropout, and a 10-way
    softmax output.

    Returns:
        A ``Model`` mapping a ``(28, 28, 1)`` input to 10 class probabilities.
    """
    l2_penalty = tf.keras.regularizers.l2(0.001)
    inputs = tf.keras.Input(shape=(28, 28, 1))

    # Stem: one regularized 3x3 convolution, normalised and downsampled.
    features = layers.Conv2D(
        32,
        kernel_size=3,
        padding='same',
        activation='relu',
        kernel_regularizer=l2_penalty,
    )(inputs)
    features = layers.BatchNormalization()(features)
    features = layers.MaxPooling2D()(features)

    # Residual stages, each followed by pooling. Together with the stem this
    # gives four poolings (28 -> 14 -> 7 -> 3 -> 1 with Keras' default 2x2
    # pool size).
    for stage_filters in (32, 64, 64):
        features = residual_block(features, stage_filters)
        features = layers.MaxPooling2D()(features)

    # Classification head.
    head = layers.Flatten()(features)
    head = layers.Dense(64, activation='relu', kernel_regularizer=l2_penalty)(head)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(0.5)(head)
    outputs = layers.Dense(10, activation='softmax')(head)

    return models.Model(inputs=inputs, outputs=outputs)


In [10]:
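# NOTE: disabled alternative training setup (Adam optimizer with a per-epoch
# exponential learning-rate decay, batch size 128, up to 50 epochs). It is not
# executed; the results recorded in this notebook come from the SGD
# configuration in the following cell.
# model = build_cnn()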
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)

# model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 0.001 * 0.95**epoch)

# model.fit(
#     x_train, y_train_oh,
#     validation_split=0.1,
#     epochs=50,
#     batch_size=128,
#     callbacks=[lr_schedule, early_stop],
#     verbose=2
# )

In [11]:
# --- Training configuration ---------------------------------------------
# SGD with Nesterov momentum, starting at a learning rate of 0.01.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.95, nesterov=True)

# Halve the learning rate once val_loss has gone 2 epochs without improving.
lr_schedule1 = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)

# NOTE(review): exponential-decay schedule that is defined here but NOT passed
# to fit() below, so it has no effect on this run. Kept so that any other cell
# referring to the name keeps working.
lr_schedule2 = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 0.01 * 0.95**epoch, verbose=1)

# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Cross-entropy on one-hot targets with mild label smoothing.
loss_fn = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05)

# NOTE(review): not referenced anywhere in this cell; build_cnn() and
# residual_block() create their own L2(0.001) regularizers internally.
regularizer = tf.keras.regularizers.L2(0.001)

# --- Build, compile, train ----------------------------------------------
model = build_cnn()
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Keep the returned History object: its `.history` dict holds the per-epoch
# loss/accuracy curves, and assigning it stops the bare History repr from
# being echoed as the cell's output. The last 10% of the training arrays is
# held out for validation.
history = model.fit(
    x_train,
    y_train_oh,
    epochs=40,
    batch_size=512,
    validation_split=0.1,
    callbacks=[lr_schedule1, early_stop],
)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 23: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 33: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 38: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x16648e44fa0>

In [12]:
# Score the trained model on the test split. The model was compiled with a
# single 'accuracy' metric, so evaluate() yields the pair (loss, accuracy).
# NOTE(review): the reported loss is presumably not a plain cross-entropy;
# the model carries L2 kernel penalties and the loss uses label smoothing,
# so a comparatively high loss next to a high accuracy is expected. Confirm
# before comparing against other runs.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test_oh, verbose=2)
print(f"\n✅ Test Accuracy: {test_acc*100:.4f}")


313/313 - 1s - loss: 0.3910 - accuracy: 0.9954 - 1s/epoch - 5ms/step

✅ Test Accuracy: 99.5400
