In [2]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Pull the MNIST train/test arrays (the archive is downloaded on first use).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Give every image an explicit single-channel axis (N, 28, 28, 1), cast to
# float32 and divide by 255. The same transform is applied to both splits.
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    for images in (x_train, x_test)
)

# Hold out 10% of the training data for validation; the fixed random_state
# keeps the split identical from run to run.
x_train_part, x_val, y_train_part, y_val = train_test_split(
    x_train,
    y_train,
    random_state=42,
    test_size=0.1,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step


In [8]:
# Data Augmentation
# Random geometric perturbations generated on the fly for each training batch.
# Horizontal flipping stays disabled (presumably because a mirrored digit is
# not a valid sample of its class).
#
# No aug.fit(...) call is made: fitting the generator is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and none of them is used here.
aug = ImageDataGenerator(
    rotation_range=15,        # random rotation, in degrees
    zoom_range=0.15,          # random zoom in [1 - 0.15, 1 + 0.15]
    width_shift_range=0.15,   # horizontal shift, as a fraction of image width
    height_shift_range=0.15,  # vertical shift, as a fraction of image height
    shear_range=10,           # shear angle, in degrees
    horizontal_flip=False,
)

In [9]:
def build_strongest_model(input_shape=(28, 28, 1), num_classes=10):
    """Build the (uncompiled) convolutional classifier used in this notebook.

    The network is three convolutional stages with 64, 128 and 256 filters.
    Each stage is two 3x3 'same'-padded ReLU convolutions, each followed by
    batch normalisation, then 2x2 max pooling and dropout (0.3, 0.35 and 0.5
    respectively). The head flattens the features and applies a 512-unit
    L2-regularised ReLU dense layer with batch normalisation and 0.5 dropout,
    followed by a softmax output layer.

    Args:
        input_shape: Shape of one input sample as (height, width, channels).
            Defaults to (28, 28, 1). Because the model pools by 2 three
            times, height and width should each be at least 8.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A `Sequential` model that has not been compiled yet.
    """

    def conv_stage(filters, dropout_rate):
        # One stage: (conv -> batch norm) x 2, then downsample and regularise.
        return [
            layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
            layers.BatchNormalization(),
            layers.MaxPooling2D((2, 2)),
            layers.Dropout(dropout_rate),
        ]

    model = models.Sequential([
        # Declare the input with an explicit Input layer instead of passing
        # `input_shape=` to the first Conv2D, which current Keras discourages
        # for Sequential models.
        layers.Input(shape=input_shape),

        *conv_stage(64, 0.3),
        *conv_stage(128, 0.35),
        *conv_stage(256, 0.5),

        layers.Flatten(),
        layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.0005)),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])
    return model

In [10]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and the NumPy-driven shuffling/augmentation draws repeat
# when the notebook is re-run top to bottom. Some GPU kernels may still be
# nondeterministic, so tiny run-to-run differences remain possible.
tf.keras.utils.set_random_seed(42)

model = build_strongest_model()

# Adam with a 3e-4 learning rate. The sparse categorical cross-entropy loss
# expects labels as integer class ids rather than one-hot vectors.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [11]:
# Stop training once validation accuracy has failed to improve for 15
# consecutive epochs, then roll back to the weights of the best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
)

# Fit on augmented mini-batches of 128 drawn from the training part; the
# validation arrays are passed directly, without augmentation.
history = model.fit(
    aug.flow(x_train_part, y_train_part, batch_size=128),
    validation_data=(x_val, y_val),
    epochs=100,
    callbacks=[early_stop],
    verbose=2,
)

# Score the test split and report its accuracy as a percentage.
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"Final Test Accuracy: {test_acc * 100:.2f}%")

Epoch 1/100
422/422 - 44s - 105ms/step - accuracy: 0.7602 - loss: 1.2159 - val_accuracy: 0.4725 - val_loss: 2.0580
Epoch 2/100
422/422 - 21s - 50ms/step - accuracy: 0.9393 - loss: 0.5887 - val_accuracy: 0.9872 - val_loss: 0.4158
Epoch 3/100
422/422 - 22s - 52ms/step - accuracy: 0.9589 - loss: 0.4832 - val_accuracy: 0.9893 - val_loss: 0.3722
Epoch 4/100
422/422 - 22s - 52ms/step - accuracy: 0.9688 - loss: 0.4119 - val_accuracy: 0.9893 - val_loss: 0.3226
Epoch 5/100
422/422 - 22s - 52ms/step - accuracy: 0.9743 - loss: 0.3521 - val_accuracy: 0.9892 - val_loss: 0.2823
Epoch 6/100
422/422 - 21s - 49ms/step - accuracy: 0.9775 - loss: 0.3007 - val_accuracy: 0.9895 - val_loss: 0.2417
Epoch 7/100
422/422 - 21s - 50ms/step - accuracy: 0.9790 - loss: 0.2586 - val_accuracy: 0.9905 - val_loss: 0.2027
Epoch 8/100
422/422 - 22s - 52ms/step - accuracy: 0.9813 - loss: 0.2198 - val_accuracy: 0.9905 - val_loss: 0.1752
Epoch 9/100
422/422 - 21s - 49ms/step - accuracy: 0.9831 - loss: 0.1820 - val_accuracy: