In [8]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets
from tensorflow.keras.utils import to_categorical

In [9]:
# Load and preprocess the MNIST dataset
def load_and_preprocess_data():
    """Load MNIST via ``datasets.mnist.load_data()`` and prepare it for training.

    Each split is processed the same way:
      * pixel values are divided by 255.0 (scaling 0-255 inputs into [0, 1]),
      * images are reshaped to ``(num_samples, 28, 28, 1)`` to add a channel axis,
      * labels are one-hot encoded into 10 classes with ``to_categorical``.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` with the processed arrays.
    """
    num_classes = 10
    image_shape = (28, 28, 1)

    def _prepare(images, labels):
        # Scale first, then append the trailing channel dimension.
        scaled = images / 255.0
        scaled = scaled.reshape(scaled.shape[0], *image_shape)
        return scaled, to_categorical(labels, num_classes)

    train_split, test_split = datasets.mnist.load_data()
    return _prepare(*train_split), _prepare(*test_split)


In [10]:
# Define a residual block
def residual_block(x, filters, kernel_size=3, stride=1):
    """Apply a two-convolution residual block to ``x``.

    Main path: Conv2D (with ReLU) -> BatchNorm -> Conv2D -> BatchNorm. The
    result is added to the shortcut and passed through a final ReLU.

    Only the first convolution uses ``stride``; the second always uses
    stride 1. This keeps the main path downsampled exactly once, so its
    spatial size matches the strided shortcut. (Striding both convolutions
    would shrink the main path by ``stride**2`` and make the add fail.)

    The shortcut is projected with a 1x1 convolution + BatchNorm whenever the
    identity cannot be added directly: when ``stride != 1`` or when the input
    channel count differs from ``filters``.

    Args:
        x: Input tensor. The channel count is read from ``x.shape[-1]``, so a
            channels-last layout is assumed.
        filters: Number of output channels for both convolutions.
        kernel_size: Kernel size of the two main-path convolutions.
        stride: Stride of the first convolution and of the shortcut projection.

    Returns:
        The output tensor of the block.
    """
    shortcut = x
    # An unknown (None) channel count also compares unequal, which safely
    # falls back to the projection shortcut.
    in_channels = x.shape[-1]

    x = layers.Conv2D(filters, kernel_size=kernel_size, strides=stride, padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    # Stride 1 here: the downsampling already happened in the first conv.
    x = layers.Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(x)
    x = layers.BatchNormalization()(x)

    if stride != 1 or in_channels != filters:
        # Projection shortcut: match both spatial size and channel count.
        shortcut = layers.Conv2D(filters, kernel_size=1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x

In [11]:

# Define the ResNet model
def create_resnet_model(input_shape=(28, 28, 1), num_classes=10):
    """Build a small residual classifier.

    Layout: a 64-filter 3x3 convolution (ReLU) followed by batch
    normalization, two 64-filter residual blocks, global average pooling and
    a softmax ``Dense`` layer with ``num_classes`` units.

    Args:
        input_shape: Shape of a single input sample, passed to ``layers.Input``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A ``models.Model`` mapping the input tensor to class probabilities.
    """
    base_filters = 64
    num_blocks = 2

    image_in = layers.Input(shape=input_shape)

    # Stem: one convolution and normalization ahead of the residual stack.
    features = layers.Conv2D(base_filters, kernel_size=3, strides=1, padding='same', activation='relu')(image_in)
    features = layers.BatchNormalization()(features)

    for _ in range(num_blocks):
        features = residual_block(features, filters=base_filters)

    # Head: collapse the spatial dimensions, then classify.
    pooled = layers.GlobalAveragePooling2D()(features)
    class_probs = layers.Dense(num_classes, activation='softmax')(pooled)

    return models.Model(image_in, class_probs)

In [12]:
# Compile and train the model
def train_model(model, x_train, y_train, x_test, y_test, epochs=30, batch_size=128):
    """Compile ``model`` and fit it on the training data.

    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss and an accuracy metric, then fitted in place.

    Note: ``(x_test, y_test)`` is passed as ``validation_data``, so the
    per-epoch validation metrics are computed on the same split that is
    later used for the final test evaluation.

    Args:
        model: Model object exposing ``compile`` and ``fit``.
        x_train: Training inputs.
        y_train: Training targets (one-hot, to match the categorical loss).
        x_test: Inputs used for per-epoch validation.
        y_test: Targets used for per-epoch validation.
        epochs: Number of training epochs (default 30).
        batch_size: Mini-batch size (default 128).

    Returns:
        The same ``model`` object, after training.
    """
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, y_test),
    )

    return model

In [13]:
# Entry point: load the data, build the model, train it, then evaluate on the test split
if __name__ == '__main__':
    # An `if` block does not open a new scope, so every name bound below is a
    # module-level (notebook-global) variable that later cells can reference.
    (x_train, y_train), (x_test, y_test) = load_and_preprocess_data()
    model = create_resnet_model()
    # train_model returns the model it was given, so `trained_model` and
    # `model` refer to the same object.
    trained_model = train_model(model, x_train, y_train, x_test, y_test)
    # The metrics returned by evaluate() are not captured here; only the
    # logged output is shown.
    trained_model.evaluate(x_test, y_test, verbose=2)

Epoch 1/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 37ms/step - accuracy: 0.8113 - loss: 0.8758 - val_accuracy: 0.1326 - val_loss: 9.7944
Epoch 2/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 29ms/step - accuracy: 0.9830 - loss: 0.0972 - val_accuracy: 0.9722 - val_loss: 0.1070
Epoch 3/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 30ms/step - accuracy: 0.9874 - loss: 0.0587 - val_accuracy: 0.9438 - val_loss: 0.1851
Epoch 4/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 31ms/step - accuracy: 0.9887 - loss: 0.0469 - val_accuracy: 0.9802 - val_loss: 0.0658
Epoch 5/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - accuracy: 0.9907 - loss: 0.0382 - val_accuracy: 0.9775 - val_loss: 0.0765
Epoch 6/30
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 30ms/step - accuracy: 0.9918 - loss: 0.0317 - val_accuracy: 0.9785 - val_loss: 0.0719
Epoch 7/30
[1m4

In [14]:
# Save the model to disk. `model`, `trained_model`, `x_test` and `y_test` are
# not defined in this cell; they must already be bound by an earlier cell.
# NOTE(review): the `.h5` suffix selects the legacy HDF5 format; recent Keras
# versions recommend the native `.keras` format. Confirm which format
# downstream loaders expect before changing it.
model.save('resnet_mnist_model.h5')

# Evaluate the model and print the final accuracy
# evaluate() is unpacked into two values: the loss followed by the single
# 'accuracy' metric configured at compile time.
loss, accuracy = trained_model.evaluate(x_test, y_test, verbose=2)
print(f"Final test accuracy: {accuracy:.4f}")



313/313 - 1s - 3ms/step - accuracy: 0.9918 - loss: 0.0296
Final test accuracy: 0.9918
