In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Turn one single-channel image into a resized, scaled 3-channel tensor
def preprocess_image(image, target_size=(112, 112)):
    """Prepare a single grayscale image for the network.

    Args:
        image: 2-D (height, width) grayscale image; a trailing channel axis
            is appended here before the RGB conversion.
        target_size: (height, width) the image is resized to.

    Returns:
        The image with three channels, resized to ``target_size`` and divided
        by 255.0 (assumes 0-255 pixel intensities, as in the raw MNIST arrays).
    """
    # grayscale_to_rgb needs an explicit trailing channel axis of size 1
    single_channel = tf.expand_dims(image, axis=-1)
    three_channel = tf.image.grayscale_to_rgb(single_channel)
    resized = tf.image.resize(three_channel, size=target_size)
    # Scale intensities down by the maximum 8-bit value
    return resized / 255.0

# Convert dataset into TensorFlow Dataset objects for efficient streaming
batch_size = 32
target_size = (112, 112)


def make_dataset(images, labels, *, training, batch_size, target_size, num_classes=10):
    """Build a batched, prefetched pipeline of (preprocessed image, one-hot label).

    Args:
        images: array of raw grayscale images, indexed along the first axis.
        labels: array of integer class labels aligned with ``images``.
        training: when True, the samples are shuffled.
        batch_size: number of samples per emitted batch.
        target_size: (height, width) forwarded to ``preprocess_image``.
        num_classes: depth of the one-hot label encoding.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image_batch, one_hot_label_batch)``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if training:
        # Shuffle the *raw* samples, before preprocessing: the buffer then holds
        # small 28x28 uint8 digits instead of 112x112x3 float32 tensors
        # (~150 KB each, i.e. ~1.5 GB for a 10,000-element buffer). Because the
        # raw samples are tiny, the buffer can span the whole set, which gives
        # a uniform shuffle rather than a windowed one.
        dataset = dataset.shuffle(buffer_size=len(images))
    dataset = dataset.map(
        lambda x, y: (preprocess_image(x, target_size), tf.one_hot(y, num_classes)),
        # Preprocess several samples concurrently instead of one at a time
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


train_dataset = make_dataset(
    x_train, y_train, training=True, batch_size=batch_size, target_size=target_size
)
test_dataset = make_dataset(
    x_test, y_test, training=False, batch_size=batch_size, target_size=target_size
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, ReLU, Add, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Two-convolution residual block with an optional projection shortcut
def resnet_block(x, filters, stride=1):
    """Apply one residual block to ``x``.

    The main path is Conv3x3(stride) -> BN -> ReLU -> Conv3x3 -> BN. Its result
    is added to the shortcut and passed through a final ReLU.

    Args:
        x: input feature map (Keras tensor).
        filters: number of output channels of both 3x3 convolutions.
        stride: stride of the first 3x3 convolution (and of the projection
            shortcut, when one is needed).

    Returns:
        The block's output tensor.
    """
    residual = x

    # Main path
    main = Conv2D(filters, (3, 3), strides=stride, padding='same', use_bias=False)(x)
    main = BatchNormalization()(main)
    main = ReLU()(main)
    main = Conv2D(filters, (3, 3), strides=1, padding='same', use_bias=False)(main)
    main = BatchNormalization()(main)

    # The identity shortcut only works when spatial size and channel count are
    # unchanged; otherwise project it with a strided 1x1 convolution + BN.
    needs_projection = stride != 1 or main.shape[-1] != residual.shape[-1]
    if needs_projection:
        residual = Conv2D(filters, (1, 1), strides=stride, use_bias=False)(residual)
        residual = BatchNormalization()(residual)

    merged = Add()([main, residual])
    return ReLU()(merged)

# Assemble the lightweight ResNet classifier (stem, 3 residual stages, dropout head)
def build_lightweight_resnet(input_shape, num_classes):
    """Build the (uncompiled) lightweight ResNet classification model.

    Args:
        input_shape: shape of one input sample, passed to ``Input``.
        num_classes: number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input to ``num_classes`` softmax scores.
    """
    inputs = Input(shape=input_shape)

    # Stem: a single 3x3 convolution at full resolution
    features = Conv2D(32, (3, 3), strides=1, padding='same', use_bias=False)(inputs)
    features = BatchNormalization()(features)
    features = ReLU()(features)

    # Residual stages as (filters, stride); stride 2 stages downsample
    for stage_filters, stage_stride in ((32, 1), (64, 2), (128, 2)):
        features = resnet_block(features, stage_filters, stride=stage_stride)

    # Head: global average pooling, dropout for regularisation, softmax classifier
    pooled = GlobalAveragePooling2D()(features)
    pooled = Dropout(0.3)(pooled)
    outputs = Dense(num_classes, activation='softmax')(pooled)

    return Model(inputs, outputs)

# Instantiate the network for 112x112 three-channel inputs and 10 classes
input_shape = (112, 112, 3)
num_classes = 10
model = build_lightweight_resnet(input_shape=input_shape, num_classes=num_classes)

# Callback: multiply the learning rate by 0.5 once val_loss has stalled for
# `patience` epochs, never going below min_lr
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# categorical_crossentropy expects one-hot encoded labels
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [3]:
# Fit for 5 epochs with the plateau-based learning-rate callback attached.
# NOTE(review): test_dataset doubles as the validation set here, so the
# val_loss that drives the LR schedule is computed on test data — consider a
# separate hold-out split for validation.
history = model.fit(
    x=train_dataset,
    epochs=5,
    validation_data=test_dataset,
    callbacks=[lr_scheduler],
)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 59ms/step - accuracy: 0.6548 - loss: 1.0611 - val_accuracy: 0.9669 - val_loss: 0.1664 - learning_rate: 0.0010
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 58ms/step - accuracy: 0.9420 - loss: 0.2062 - val_accuracy: 0.9571 - val_loss: 0.1455 - learning_rate: 0.0010
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 58ms/step - accuracy: 0.9608 - loss: 0.1408 - val_accuracy: 0.9556 - val_loss: 0.1450 - learning_rate: 0.0010
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 58ms/step - accuracy: 0.9635 - loss: 0.1231 - val_accuracy: 0.9720 - val_loss: 0.0933 - learning_rate: 0.0010
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 59ms/step - accuracy: 0.9710 - loss: 0.1008 - val_accuracy: 0.9787 - val_loss: 0.0731 - learning_rate: 0.0010


In [4]:
# Score the trained model on the test batches; evaluate() returns the loss
# followed by the compiled metrics (here: accuracy)
test_metrics = model.evaluate(test_dataset, verbose=2)
final_loss, final_accuracy = test_metrics
print(f"Final Test Loss: {final_loss:.4f}")
print(f"Final Test Accuracy: {final_accuracy:.4f}")

# Persist the model to a single file. The ".h5" suffix selects the HDF5 format
# (not a SavedModel directory, despite the variable name).
saved_model_dir = "/content/lightweight_resnet_mnist.h5"
model.save(saved_model_dir)
print(f"Model saved to {saved_model_dir}. You can now download it manually.")


313/313 - 4s - 12ms/step - accuracy: 0.9787 - loss: 0.0731




Final Test Loss: 0.0731
Final Test Accuracy: 0.9787
Model saved to /content/lightweight_resnet_mnist.h5. You can now download it manually.
