In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist

# Load MNIST as two (images, labels) pairs: a training split and a test split.
# On this run Keras fetched mnist.npz from its dataset bucket (see output below).
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 111s 10us/step


In [2]:
def prepare_images(images):
    """Scale raw pixel values to [0, 1] and add a trailing channel axis.

    The scaling step is applied only to integer-typed arrays (the raw pixel
    data). An array that has already been converted to float is assumed to be
    normalized and is only reshaped, so re-running this cell does not divide
    the pixels by 255 a second time.

    Args:
        images: NumPy array of 28x28 images, either raw integer pixels or the
            float output of a previous call.

    Returns:
        float32 array of shape (num_images, 28, 28, 1).
    """
    # dtype.kind is 'u' / 'i' for unsigned / signed integer arrays.
    if images.dtype.kind in "ui":
        images = images.astype("float32") / 255

    # Add a channel dimension (for CNN).
    # -1 tells NumPy to infer that dimension (the number of images) from the
    # array's total size and the other dimensions.
    return images.reshape(-1, 28, 28, 1)


x_train = prepare_images(x_train)
x_test = prepare_images(x_test)

# Note: We don't need to one-hot encode the labels (y_train, y_test)
# because we will use the `sparse_categorical_crossentropy` loss function.

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Small CNN for 28x28 single-channel images, ending in a 10-way softmax.
model = Sequential([
    # Declare the input shape with an explicit Input object rather than
    # passing `input_shape=` to the first layer, which Keras warns against
    # for Sequential models.
    tf.keras.Input(shape=(28, 28, 1)),

    # 1. Convolutional Block
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # 2. Transition to Classification
    Flatten(),

    # 3. Dense (Classification) Layers
    Dense(128, activation='relu'),   # A hidden layer to learn complex patterns
    Dense(10, activation='softmax')  # Output layer: 10 neurons for digits 0-9
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# Adam optimizer with sparse categorical cross-entropy (integer class labels),
# reporting accuracy alongside the loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

print("Starting model training...")

# Training settings gathered in one place.
# Note: x_test / y_test are passed as validation data, so the per-epoch
# val_* metrics in the log are computed on the test split.
fit_options = {
    "epochs": 10,        # 10 epochs is usually sufficient for MNIST
    "batch_size": 32,    # A common batch size
    "validation_data": (x_test, y_test),
}
history = model.fit(x_train, y_train, **fit_options)

print("Model training complete.")

Starting model training...
Epoch 1/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 43s 13ms/step - accuracy: 0.9552 - loss: 0.1485 - val_accuracy: 0.9789 - val_loss: 0.0697
Epoch 2/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 23s 12ms/step - accuracy: 0.9840 - loss: 0.0516 - val_accuracy: 0.9846 - val_loss: 0.0450
Epoch 3/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 25s 13ms/step - accuracy: 0.9895 - loss: 0.0329 - val_accuracy: 0.9836 - val_loss: 0.0494
Epoch 4/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 22s 12ms/step - accuracy: 0.9935 - loss: 0.0207 - val_accuracy: 0.9833 - val_loss: 0.0496
Epoch 5/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 22s 12ms/step - accuracy: 0.9952 - loss: 0.0155 - val_accuracy: 0.9856 - val_loss: 0.0444
Epoch 6/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 22s 12ms/step - accuracy: 0.9970 - loss: 0.0100 - val_accuracy: 0.9… [output truncated]

In [5]:
# Score the trained model on the test split; the result unpacks into the loss
# followed by the accuracy metric.
test_metrics = model.evaluate(x_test, y_test, verbose=0)
loss, accuracy = test_metrics
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy*100:.2f}%")

# Persist the trained model to disk.
model.save('mnist_cnn_model.h5')
print("Model saved as 'mnist_cnn_model.h5'")



Test Loss: 0.0610
Test Accuracy: 98.54%
Model saved as 'mnist_cnn_model.h5'
