In [1]:
# Step 1: Import all necessary libraries
import numpy as np                     # For numerical operations and array handling
import matplotlib.pyplot as plt         # For displaying images and graphs
import random                           # For selecting random test images
from tensorflow.keras.datasets import mnist        # For loading the MNIST dataset
from tensorflow.keras.utils import to_categorical  # For converting labels into one-hot format
from tensorflow.keras.models import Sequential     # For creating a linear (stacked) model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense  # CNN layers

In [2]:
# Step 2: Load the MNIST dataset (images of handwritten digits 0–9)
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

In [3]:
# Reshape to (28,28,1) because MNIST is grayscale
#The “-1” automatically adjusts the first dimension (number of samples).
#For training → 60,000 images → becomes (60000, 28, 28, 1)
#For testing → 10,000 images → becomes (10000, 28, 28, 1)
X_train = X_train.reshape(-1,28,28,1)
X_test = X_test.reshape(-1,28,28,1)

In [4]:
# Normalize (0–255 → 0–1)
X_train = X_train / 255.0
X_test = X_test / 255.0

In [5]:
# Step 5: Convert labels (0–9) into one-hot encoded form
# Example: 3 → [0,0,0,1,0,0,0,0,0,0]
Y_train = to_categorical(Y_train, 10)
Y_test = to_categorical(Y_test, 10)

In [6]:
print("Training data shape:",X_train.shape)
print("Testing data shape:", X_test.shape)

Training data shape: (60000, 28, 28, 1)
Testing data shape: (10000, 28, 28, 1)


In [7]:
# Step 6: Define the CNN model architecture
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),  # First conv layer - 32 filters, kernel size 3×3. Removes negative values, adds non-linearity.edges, corners, and simple textures.
    MaxPooling2D((2,2)),                                          # Downsample feature maps by 2×2.say, 28×28 → becomes 14×14.
    Conv2D(64, (3,3), activation='relu'),                         # Second conv layer shapes, textures, or parts of objects.
    MaxPooling2D((2,2)),                                          # Downsample again 14×14, it becomes 7×7.
    Flatten(),                                                    # Flatten to 1D vector 64×7×7 = 3136.
    Dense(64, activation='relu'),                                 # Hidden layer with 64 neurons Learns to combine the extracted image features into meaningful representations
    Dense(10, activation='softmax')                               
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# 3. Compile Model
model.compile(optimizer = 'adam',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

In [9]:
# Step 8: Train the model using training data
# - epochs: number of training rounds
# - batch_size: number of samples processed before model updates weights
# - validation_data: test data used to check accuracy after each epoch
history = model.fit(X_train, Y_train,
                    epochs=5,
                    batch_size=64,
                    validation_data=(X_test, Y_test))

Epoch 1/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.8832 - loss: 0.4086 - val_accuracy: 0.9839 - val_loss: 0.0499
Epoch 2/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.9833 - loss: 0.0541 - val_accuracy: 0.9862 - val_loss: 0.0418
Epoch 3/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9884 - loss: 0.0369 - val_accuracy: 0.9899 - val_loss: 0.0314
Epoch 4/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.9918 - loss: 0.0263 - val_accuracy: 0.9901 - val_loss: 0.0282
Epoch 5/5
[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - accuracy: 0.9933 - loss: 0.0199 - val_accuracy: 0.9900 - val_loss: 0.0313


In [10]:
# 5. Evaluate Model
test_loss, test_acc = model.evaluate(X_test, Y_test)
print("Test Accuracy:", test_acc)
print("Test Loss:", test_loss)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9871 - loss: 0.0380
Test Accuracy: 0.9900000095367432
Test Loss: 0.031336214393377304


In [14]:
# Step 10: Make a prediction on a random test image
idx2 = random.randint(0, len(Y_test)-1)   # Select a random test image
plt.imshow(X_test[idx2].reshape(28,28), cmap='gray')  # Display the image
plt.title("Test Image")
plt.show()

# Predict the digit
y_pred = model.predict(X_test[idx2].reshape(1,28,28,1))  # Model prediction
predicted_label = np.argmax(y_pred)   # Digit with highest probability
true_label = np.argmax(Y_test[idx2])  # Actual correct label

print(f"Our model says it is: {predicted_label}")
print(f"Actual label was: {true_label}")
