# MNIST Handwritten Digits Classification using CNN

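This notebook implements a Convolutional Neural Network (CNN) to classify handwritten digits from the MNIST dataset. Our goal is to achieve >95% test accuracy. Run the cells in order from a fresh kernel (Restart Kernel → Run All): every cell after the first depends on the imports and variables defined in the cells above it.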

In [None]:
# Import required libraries
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt 

# For reproducibility: a single named seed shared by every RNG seeded here,
# so changing it in one place keeps NumPy and TensorFlow in sync.
SEED = 42
np.random.seed(SEED)      # NumPy's global RNG
tf.random.set_seed(SEED)  # TensorFlow's global seed

: 

In [None]:
# Load MNIST and prepare it for the CNN
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Images: cast to float32, divide by 255, then reshape so every sample
# carries an explicit trailing channel axis -> (28, 28, 1)
x_train = (x_train.astype('float32') / 255.0).reshape(-1, 28, 28, 1)
x_test = (x_test.astype('float32') / 255.0).reshape(-1, 28, 28, 1)

# Labels: one-hot vectors over the 10 classes
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

print("Training data shape:", x_train.shape)
print("Testing data shape:", x_test.shape)

: 

In [4]:
# Define the CNN model: three conv blocks followed by a dense classifier head
model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input entry instead of the
    # `input_shape=` layer argument, which current Keras versions warn about
    # when used inside a Sequential model.
    tf.keras.Input(shape=(28, 28, 1)),

    # First Convolutional Block
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Second Convolutional Block
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Third Convolutional Block (no pooling before the classifier head)
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.BatchNormalization(),

    # Flatten and Dense layers; the final layer emits 10 softmax scores
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Compile the model. categorical_crossentropy matches the softmax output
# layer and expects one-hot encoded targets.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Display model summary
model.summary()

NameError: name 'tf' is not defined

In [5]:
# Fit the network for 15 epochs in mini-batches of 128, reserving 20% of the
# training arrays for validation; the returned History object is kept for the
# plots that follow.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
)

NameError: name 'model' is not defined

In [6]:
# Plot training history: accuracy (left) and loss (right), each comparing the
# training curve with the validation curve recorded by model.fit.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (history key, panel title, y-axis label, legend location).
# Legends are placed where the curves usually leave room: accuracy climbs
# towards the top, loss falls towards the bottom, so a fixed 'upper left'
# would sit on top of the loss values from the first epochs.
panels = (
    ('accuracy', 'Model accuracy', 'Accuracy', 'lower right'),
    ('loss', 'Model loss', 'Loss', 'upper right'),
)

for ax, (metric, title, ylabel, legend_loc) in zip(axes, panels):
    ax.plot(history.history[metric], label='Train')
    ax.plot(history.history[f'val_{metric}'], label='Validation')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(loc=legend_loc)

fig.tight_layout()
plt.show()

NameError: name 'plt' is not defined

In [None]:
# Score the trained model on the test split. The model was compiled with a
# single 'accuracy' metric, so evaluate() yields the loss and that accuracy.
test_loss, test_accuracy = model.evaluate(x=x_test, y=y_test, verbose=0)
print(
    f"\nTest accuracy: {test_accuracy:.4f}",
    f"Test loss: {test_loss:.4f}",
    sep="\n",
)

In [None]:
# Function to visualize predictions
def visualize_predictions(model, x_test, y_test, num_images=5):
    """Show randomly chosen test images with their predicted and true labels.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of
            class scores per image; the predicted class is the row's argmax.
        x_test: Array of images. Each image is reshaped to 28x28 for display.
        y_test: Labels aligned with ``x_test``. Either one-hot rows (the class
            is the argmax along axis 1) or a 1-D array of integer class ids.
        num_images: Number of images to draw. Values larger than
            ``len(x_test)`` are clamped to the number of available images.

    Raises:
        ValueError: If ``num_images`` is below 1 or ``x_test`` is empty.
    """
    # Sampling without replacement cannot draw more items than exist, so clamp
    # the request instead of letting np.random.choice raise.
    count = min(num_images, len(x_test))
    if count < 1:
        raise ValueError(
            f"nothing to visualize: num_images={num_images}, "
            f"len(x_test)={len(x_test)}"
        )

    # Distinct random sample positions (uses NumPy's global RNG)
    indices = np.random.choice(len(x_test), count, replace=False)

    # Predicted class = highest score in each output row
    predictions = model.predict(x_test[indices])
    predicted_labels = np.argmax(predictions, axis=1)

    # True class: decode one-hot rows, or use integer labels as they are
    sample_labels = np.asarray(y_test)[indices]
    if sample_labels.ndim == 1:
        true_labels = sample_labels
    else:
        true_labels = np.argmax(sample_labels, axis=1)

    # One panel per image; the width grows with the panel count
    # (3 inches each, i.e. 15 inches for the default of 5 images).
    # squeeze=False keeps `axes` two-dimensional even for a single panel.
    fig, axes = plt.subplots(1, count, figsize=(3 * count, 3), squeeze=False)
    for ax, idx, pred, true in zip(axes[0], indices, predicted_labels, true_labels):
        ax.imshow(x_test[idx].reshape(28, 28), cmap='gray')
        ax.set_title(f'Pred: {pred}\nTrue: {true}')
        ax.axis('off')
    fig.tight_layout()
    plt.show()

# Draw five randomly chosen test digits with their predicted and true labels
visualize_predictions(model, x_test, y_test, num_images=5)