In [None]:
# --- 1. Import necessary libraries ---
# TensorFlow is the core library for building and training neural networks.
import tensorflow as tf

In [None]:
# numpy is used for numerical operations, especially with arrays.
import numpy as np

# matplotlib is used to visualize the image for our prediction.
import matplotlib.pyplot as plt

# Confirm that the imports succeeded and report the TensorFlow version in use.
print(
    "--- TensorFlow and Libraries Loaded ---",
    f"TensorFlow version: {tf.__version__}",
    "-" * 35,
    sep="\n",
)

In [None]:
# --- 2. Load the MNIST dataset ---
# Keras bundles a loader for MNIST; it hands back a training pair and a
# testing pair, each made of an image array and a label array.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Report the shape of every array so the split sizes are visible at a glance.
print(
    "\n--- Dataset Loaded ---",
    f"Training data shape (images): {x_train.shape}",
    f"Training data shape (labels): {y_train.shape}",
    f"Testing data shape (images):  {x_test.shape}",
    f"Testing data shape (labels):  {y_test.shape}",
    "-" * 35,
    sep="\n",
)

In [None]:
# --- 3. Preprocess the data ---
# Deep learning models work best with normalized data, so the pixel values
# are rescaled from the range [0, 255] to [0, 1].
#
# The integer-dtype guard keeps this cell safe to re-run: once the arrays
# hold floats, running the cell again leaves them untouched instead of
# dividing by 255 a second time.
# NOTE(review): assumes the loader returns integer (uint8) pixel arrays, as
# the Keras MNIST documentation states -- confirm if the data source changes.
#
# Casting to float32 before dividing avoids NumPy promoting the result to
# float64, which would double the memory taken by the images.
if np.issubdtype(x_train.dtype, np.integer):
    x_train = x_train.astype("float32") / 255.0
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test.astype("float32") / 255.0

In [None]:
# CNNs expect a 4-dimensional input: (samples, height, width, channels).
# Our images are currently 3D: (samples, height, width).
# We'll add a channel dimension for grayscale (1 channel).
#
# The `ndim == 3` guard keeps this cell safe to re-run: without it, every
# additional run would append yet another trailing axis and the arrays would
# no longer match the (28, 28, 1) input shape the model is built for.
if x_train.ndim == 3:
    x_train = np.expand_dims(x_train, axis=-1)
if x_test.ndim == 3:
    x_test = np.expand_dims(x_test, axis=-1)

In [None]:
# The labels (y_train, y_test) are single integers (e.g., 5).
# We need to one-hot encode them for multi-class classification.
# For example, the digit 5 becomes a vector [0, 0, 0, 0, 0, 1, 0, 0, 0, 0].
#
# The `ndim == 1` guard keeps this cell safe to re-run: the labels are only
# encoded while they are still a flat vector of class indices. Without it, a
# second run would feed the already-encoded matrix back into
# `to_categorical` and corrupt the label shape.
if y_train.ndim == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
if y_test.ndim == 1:
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

print("\n--- Data Preprocessing Complete ---")
print(f"Reshaped training images shape: {x_train.shape}")
print(f"One-hot encoded training labels shape: {y_train.shape}")
print("-" * 35)

In [None]:
# --- 4. Build the Convolutional Neural Network (CNN) model ---
# A sequential model is a linear stack of layers.
model = tf.keras.models.Sequential([
    # Explicit input specification: 28x28 images with a single (grayscale)
    # channel. Declaring the shape with an Input object, rather than passing
    # `input_shape=` to the first layer, is the form recommended by Keras and
    # avoids the warning newer Keras releases emit for the keyword form.
    tf.keras.Input(shape=(28, 28, 1)),

    # First convolutional layer with 32 filters, a 3x3 kernel,
    # and ReLU activation.
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    # Max pooling layer to downsample the feature maps.
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Second convolutional layer with 64 filters.
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    # Another max pooling layer.
    tf.keras.layers.MaxPooling2D((2, 2)),

    # Flatten the 2D feature maps to a 1D vector to feed into the dense layers.
    tf.keras.layers.Flatten(),

    # Dense hidden layer with 128 units and ReLU activation.
    tf.keras.layers.Dense(128, activation='relu'),
    # Dropout layer to help prevent overfitting.
    tf.keras.layers.Dropout(0.5),

    # Output layer with 10 units (for digits 0-9) and softmax activation.
    # Softmax ensures the outputs sum to 1, representing probabilities.
    tf.keras.layers.Dense(10, activation='softmax')
])


In [None]:
# Print a banner, then Keras's built-in summary of `model`, then a divider.
print("\n--- Model Architecture Defined ---")
model.summary()
print(35 * "-")
