# Importing necessary libraries

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt

In [None]:
# Step 1: Load and preprocess the dataset
# The MNIST dataset comes preloaded in Keras

Loading the MNIST Dataset:

The dataset consists of 28x28 grayscale images of handwritten digits (0-9).
The dataset ships already split into training and testing sets: `mnist.load_data()` returns 60,000 training images and 10,000 test images, each paired with its digit label.

# load_data() hands back two (images, labels) pairs: the training pair first,
# then the test pair.
train_pair, test_pair = mnist.load_data()
x_train, y_train = train_pair
x_test, y_test = test_pair

# Normalize the pixel values to the range [0, 1]

Preprocessing:

The pixel values of the images are originally between 0 and 255, so we normalize them by dividing by 255 to scale them to the range [0, 1]. Keeping the inputs in a small, consistent range helps gradient-based training converge faster and more reliably.

In [None]:
# Scale pixel intensities from [0, 255] down to [0, 1].
# Cast to float32 before dividing: dividing integer image arrays by a Python
# float would otherwise promote them to float64, doubling memory use for no
# benefit (assumes load_data() returned integer arrays, as Keras documents).
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Step 2: Build the model

In [None]:
# Simple fully connected classifier for 28x28 grayscale digits.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which newer Keras releases warn against.
model = models.Sequential([
    layers.Input(shape=(28, 28)),  # One 28x28 image per sample
    layers.Flatten(),  # Flatten the 28x28 images into a 1D vector of 784 pixels
    layers.Dense(128, activation='relu'),  # Fully connected layer with 128 neurons and ReLU activation
    layers.Dropout(0.2),  # Dropout layer to prevent overfitting
    layers.Dense(10, activation='softmax'),  # Output layer with 10 neurons (one for each digit) and softmax activation
])


Flatten(): The input images are 2D (28x28 pixels). We flatten them into 1D arrays of 784 elements to input into the neural network.

Dense(128, activation='relu'): This is a fully connected hidden layer with 128 neurons. We use the ReLU activation function to introduce non-linearity into the network.

Dropout(0.2): During training, dropout randomly sets 20% of the previous layer's outputs to zero on each update; it is inactive during evaluation and prediction. This helps prevent overfitting by ensuring the model doesn’t become overly dependent on specific neurons.

Dense(10, activation='softmax'): The output layer has 10 neurons (one for each digit 0-9). The softmax activation function ensures that the output is a probability distribution over the 10 possible classes.

# Step 3: Compile the model
# Use sparse_categorical_crossentropy because we have integer labels (0-9)

We compile the model using the Adam optimizer, an adaptive learning-rate optimizer that usually works well with its default settings, and we track accuracy as the reported metric.

The loss function is sparse_categorical_crossentropy, which is suitable for multi-class classification problems when the labels are integers.

In [None]:
# Training configuration: Adam optimizer, cross-entropy loss for integer class
# labels, and accuracy reported alongside the loss.
compile_options = {
    "optimizer": "adam",
    "loss": "sparse_categorical_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

# Step 4: Train the model

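We train the model for 5 epochs using the training data and validate it using the test data. An epoch is one complete pass through the training data.
During training, the model updates its weights to minimize the loss; the validation data is only used to report loss and accuracy at the end of each epoch, never to update the weights. (Reusing the test set for validation keeps this example short; when tuning a model, hold out a separate validation split so the test set stays truly unseen.)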

# Run five passes over the training set. The (x_test, y_test) pair is passed
# as validation data, and the object returned by fit() is kept in `history`.
validation_pair = (x_test, y_test)
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    validation_data=validation_pair,
)


# Step 5: Evaluate the model on the test set

After training, we evaluate the model on the test set to see how well it performs on unseen data. The test accuracy is displayed.

In [None]:
# Score the trained model on the test set; the result unpacks into the loss
# followed by the accuracy metric requested at compile time.
evaluation = model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = evaluation
print(f"\nTest accuracy: {test_acc}")

# Step 6: Make predictions

We use the trained model to predict the labels for the test images.
We display some test images along with their predicted and true labels for visual inspection.

In [None]:
# Run the model over every test image; each row of `predictions` holds the
# softmax output of the final 10-unit layer for one image.
predictions = model.predict(x=x_test)

# Step 7: Display some test images with their predicted labels

In [None]:
# Show the first 25 test digits in a 5x5 grid, each captioned with the model's
# predicted class and the true label.
num_shown = 25

# Pick the most probable class for all displayed images with one vectorized
# NumPy call instead of running a TensorFlow op on every loop iteration
# (model.predict is expected to return a NumPy array of per-class scores).
predicted_labels = predictions[:num_shown].argmax(axis=1)

plt.figure(figsize=(10, 10))
# zip() stops at the shortest input, so at most num_shown images are drawn
# even though x_test and y_test are longer.
shown = zip(x_test, predicted_labels, y_test)
for position, (image, predicted_label, true_label) in enumerate(shown, start=1):
    plt.subplot(5, 5, position)
    # Hide ticks and grid lines: the axes carry no information for an image.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    plt.xlabel(f"Pred: {predicted_label}, True: {true_label}")
plt.show()