Installing TensorFlow and numpy

Importing libraries and defining parameters.

Using an input dimension of 784 and an output dimension of 10, along with two hidden layers.

In [9]:
!pip install tensorflow



In [8]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist

# Load MNIST as (images, labels) pairs for the training and the test split
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Sanity check: report the shape of the raw training images
print("Original x_train shape:", x_train.shape)

Original x_train shape: (60000, 28, 28)


Normalization and flattening

In [9]:
def _scale_pixels(images):
    """Return `images` as float32 with integer pixel values scaled by 1/255.

    Integer input is assumed to hold raw 0-255 pixel intensities and is divided
    by 255. Input that is already floating point is treated as previously
    scaled and is only cast, so re-running this cell does not divide by 255 a
    second time.
    """
    if np.issubdtype(images.dtype, np.floating):
        return images.astype("float32")
    return images.astype("float32") / 255.0


# Convert to float32 and normalize (idempotent: safe to run this cell again)
x_train = _scale_pixels(x_train)
x_test = _scale_pixels(x_test)

# Calculate input dimension: height * width of one image
input_dim = x_train.shape[1] * x_train.shape[2]  # This will be 784

# Flatten the images (28x28 -> 784), keeping one row per sample
x_train_flattened = x_train.reshape((x_train.shape[0], input_dim))
x_test_flattened = x_test.reshape((x_test.shape[0], input_dim))

# Output check:
print(f"Flattened x_train shape: {x_train_flattened.shape}")

Flattened x_train shape: (60000, 784)


Preparing the target labels for the model

In [11]:
# Number of target classes (the digits 0-9)
num_classes = 10

# Turn every integer label into a one-hot vector of length `num_classes`,
# applying the same encoding to the training and the test labels
y_train_encoded, y_test_encoded = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

Defines the structure of the neural network

The input accepts the flattened image.
The 1st hidden layer learns features from the input (ReLU)
The 2nd hidden layer learns more complex features (ReLU)
The output generates class probabilities (Softmax)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable on "Restart & Run All".
# NOTE(review): bit-exact repeatability on GPU may additionally require
# enabling op determinism; confirm if that matters here.
tf.keras.utils.set_random_seed(42)

# Fully connected classifier:
#   flattened image (input_dim) -> 128 ReLU -> 64 ReLU -> num_classes softmax
model = Sequential([
    Input(shape=(input_dim,), name='input_layer'),
    Dense(128, activation='relu', name='hidden_layer_1'),
    Dense(64, activation='relu', name='hidden_layer_2'),
    Dense(num_classes, activation='softmax', name='output_layer')
])

# categorical_crossentropy matches the one-hot encoded labels; accuracy is
# tracked as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Starts the learning process

Training the model using the model.fit() function to initiate the training loop.

Using the following parameters:
x & y
Using 10 epochs so the model will iterate over the entire training dataset 10 times.
Batch size 32
Validation split of 0.1, which holds out 10% of the training data so that loss and accuracy on samples the model is not trained on can be monitored during training to help detect overfitting.

In [13]:
# Train for 10 epochs in mini-batches of 32 samples; validation_split=0.1 sets
# aside a fraction of the training data to report val_loss / val_accuracy.
# The returned History object is kept in `history`.
history = model.fit(
    x=x_train_flattened,
    y=y_train_encoded,
    batch_size=32,
    epochs=10,
    validation_split=0.1,
)

Epoch 1/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9241 - loss: 0.2557 - val_accuracy: 0.9628 - val_loss: 0.1217
Epoch 2/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.9676 - loss: 0.1062 - val_accuracy: 0.9727 - val_loss: 0.0959
Epoch 3/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9781 - loss: 0.0727 - val_accuracy: 0.9738 - val_loss: 0.0877
Epoch 4/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9826 - loss: 0.0549 - val_accuracy: 0.9755 - val_loss: 0.0910
Epoch 5/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9860 - loss: 0.0435 - val_accuracy: 0.9752 - val_loss: 0.0930
Epoch 6/10
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9888 - loss: 0.0340 - val_accuracy: 0.9782 - val_loss: 0.0817
Epoch 7/10
[1m1

#Compiling the model by defining the loss function (categorical_crossentropy), the optimizer (adam), and the metrics (accuracy) to track. This ensures the model knows how to learn.

At this point we have prepared the data, defined the model architecture, and trained the model. Now it's time to evaluate the performance by feeding the testing images (x) to the trained model and comparing the model's predictions to the true testing labels (y), then reporting the final loss and accuracy.

In [14]:
# Score the trained model on the test images and their one-hot labels;
# verbose=0 suppresses the progress output
loss, accuracy = model.evaluate(x=x_test_flattened, y=y_test_encoded, verbose=0)

print("\n--- Evaluation Results ---")
print("Test Loss: {:.4f}".format(loss))
print("Test Accuracy: {:.4f}".format(accuracy))


--- Evaluation Results ---
Test Loss: 0.0948
Test Accuracy: 0.9779


Test case 1 to calculate the overall accuracy and loss across the entire test dataset.

In [15]:
print("--- Running Final Model Evaluation on the Entire Test Set ---")

# Compute loss and accuracy over the whole test set
# (verbose=0 keeps the cell output limited to the summary below)
loss, accuracy = model.evaluate(x_test_flattened, y_test_encoded, verbose=0)

# Emit the three summary lines in one call, one line per item
print(
    "\nEvaluation Results:",
    f"Test Loss: {loss:.4f}",
    f"Test Accuracy: {accuracy:.4f}",
    sep="\n",
)

--- Running Final Model Evaluation on the Entire Test Set ---

Evaluation Results:
Test Loss: 0.0948
Test Accuracy: 0.9779


Test case 2 to compare model's prediction to the actual label

In [2]:
import numpy as np
import matplotlib.pyplot as plt

# This cell needs `model`, `x_test`, `y_test` and `x_test_flattened` from the
# earlier cells; run the notebook top to bottom before executing it.

# 1. Select a random index into the test set
random_index = np.random.randint(0, len(x_test_flattened))

# 2. Get the sample image and true label
# We use the original x_test for visualization (28x28) and y_test for the true label (integer)
sample_image = x_test[random_index]
true_label = y_test[random_index]

# 3. Prepare the image for prediction
# The model expects a batch of inputs. Slicing (instead of indexing followed by
# reshape(1, 784)) keeps the leading batch axis without hard-coding the
# flattened input size.
input_for_model = x_test_flattened[random_index:random_index + 1]

# 4. Get the prediction
# model.predict returns one row of class probabilities per input sample
prediction_probs = model.predict(input_for_model, verbose=0)

# 5. Determine the final prediction (the class with the highest probability)
class_probs = prediction_probs[0]
predicted_label = np.argmax(class_probs)


# --- 6. Display Results ---
# One tick per class, derived from the model output rather than a literal 10
class_ids = np.arange(class_probs.shape[0])
fig, (ax_image, ax_probs) = plt.subplots(1, 2, figsize=(6, 3))

# Display the image
ax_image.imshow(sample_image, cmap='gray')
ax_image.set_title(f"True Label: {true_label}")
ax_image.axis('off')

# Display the prediction bar chart
ax_probs.bar(class_ids, class_probs)
ax_probs.set_xticks(class_ids)
ax_probs.set_title(f"Model Prediction: {predicted_label}")
ax_probs.set_xlabel("Digit Class")
ax_probs.set_ylabel("Probability")
fig.tight_layout()
plt.show()

print(f"Prediction Details: The model predicted the digit {predicted_label} with a confidence of {prediction_probs[0][predicted_label]*100:.2f}%.")

NameError: name 'x_test_flattened' is not defined