# Import Required Libraries
Import necessary libraries such as numpy, tensorflow/keras, and matplotlib for visualization.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

import sys

# Make the parent directory importable so the project packages used later in
# the notebook can be found. The membership check keeps this cell idempotent:
# re-running it does not pile up duplicate entries in sys.path.
if '../' not in sys.path:
    sys.path.append('../')  # To import from parent directory

2025-08-29 13:25:53.004479: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-29 13:25:53.040938: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-29 13:25:53.784937: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


# Load the MNIST Dataset
Load the MNIST dataset using Keras datasets API and split into training and test sets.

In [2]:
# Pull both MNIST splits through the Keras datasets API, then unpack each
# pair into the image/label names the later cells rely on.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Report the number of samples in each split as a quick sanity check.
print(f"Training samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

Training samples: 60000, Test samples: 10000


# Preprocess the Data
Normalize the pixel values to [0, 1], flatten each 28×28 image into a 784-element vector, and convert the labels to one-hot (categorical) format suitable for classification.

In [3]:
def _scale_and_flatten(images):
    """Return `images` as float32 pixel values in [0, 1], one 28*28 vector per row.

    Integer-typed input is treated as raw 0-255 pixel data and is scaled.
    Any other dtype is assumed to have been scaled by an earlier run of this
    cell and is only reshaped. This keeps the cell idempotent: re-running it
    no longer divides the pixels by 255 a second time.
    """
    if np.issubdtype(images.dtype, np.integer):
        images = images.astype(np.float32) / 255.0
    return images.reshape(-1, 28*28)


# Normalize images to [0, 1] and flatten
X_train = _scale_and_flatten(X_train)
X_test = _scale_and_flatten(X_test)

# Convert labels to categorical (one-hot) over the 10 digit classes
y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

# Instantiate the MLP Model
Instantiate a Multi-Layer Perceptron (MLP) from the project's custom implementation and attach an SGD optimizer to it.

In [4]:
from models.mlp import MLP
from algorithms.sgd import SGD
from loss_functions.cross_entropy import cross_entropy_loss, cross_entropy_grad

# Layer sizes: flattened 28x28 input, a 128-unit hidden layer, 10 output classes.
input_dim, hidden_dim, output_dim = 28*28, 128, 10

# Build the network, then hand it to an SGD optimizer with learning rate 0.1.
mlp = MLP(input_dim, hidden_dim, output_dim)
optimizer = SGD(mlp, lr=0.1)

# Train the Model
Train the MLP model on the training data and monitor training metrics.

In [5]:
# Fit the network on the flattened training images and one-hot labels,
# passing the cross-entropy loss and its gradient to the optimizer.
# With verbose=True the run presumably emits the per-epoch loss and timing
# log captured in this cell's output.
optimizer.train(
    X_train,
    y_train_cat,
    cross_entropy_loss,
    cross_entropy_grad,
    epochs=10,
    batch_size=128,
    verbose=True,
)

Epoch 1/10, Loss: 2.2074
  Forward pass time: 2.4922 seconds
  Backward pass time: 5.5973 seconds
  Model Forward Measurements:
fc1 - time: 2.411245584487915
relu - time: 0.011876583099365234
fc2 - time: 0.02692890167236328
softmax - time: 0.03602290153503418
  Model Backward Measurements:
fc1 - time: 4.54232382774353
relu - time: 0.011155843734741211
fc2 - time: 0.03374814987182617
softmax - time: 0.9691014289855957
  Loss Forward Measurements:
cross_entropy - time: 0.0483708381652832
  Loss Backward Measurements:
cross_entropy - time: 0.011192798614501953
Epoch 2/10, Loss: 1.1844
  Forward pass time: 2.4604 seconds
  Backward pass time: 6.0179 seconds
  Model Forward Measurements:
fc1 - time: 2.3513565063476562
relu - time: 0.014025688171386719
fc2 - time: 0.027215003967285156
softmax - time: 0.056749582290649414
  Model Backward Measurements:
fc1 - time: 4.931543827056885
relu - time: 0.01647472381591797
fc2 - time: 0.034404754638671875
softmax - time: 0.9919476509094238
  Loss Forw

# Validate Model Performance on Test Set
Evaluate the trained model on the test set and display accuracy and loss metrics.

In [6]:
# Run the whole test set through the network; only the first element of the
# returned pair is used here.
pred_probs, _ = mlp.forward(X_test)

# Reduce per-class scores and one-hot rows to integer class ids.
pred_labels = np.argmax(pred_probs, axis=1)
true_labels = y_test_cat.argmax(axis=1)

# Accuracy is the fraction of predictions that match the true label.
accuracy = (pred_labels == true_labels).mean()

# Loss on the test set, using the same loss function as training.
test_loss, _ = cross_entropy_loss(pred_probs, y_test_cat)

print(f"Test Accuracy: {accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

Test Accuracy: 92.94%
Test Loss: 0.2794
