# Import Required Libraries
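Import the required libraries: NumPy for array math, the Keras MNIST loader and one-hot label utility from TensorFlow, and Matplotlib for visualization. The parent directory is also appended to `sys.path` so the project's local packages can be imported.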

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

import sys

# Make the parent directory importable so the project's local packages
# resolve from this notebook. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate '../' entry.
if '../' not in sys.path:
    sys.path.append('../')  # To import from parent directory

# Load the MNIST Dataset
Load the MNIST dataset with the Keras datasets API, which returns it already split into training and test sets.

In [2]:
# The Keras loader hands back two (images, labels) pairs: train first, then test.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Report how many samples landed in each split.
print(f"Training samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

Training samples: 60000, Test samples: 10000


# Preprocess the Data
Scale the pixel values to [0, 1], flatten each 28×28 image into a 784-element vector, and one-hot encode the labels so they are suitable for classification.

In [3]:
# Normalize images to [0, 1] and flatten
def _prepare_images(images):
    """Flatten each image to a 784-element row and scale pixels to [0, 1].

    Integer input is treated as raw pixel data and divided by 255 after
    conversion to float32. Input that is already floating point is assumed
    to have been scaled by an earlier run of this cell and is only flattened
    and cast, so re-running the cell does not divide by 255 a second time.

    Parameters
    ----------
    images : np.ndarray
        Image array whose total size is a multiple of 28*28
        (presumably (n, 28, 28) integer pixels on the first run, or
        (n, 784) floats on a re-run -- verify if the loader changes).

    Returns
    -------
    np.ndarray
        float32 array of shape (n, 784).
    """
    flat = images.reshape(-1, 28*28)
    if np.issubdtype(flat.dtype, np.integer):
        # Raw pixels: convert first so the division happens in float32.
        return flat.astype(np.float32) / 255.0
    # Already floating point: do not rescale again.
    return flat.astype(np.float32, copy=False)


X_train = _prepare_images(X_train)
X_test = _prepare_images(X_test)

# Convert labels to categorical (one-hot)
y_train_cat = to_categorical(y_train, 10)
y_test_cat = to_categorical(y_test, 10)

# Instantiate the MLP Model
Instantiate a Multi-Layer Perceptron (MLP) and its SGD optimizer using the project's custom implementation.

In [4]:
from models.mlp import MLP
from algorithms.sgd import SGD
from loss_functions.cross_entropy import cross_entropy_loss, cross_entropy_grad

# Layer widths: flattened 28x28 input, hidden width, and 10 output classes
input_dim, hidden_dim, output_dim = 28 * 28, 128, 10

# Build the network, then hand it to the SGD optimizer (lr=0.1)
mlp = MLP(input_dim, hidden_dim, output_dim)
optimizer = SGD(mlp, lr=0.1)

# Train the Model
Train the MLP on the training data for 10 epochs with mini-batches of 128 samples, printing the loss and forward/backward timing measurements for each epoch.

In [5]:
# Fit the network on the flattened images and one-hot labels, using the
# cross-entropy loss and its gradient; verbose=True requests progress output.
optimizer.train(
    X_train,
    y_train_cat,
    cross_entropy_loss,
    cross_entropy_grad,
    epochs=10,
    batch_size=128,
    verbose=True,
)

Epoch 1/10, Loss: 2.2064
  Forward pass time: 0.1881 seconds
  Backward pass time: 0.8821 seconds
  Model Forward Measurements:
fc1 - time: 0.1535334587097168
relu - time: 0.00441288948059082
fc2 - time: 0.009857177734375
softmax - time: 0.017409086227416992
  Model Backward Measurements:
fc1 - time: 0.24909710884094238
relu - time: 0.010662317276000977
fc2 - time: 0.02085709571838379
softmax - time: 0.5862538814544678
  Loss Forward Measurements:
cross_entropy - time: 0.01747751235961914
  Loss Backward Measurements:
cross_entropy - time: 0.008481502532958984
Epoch 2/10, Loss: 1.1591
  Forward pass time: 0.1957 seconds
  Backward pass time: 0.8817 seconds
  Model Forward Measurements:
fc1 - time: 0.15528202056884766
relu - time: 0.00479888916015625
fc2 - time: 0.009662628173828125
softmax - time: 0.017276525497436523
  Model Backward Measurements:
fc1 - time: 0.2501981258392334
relu - time: 0.011608362197875977
fc2 - time: 0.021292924880981445
softmax - time: 0.5837726593017578
  Loss

# Validate Model Performance on Test Set
Evaluate the trained model on the test set and display accuracy and loss metrics.

In [6]:
# Score the held-out split: run a forward pass, then derive accuracy and loss.
pred_probs, _ = mlp.forward(X_test)  # second return value is not needed here

# Reduce per-class scores and one-hot rows to class indices.
pred_labels = np.argmax(pred_probs, axis=1)
true_labels = y_test_cat.argmax(axis=1)

# Fraction of test samples whose predicted class matches the true class.
accuracy = (pred_labels == true_labels).mean()

# Cross-entropy of the predictions against the one-hot targets.
test_loss, _ = cross_entropy_loss(pred_probs, y_test_cat)

print(f"Test Accuracy: {accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

Test Accuracy: 92.63%
Test Loss: 0.2817
