In [20]:
#
# This code comes from: https://www.kaggle.com/code/hojjatk/read-mnist-dataset
#
import matplotlib
matplotlib.use('TkAgg')
import numpy as np  # linear algebra
import struct
from array import array
from os.path import join
import random
import matplotlib.pyplot as plt


#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Loads MNIST-style image/label pairs stored in the IDX binary format.

    The four file paths (train images, train labels, test images, test labels)
    are only stored by the constructor; files are opened when `load_data` or
    `read_images_labels` is called.
    """

    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: Path to an IDX image file (magic number 2051).
            labels_filepath: Path to an IDX label file (magic number 2049).

        Returns:
            (images, labels): `images` is a list with one 2-D uint8 array of
            shape (rows, cols) per image, where rows/cols come from the file
            header; `labels` is an `array('B')` of the raw label bytes.

        Raises:
            ValueError: If either magic number is wrong, or the image payload
                holds fewer bytes than the header promises.
        """
        with open(labels_filepath, 'rb') as file:
            # Header: two big-endian uint32 values (magic number, item count).
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # Header: four big-endian uint32 values (magic, count, rows, cols).
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        # Use the dimensions declared in the header instead of assuming 28x28,
        # and reshape the whole payload in one step rather than per image.
        pixel_count = size * rows * cols
        pixels = np.array(image_data[:pixel_count], dtype=np.uint8)
        images = list(pixels.reshape(size, rows, cols))

        return images, labels

    def load_data(self):
        """Load both splits and return them as NumPy arrays.

        Returns:
            ((x_train, y_train), (x_test, y_test)) where each x is stacked into
            an array of shape (n_images, rows, cols) and each y is a 1-D array
            of labels.
        """
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (np.array(x_train), np.array(y_train)),(np.array(x_test), np.array(y_test))

#
# Set file paths based on added MNIST Datasets
#
input_path = '/Users/mubaraqolojo/Downloads/MLP/archive'
# Resolve the four IDX files relative to the dataset directory in one pass.
(training_images_filepath,
 training_labels_filepath,
 test_images_filepath,
 test_labels_filepath) = (
    join(input_path, idx_filename)
    for idx_filename in (
        'train-images.idx3-ubyte',
        'train-labels.idx1-ubyte',
        't10k-images.idx3-ubyte',
        't10k-labels.idx1-ubyte',
    )
)

#
# Helper function to show a list of images with their corresponding titles
#
def show_images(images, title_texts):
    """Display images in a 5-column grid, one subplot per (image, title) pair.

    Args:
        images: Sized sequence of 2-D arrays accepted by `plt.imshow`.
        title_texts: Titles paired positionally with `images`; an empty string
            leaves that subplot without a title. Pairing stops at the shorter
            of the two sequences.
    """
    cols = 5
    # Ceiling division: exactly enough rows for the images. The previous
    # int(len/cols) + 1 added a blank row whenever len(images) was a multiple
    # of cols. Keep at least one row so an empty input still yields a valid grid.
    rows = max(1, (len(images) + cols - 1) // cols)
    plt.figure(figsize=(30,20))
    for index, (image, title_text) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(image, cmap=plt.cm.gray)
        if title_text != '':
            plt.title(title_text, fontsize=15)
    plt.show()

#
# Load MNIST dataset
#
mnist_dataloader = MnistDataloader(
    training_images_filepath,
    training_labels_filepath,
    test_images_filepath,
    test_labels_filepath,
)
(x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

# Persist both splits as .npy files; images are flattened to 784 values per row.
for npy_path, payload in (
    ('/Users/mubaraqolojo/Downloads/MLP/archive/mnist-train-x.npy', x_train.reshape(len(x_train), 784)),
    ('/Users/mubaraqolojo/Downloads/MLP/archive/mnist-train-y.npy', y_train),
    ('/Users/mubaraqolojo/Downloads/MLP/archive/mnist-test-x.npy', x_test.reshape(len(x_test), 784)),
    ('/Users/mubaraqolojo/Downloads/MLP/archive/mnist-test-y.npy', y_test),
):
    np.save(npy_path, payload)

#
# Show some random training and test images
#
images_2_show = []
titles_2_show = []
# random.randint(a, b) includes b, so randint(1, 60000) could produce index
# 60000 (out of range) and never index 0. randrange(n) draws from 0..n-1 and
# follows the actual array length instead of a hard-coded dataset size.
for _ in range(10):
    r = random.randrange(len(x_train))
    images_2_show.append(x_train[r])
    titles_2_show.append('training image [' + str(r) + '] = ' + str(y_train[r]))

for _ in range(5):
    r = random.randrange(len(x_test))
    images_2_show.append(x_test[r])
    titles_2_show.append('test image [' + str(r) + '] = ' + str(y_test[r]))

show_images(images_2_show, titles_2_show)

In [21]:
#!/usr/bin/env python3

import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import struct
from array import array
from os.path import join
import random

# Import MLP classes from your mlp.py implementation.
# Ensure that your mlp.py includes the CrossEntropy loss and Softmax activation.
from mlp import Layer, MultilayerPerceptron, CrossEntropy, Relu, Softmax

#
# MNIST Data Loader Class
#
class MnistDataloader(object):
    """Loads MNIST-style image/label pairs stored in the IDX binary format.

    The constructor only records the four file paths; the files are read when
    `load_data` or `read_images_labels` is called.
    """

    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: Path to an IDX image file (magic number 2051).
            labels_filepath: Path to an IDX label file (magic number 2049).

        Returns:
            (images, labels): `images` is a list of 2-D uint8 arrays shaped
            (rows, cols) as declared in the file header; `labels` is an
            `array('B')` holding the raw label bytes.

        Raises:
            ValueError: If a magic number does not match, or the image payload
                is shorter than the header declares.
        """
        with open(labels_filepath, 'rb') as file:
            # Header: big-endian uint32 magic number followed by the item count.
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # Header: big-endian uint32 magic, image count, rows, cols.
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        # Honour the header's rows/cols rather than a hard-coded 28x28, and
        # reshape the full payload once instead of slicing image by image.
        pixel_count = size * rows * cols
        pixels = np.array(image_data[:pixel_count], dtype=np.uint8)
        images = list(pixels.reshape(size, rows, cols))
        return images, labels

    def load_data(self):
        """Load both splits as ((x_train, y_train), (x_test, y_test)) NumPy arrays."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (np.array(x_train), np.array(y_train)), (np.array(x_test), np.array(y_test))

#
# Helper function to display a list of images with titles.
#
def show_images(images, title_texts):
    """Display images in a 5-column grid with axes hidden and a title on each.

    Args:
        images: Sized sequence of 2-D arrays accepted by `plt.imshow`.
        title_texts: Titles paired positionally with `images`; pairing stops at
            the shorter of the two sequences.
    """
    cols = 5
    # Ceiling division gives exactly the rows needed; int(len/cols) + 1 left an
    # empty row whenever len(images) was a multiple of cols (e.g. 10 images).
    # At least one row is kept so an empty input still produces a valid grid.
    rows = max(1, (len(images) + cols - 1) // cols)
    plt.figure(figsize=(12, 12))
    for index, (image, title_text) in enumerate(zip(images, title_texts), start=1):
        plt.subplot(rows, cols, index)
        plt.imshow(image, cmap=plt.cm.gray)
        plt.title(title_text, fontsize=10)
        plt.axis('off')
    plt.show()

#
# Helper function to one-hot encode label vectors.
#
def one_hot_encode(labels, num_classes=10):
    """Return a (len(labels), num_classes) float array with a 1 at each row's label column."""
    n_samples = len(labels)
    encoded = np.zeros(shape=(n_samples, num_classes))
    row_ids = np.arange(n_samples)
    # Paired fancy indexing: row k gets its 1 in column labels[k].
    encoded[row_ids, labels] = 1
    return encoded

def main(input_path='/Users/mubaraqolojo/Downloads/MLP/archive', seed=None):
    """Train a 784 -> 128 -> 10 MLP on MNIST, report test accuracy and plot results.

    Args:
        input_path: Directory containing the four MNIST IDX files. The default
            is the location this notebook was originally written against.
        seed: Optional integer used to seed NumPy's global RNG before any
            random work, which fixes the sample images picked for display.
            Whether it also fixes weight initialisation and batching depends
            on how mlp.py draws random numbers (not visible here). `None`
            leaves the RNG untouched.
    """
    if seed is not None:
        np.random.seed(seed)

    # Set file paths for MNIST dataset
    training_images_filepath = join(input_path, 'train-images.idx3-ubyte')
    training_labels_filepath = join(input_path, 'train-labels.idx1-ubyte')
    test_images_filepath = join(input_path, 't10k-images.idx3-ubyte')
    test_labels_filepath = join(input_path, 't10k-labels.idx1-ubyte')

    # Load MNIST dataset
    mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath,
                                       test_images_filepath, test_labels_filepath)
    (x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

    # Preprocess images: flatten and normalize pixel values to [0, 1]
    x_train = x_train.reshape(len(x_train), 784).astype(np.float32) / 255.0
    x_test = x_test.reshape(len(x_test), 784).astype(np.float32) / 255.0

    # Ensure labels are integer type.
    y_train = np.array(y_train, dtype=np.int32)
    y_test = np.array(y_test, dtype=np.int32)

    # Only the training labels need one-hot encoding: test accuracy below is
    # computed against the integer labels directly.
    y_train_onehot = one_hot_encode(y_train, num_classes=10)

    # Split training data into training and validation sets (90% / 10%).
    # The split is positional: the last 10% of the file becomes validation.
    num_train = int(0.9 * len(x_train))
    train_x, val_x = x_train[:num_train], x_train[num_train:]
    train_y, val_y = y_train_onehot[:num_train], y_train_onehot[num_train:]

    # Build an MLP for MNIST classification: 784 -> 128 -> 10
    layers = [
        Layer(fan_in=784, fan_out=128, activation_function=Relu()),
        Layer(fan_in=128, fan_out=10, activation_function=Softmax())
    ]
    mlp = MultilayerPerceptron(layers)

    # Use CrossEntropy loss for classification.
    loss_func = CrossEntropy()

    # Training parameters
    learning_rate = 1e-3
    batch_size = 64
    epochs = 20

    # Train the network
    train_losses, val_losses = mlp.train(
        train_x, train_y,
        val_x, val_y,
        loss_func=loss_func,
        learning_rate=learning_rate,
        batch_size=batch_size,
        epochs=epochs
    )

    # Evaluate on test set.
    # NOTE(review): training=False presumably disables training-only behaviour
    # such as dropout - confirm against mlp.py.
    test_pred = mlp.forward(x_test, training=False)
    # Get predicted classes as the index with highest probability
    pred_classes = np.argmax(test_pred, axis=1)
    # Compute test accuracy
    accuracy = np.mean(pred_classes == y_test)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Plot training and validation loss curves. The x-axis is sized from the
    # returned histories so the plot still works if train() returns a number
    # of entries different from `epochs`.
    plt.figure(figsize=(8,6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('MNIST: Training vs Validation Loss')
    plt.legend()
    plt.show()

    # Display some random test images along with true and predicted labels
    sample_indices = np.random.choice(len(x_test), 10, replace=False)
    sample_images = x_test[sample_indices].reshape(-1, 28, 28)
    sample_titles = [f"True: {y_test[i]}, Pred: {pred_classes[i]}" for i in sample_indices]
    show_images(sample_images, sample_titles)

if __name__ == "__main__":
    main()


Epoch [1/20] | Train Loss: 1.944908 | Val Loss: 1.582879
Epoch [2/20] | Train Loss: 1.383735 | Val Loss: 1.127302
Epoch [3/20] | Train Loss: 1.050952 | Val Loss: 0.868615
Epoch [4/20] | Train Loss: 0.860301 | Val Loss: 0.717082
Epoch [5/20] | Train Loss: 0.743411 | Val Loss: 0.620729
Epoch [6/20] | Train Loss: 0.665682 | Val Loss: 0.554935
Epoch [7/20] | Train Loss: 0.610425 | Val Loss: 0.507278
Epoch [8/20] | Train Loss: 0.569186 | Val Loss: 0.471442
Epoch [9/20] | Train Loss: 0.537236 | Val Loss: 0.443789
Epoch [10/20] | Train Loss: 0.511618 | Val Loss: 0.421370
Epoch [11/20] | Train Loss: 0.490595 | Val Loss: 0.403347
Epoch [12/20] | Train Loss: 0.472981 | Val Loss: 0.387858
Epoch [13/20] | Train Loss: 0.457972 | Val Loss: 0.374796
Epoch [14/20] | Train Loss: 0.444963 | Val Loss: 0.363812
Epoch [15/20] | Train Loss: 0.433569 | Val Loss: 0.354065
Epoch [16/20] | Train Loss: 0.423411 | Val Loss: 0.345543
Epoch [17/20] | Train Loss: 0.414329 | Val Loss: 0.337907
Epoch [18/20] | Train L

In [22]:
#!/usr/bin/env python3
import numpy as np
import struct
from array import array
from os.path import join
import random
import matplotlib.pyplot as plt

# Import your MLP implementation (make sure mlp.py is in your working directory)
from mlp import Layer, MultilayerPerceptron, CrossEntropy, Relu, Softmax

###############################################################################
# MNIST Data Loader Class
###############################################################################
class MnistDataloader(object):
    """Loads MNIST-style image/label pairs stored in the IDX binary format.

    The constructor only stores the four file paths; nothing is read from disk
    until `load_data` or `read_images_labels` is called.
    """

    def __init__(self, training_images_filepath, training_labels_filepath,
                 test_images_filepath, test_labels_filepath):
        self.training_images_filepath = training_images_filepath
        self.training_labels_filepath = training_labels_filepath
        self.test_images_filepath = test_images_filepath
        self.test_labels_filepath = test_labels_filepath

    def read_images_labels(self, images_filepath, labels_filepath):
        """Read one image file and its matching label file.

        Args:
            images_filepath: Path to an IDX image file (magic number 2051).
            labels_filepath: Path to an IDX label file (magic number 2049).

        Returns:
            (images, labels): `images` is a list of 2-D uint8 arrays whose
            shape (rows, cols) is taken from the file header; `labels` is an
            `array('B')` of the raw label bytes.

        Raises:
            ValueError: On a magic-number mismatch, or when the image payload
                is shorter than the header declares.
        """
        with open(labels_filepath, 'rb') as file:
            # Header: big-endian uint32 magic number, then the item count.
            magic, size = struct.unpack(">II", file.read(8))
            if magic != 2049:
                raise ValueError('Magic number mismatch, expected 2049, got {}'.format(magic))
            labels = array("B", file.read())

        with open(images_filepath, 'rb') as file:
            # Header: big-endian uint32 magic, image count, rows, cols.
            magic, size, rows, cols = struct.unpack(">IIII", file.read(16))
            if magic != 2051:
                raise ValueError('Magic number mismatch, expected 2051, got {}'.format(magic))
            image_data = array("B", file.read())

        # Reshape with the header's rows/cols (not a fixed 28x28) in one
        # operation over the whole payload.
        pixel_count = size * rows * cols
        pixels = np.array(image_data[:pixel_count], dtype=np.uint8)
        images = list(pixels.reshape(size, rows, cols))
        return images, labels

    def load_data(self):
        """Load both splits as ((x_train, y_train), (x_test, y_test)) NumPy arrays."""
        x_train, y_train = self.read_images_labels(self.training_images_filepath, self.training_labels_filepath)
        x_test, y_test = self.read_images_labels(self.test_images_filepath, self.test_labels_filepath)
        return (np.array(x_train), np.array(y_train)), (np.array(x_test), np.array(y_test))

###############################################################################
# Helper function to convert labels to one-hot encoding
###############################################################################
def one_hot(labels, num_classes=10):
    """Return a float32 (len(labels), num_classes) matrix with 1.0 at each row's label index."""
    encoded = np.zeros((len(labels), num_classes), dtype=np.float32)
    # Each `row` is a view into `encoded`, so writing to it fills the matrix.
    for row, class_id in zip(encoded, map(int, labels)):
        row[class_id] = 1.0
    return encoded

###############################################################################
# Main training function for MNIST classification using MLP
###############################################################################
def main(input_path='/Users/mubaraqolojo/Downloads/MLP/archive', seed=None):
    """Train a 784 -> 128 -> 64 -> 10 MLP on MNIST and report test accuracy.

    Args:
        input_path: Directory containing the four MNIST IDX files. The default
            is the location this notebook was originally written against.
        seed: Optional integer used to seed NumPy's global RNG before any
            random work, which fixes the train/validation shuffle and the
            sample predictions printed at the end. Whether it also fixes
            weight initialisation and batching depends on how mlp.py draws
            random numbers (not visible here). `None` leaves the RNG untouched.
    """
    if seed is not None:
        np.random.seed(seed)

    # Set file paths
    training_images_filepath = join(input_path, 'train-images.idx3-ubyte')
    training_labels_filepath = join(input_path, 'train-labels.idx1-ubyte')
    test_images_filepath = join(input_path, 't10k-images.idx3-ubyte')
    test_labels_filepath = join(input_path, 't10k-labels.idx1-ubyte')

    # Load MNIST dataset
    mnist_dataloader = MnistDataloader(training_images_filepath, training_labels_filepath,
                                       test_images_filepath, test_labels_filepath)
    (x_train, y_train), (x_test, y_test) = mnist_dataloader.load_data()

    # Preprocess images: flatten (28x28 -> 784) and normalize pixel values to [0,1]
    x_train = x_train.reshape(x_train.shape[0], 784).astype(np.float32) / 255.0
    x_test  = x_test.reshape(x_test.shape[0], 784).astype(np.float32) / 255.0

    # Convert labels to one-hot encoding for training/validation
    y_train_onehot = one_hot(y_train, num_classes=10)

    # Split training data into training (90%) and validation (10%)
    n_train = x_train.shape[0]
    split_index = int(0.9 * n_train)
    # Shuffle images and labels with the same permutation so pairs stay aligned.
    indices = np.random.permutation(n_train)
    x_train = x_train[indices]
    y_train_onehot = y_train_onehot[indices]

    train_x = x_train[:split_index]
    train_y = y_train_onehot[:split_index]
    val_x = x_train[split_index:]
    val_y = y_train_onehot[split_index:]

    # Design the MLP architecture for MNIST classification:
    # Input layer: 784 neurons; two hidden layers; output layer: 10 neurons with Softmax activation.
    layers = [
        Layer(fan_in=784, fan_out=128, activation_function=Relu()),
        Layer(fan_in=128, fan_out=64, activation_function=Relu()),
        Layer(fan_in=64, fan_out=10, activation_function=Softmax())
    ]
    mlp = MultilayerPerceptron(layers)

    # Loss function: use CrossEntropy for classification.
    loss_func = CrossEntropy()

    # Training hyperparameters (tune these for best performance)
    learning_rate = 1e-3
    batch_size = 32
    epochs = 50

    # Train the model (training and validation loss will be printed each epoch)
    train_losses, val_losses = mlp.train(
        train_x, train_y,
        val_x, val_y,
        loss_func=loss_func,
        learning_rate=learning_rate,
        batch_size=batch_size,
        epochs=epochs,
        momentum=0.9  # try adding momentum if desired
    )

    # Evaluate the model on the full test set.
    # NOTE(review): training=False presumably disables training-only behaviour
    # such as dropout - confirm against mlp.py.
    test_pred = mlp.forward(x_test, training=False)
    # For each test example, the predicted class is the index with maximum probability.
    predicted_labels = np.argmax(test_pred, axis=1)
    accuracy = np.mean(predicted_labels == y_test)
    print(f"\nTest Accuracy: {accuracy*100:.2f}%")

    # Plot training & validation loss curves. The x-axis is sized from the
    # returned histories so the plot still works if train() returns a number
    # of entries different from `epochs`.
    plt.figure(figsize=(8, 6))
    plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('MNIST: Training vs. Validation Loss')
    plt.legend()
    plt.show()

    # Optionally, display some sample predictions
    sample_indices = np.random.choice(x_test.shape[0], 10, replace=False)
    print("\nSample predictions:")
    for idx in sample_indices:
        print(f"Image {idx}: Predicted = {predicted_labels[idx]}, True = {y_test[idx]}")

if __name__ == '__main__':
    main()


Epoch [1/50] | Train Loss: 0.726982 | Val Loss: 0.337220
Epoch [2/50] | Train Loss: 0.305736 | Val Loss: 0.268559
Epoch [3/50] | Train Loss: 0.251285 | Val Loss: 0.234920
Epoch [4/50] | Train Loss: 0.217419 | Val Loss: 0.207540
Epoch [5/50] | Train Loss: 0.190720 | Val Loss: 0.192421
Epoch [6/50] | Train Loss: 0.171317 | Val Loss: 0.170475
Epoch [7/50] | Train Loss: 0.154308 | Val Loss: 0.157571
Epoch [8/50] | Train Loss: 0.141426 | Val Loss: 0.150460
Epoch [9/50] | Train Loss: 0.130039 | Val Loss: 0.138400
Epoch [10/50] | Train Loss: 0.120291 | Val Loss: 0.130073
Epoch [11/50] | Train Loss: 0.111655 | Val Loss: 0.125563
Epoch [12/50] | Train Loss: 0.104792 | Val Loss: 0.119592
Epoch [13/50] | Train Loss: 0.097616 | Val Loss: 0.113599
Epoch [14/50] | Train Loss: 0.091190 | Val Loss: 0.110832
Epoch [15/50] | Train Loss: 0.085989 | Val Loss: 0.107209
Epoch [16/50] | Train Loss: 0.080689 | Val Loss: 0.103732
Epoch [17/50] | Train Loss: 0.076065 | Val Loss: 0.102098
Epoch [18/50] | Train L