Note: this currently runs terribly slowly. I'm attempting to speed it up with NumPy, but it's a WIP.


Data preparation: The 28x28 pixel images are flattened into a 784-dimensional vector. The pixel values are often normalized by dividing them by 255.
Model architecture: An MLP for MNIST typically includes:

    An input layer with 784 nodes, one for each pixel.
    One or more hidden layers with a specified number of neurons, often using an activation function like ReLU.
    An output layer with 10 nodes, representing the 10 possible digits (0-9), usually with a Softmax activation function to produce probabilities.

Training: The model is trained using a large dataset of labeled images. The process involves:

    Forward propagation: The input image vector is passed through the network to get an output prediction.
    Backpropagation: The difference between the prediction and the actual label is calculated, and this error is used to adjust the model's weights to improve future predictions.
    This process is repeated for multiple epochs, which are full passes through the entire dataset.

Evaluation: After training, the model's performance is evaluated on a separate, unseen test set to determine its accuracy.


In [None]:
import datasets as ds

import pea
from pea import nn
import random

# inspecting dataset before committing to download it
# ds_builder = ds.load_dataset_builder("ylecun/mnist")
# ds_builder.info.features

In [None]:
# Seed Python's `random` module with a fixed value so repeated runs draw the
# same pseudo-random sequence.
# NOTE(review): presumably pea initialises weights via `random` — confirm.
random.seed(1337)

In [None]:
# download dataset splits: load the "train" and "test" splits of the
# "ylecun/mnist" dataset into separate variables
train_ds = ds.load_dataset("ylecun/mnist", split="train")
test_ds = ds.load_dataset("ylecun/mnist", split="test")

In [None]:
# inspecting first image in test set: print its label, then leave the image
# as the cell's trailing expression so the notebook renders it
print(test_ds[0]["label"])
test_ds[0]["image"]

In [None]:
# show the first training example as loaded, before any preprocessing
train_ds[0]

In [None]:
# preprocess: cast the "image" column to an Image feature with mode="RGB",
# then show the first example to check the result
train_ds = train_ds.cast_column("image", ds.features.Image(mode="RGB"))
train_ds[0]

In [None]:
# preprocess: convert each RGB image to normalized grayscale pixel intensities (0.0-1.0), each wrapped in a pea.Value
def rgb_to_value(examples):
    """Add a "value" column of grayscale ``pea.Value`` pixels to a batch.

    For every image in ``examples["image"]`` the pixels are read as RGB,
    each channel is scaled from 0-255 down to 0.0-1.0, and the three channels
    are combined with the luma weights (0.2989, 0.5870, 0.1140) into one
    perceived-brightness number per pixel. Each number is wrapped in a
    ``pea.Value``.

    The ``examples`` mapping is modified in place (a "value" key is added,
    holding one list of values per image) and is also returned.
    """
    converted = []
    for picture in examples["image"]:
        pixels = []
        # converting to RGB is optional here but guarantees 3-channel pixels
        for red, green, blue in picture.convert("RGB").getdata():
            # normalise each channel, then apply the luma formula
            brightness = (
                0.2989 * (red / 255.0)
                + 0.5870 * (green / 255.0)
                + 0.1140 * (blue / 255.0)
            )
            pixels.append(pea.Value(brightness))
        converted.append(pixels)
    examples["value"] = converted
    return examples


# register rgb_to_value as the dataset's transform, then show the "value"
# column of the first example to check the conversion
# NOTE(review): with_transform is understood to run the function on access
# rather than eagerly — confirm against the datasets documentation
train_ds = train_ds.with_transform(rgb_to_value)
train_ds[0]["value"]

In [None]:
from collections.abc import Iterable


class MNIST(nn.Module):
    """Digit classifier that wraps a single ``nn.MLP``.

    The defaults reproduce the original fixed architecture: 28 * 28 = 784
    inputs (one per pixel) followed by layers of 128, 64 and 10 units.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        layer_sizes: Iterable[int] = (128, 64, 10),
    ):
        """Build the underlying MLP.

        Args:
            in_features: Number of input values per example.
            layer_sizes: Width of each layer after the input, in order; the
                last entry is the number of outputs.
        """
        super().__init__()
        # nn.MLP was originally handed a list, so materialise the sizes as one
        self.mlp = nn.MLP(in_features, list(layer_sizes))

    def __call__(self, x: Iterable[pea.Value]) -> list[pea.Value]:
        """Run ``x`` through the MLP and return its outputs."""
        return self.mlp(x)

    def parameters(self) -> list[pea.Value]:
        """Return the parameters of the wrapped MLP."""
        return self.mlp.parameters()

    def __repr__(self) -> str:
        return f"MNIST of 1 MLP [\n{repr(self.mlp)}\n]"


# build the model, then print its string form and its parameter count
model = MNIST()
print(str(model))
print(len(model.parameters()))

In [None]:
# train loop
def train(
    model: nn.Module,
    dataset: ds.Dataset,
    *,
    epochs: int,
    batch_size: int,
    lr: float,
):
    """Train ``model`` on ``dataset`` with mini-batch gradient descent.

    Epoch means one full pass over the dataset. Batch size means how many
    examples to process before updating the model parameters. The reported
    epoch loss is the mean per-sample loss over the entire dataset.

    Args:
        model: Callable model exposing ``zero_grad()`` and ``parameters()``.
        dataset: Dataset whose slices provide "value" and "label" columns.
        epochs: Number of full passes over ``dataset``.
        batch_size: Maximum number of examples per parameter update; the
            final batch of an epoch may be smaller.
        lr: Learning rate used for the plain gradient-descent step.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    num_examples = len(dataset)
    num_batches = (num_examples + batch_size - 1) // batch_size

    for epoch in range(epochs):
        # Sum of per-sample losses, so the epoch average is a true mean
        # over examples rather than a sum of batch means.
        total_loss = 0.0
        batch_starts = range(0, num_examples, batch_size)
        for batch_idx, start in enumerate(batch_starts, start=1):
            batch = dataset[start : start + batch_size]
            values = batch["value"]
            labels = batch["label"]
            # The last batch can be shorter than batch_size; use its real size.
            n_samples = len(labels)

            batch_loss = pea.Value(0.0)
            for j, (x, y) in enumerate(zip(values, labels), start=1):
                pred = model(x)
                one_hot_y = nn.one_hot(pea.Value(y), num_classes=10)
                sample_loss = nn.cross_entropy_loss(pred, one_hot_y)
                batch_loss += sample_loss
                print(f"        Sample {j}/{n_samples}, Loss: {sample_loss.data}")

            total_loss += batch_loss.data
            avg_batch_loss = batch_loss / n_samples
            # backward: clear stale gradients before accumulating new ones
            model.zero_grad()
            avg_batch_loss.backward()
            # update
            for p in model.parameters():
                p.data -= lr * p.grad
            print(
                f"    Batch {batch_idx}/{num_batches}, Batch Avg Loss: {avg_batch_loss.data}"
            )
        # Guard the division so an empty dataset reports 0.0 instead of crashing.
        avg_loss = total_loss / num_examples if num_examples else 0.0
        print(f"Epoch {epoch + 1}/{epochs}, Epoch Avg Loss: {avg_loss}")


# train for one epoch over the training split, 32 examples per update, lr 0.01
train(model, train_ds, epochs=1, batch_size=32, lr=0.01)

In [None]:
# preprocess test dataset: cast the "image" column to an Image feature with
# mode="RGB" (same step as for the training split), then show the first example
test_ds = test_ds.cast_column("image", ds.features.Image(mode="RGB"))
test_ds[0]

In [None]:
# register the same rgb_to_value transform on the test split, then show the
# "value" column of the first example to check the conversion
test_ds = test_ds.with_transform(rgb_to_value)
test_ds[0]["value"]

In [None]:
def evaluate(model: nn.Module, dataset: ds.Dataset) -> float:
    """Return the fraction of examples in ``dataset`` that ``model`` gets right.

    The predicted label for an example is the index of the largest ``.data``
    among the model's outputs. A progress line with the running accuracy is
    printed after every example.

    Args:
        model: Callable taking an example's "value" and returning a sequence
            of objects with a ``.data`` attribute.
        dataset: Indexable dataset whose rows provide "value" and "label".

    Returns:
        Accuracy in the range 0.0-1.0; 0.0 when ``dataset`` is empty.
    """
    total = len(dataset)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0

    correct = 0
    for i in range(total):
        # Index the dataset once per example: every dataset[i] access may
        # re-run the preprocessing transform attached to the dataset.
        example = dataset[i]
        x = example["value"]
        y = example["label"]
        pred = model(x)
        pred_label = max(range(len(pred)), key=lambda k: pred[k].data)
        if pred_label == y:
            correct += 1
        print(f"Predicted: {pred_label}, Actual: {y}, Accuracy so far: {(correct / (i + 1) * 100):.3g}%")
    return correct / total


# score the trained model on the test split and print accuracy as a percentage
accuracy = evaluate(model, test_ds)
print(f"Test set accuracy: {accuracy * 100:.2f}%")