Implementation with PyTorch, also just for practice. See MLP_from_scratch.ipynb for the from-scratch version.

In [16]:
from sklearn.datasets import fetch_openml
import numpy as np

# Fetch MNIST from OpenML as plain arrays (as_frame=False) rather than a DataFrame.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Features, shape (70000, 784): rescale pixel values into [0, 1].
X = mnist['data'] / 255.0

# Labels, shape (70000,): cast to int64 integers.
y = mnist['target'].astype(np.int64)

In [17]:
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset

In [23]:
# Train/test split: the first 60k rows are for training, the last 10k for testing.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

# Features are converted to float32 tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
# Labels are converted without an explicit dtype, so torch infers it from the arrays.
y_train_tensor, y_test_tensor = (
    torch.tensor(labels) for labels in (y_train, y_test)
)

# Only the training data is wrapped in a loader: shuffled mini-batches of 32.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32)


In [19]:
class LinearClassifier(nn.Module):
    """Fully connected feed-forward classifier (a multi-layer perceptron).

    Despite the name, the network is not purely linear: every hidden
    ``nn.Linear`` layer is followed by an ``nn.ReLU``. The final layer has no
    activation, so the model returns raw, unnormalized class scores.

    Args:
        in_features: Size of each input vector. Defaults to 784.
        hidden_sizes: Output width of each hidden layer, in order. An empty
            sequence yields a single linear layer. Defaults to (256, 128, 64).
        num_classes: Number of output scores per sample. Defaults to 10.
    """

    def __init__(self, in_features=784, hidden_sizes=(256, 128, 64), num_classes=10):
        super().__init__()
        layers = []
        prev_width = in_features
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features=prev_width, out_features=width))
            layers.append(nn.ReLU())
            prev_width = width
        layers.append(nn.Linear(in_features=prev_width, out_features=num_classes))
        # Stored as ``net`` with the same layer order as before, so parameter
        # names (``net.0.weight``, ``net.2.weight``, ...) are unchanged.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw class scores."""
        return self.net(x)

In [21]:
# Train a fresh model with plain SGD, reporting the mean batch loss per epoch.
epochs = 10
model = LinearClassifier()
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

model.train()  # make training mode explicit
for epoch in range(epochs):
    count = 0
    total_loss = 0.0
    for xb, yb in train_dataloader:
        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        yhat = model(xb)
        loss = loss_fn(yhat, yb)
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python float: summing the loss tensor itself would
        # keep autograd history alive for every batch of the epoch.
        total_loss += loss.item()
        count += 1
    print(f"Epoch {epoch} Loss: {total_loss / count}")


Epoch 0 Loss: 1.6597137451171875
Epoch 1 Loss: 0.44393908977508545
Epoch 2 Loss: 0.33175548911094666
Epoch 3 Loss: 0.26853111386299133
Epoch 4 Loss: 0.2200690358877182
Epoch 5 Loss: 0.18364672362804413
Epoch 6 Loss: 0.15597079694271088
Epoch 7 Loss: 0.1357380896806717
Epoch 8 Loss: 0.11958706378936768
Epoch 9 Loss: 0.10651423782110214


In [26]:
# Evaluate on the test set. Gradients are not needed here, so the forward pass
# runs in eval mode under no_grad to avoid building an autograd graph for the
# whole test tensor.
model.eval()
with torch.no_grad():
    yhat = model(X_test_tensor)

# Predicted class = index of the largest score in each row.
preds_test = torch.argmax(yhat, dim=1)

# Accuracy = fraction of predictions equal to the true labels.
accuracy_test = (preds_test == y_test_tensor).float().mean()

print(f"Test Accuracy: {accuracy_test:.4f}")


Test Accuracy: 0.9659
