In [12]:
from engine import Matrix, Atom
from nn import MLP, mse_loss
from tqdm import tqdm

In [13]:
# Just using the external libraries to load and preprocess the data
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST

In [14]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = MNIST('.', train=True, transform=transform, download=True)
test_dataset = MNIST('.', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

train_images = []
train_labels = []
for images, labels in train_loader:
    train_images.append(images.view(-1, 28*28).numpy())
    train_labels.append(labels.numpy())

train_images = np.vstack(train_images)
train_labels = np.hstack(train_labels)
train_labels = torch.nn.functional.one_hot(torch.tensor(train_labels), num_classes=10).numpy()

test_images, test_labels = next(iter(test_loader))
test_images = test_images.view(-1, 28*28).numpy()
test_labels = test_labels.numpy()


In [15]:
# Some of the functions implemented in numpy for speed, maybe I can implement low level versions in another language in the future
def softmax(logits):
    exps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)


def cross_entropy_loss(targets, predictions):
    m = targets.shape[0]
    p = softmax(predictions)
    log_likelihood = -np.log(p[range(m), targets.argmax(axis=1)] + 1e-9)
    loss = np.sum(log_likelihood) / m
    return loss

In [16]:
batch_size = 8
steps = 500
lr = 0.01

model = MLP(784, [64, 32, 10])
print(f"Model initialized with {len(model.layers)} layers and {len(model.parameters())} parameters")

for step in tqdm(range(steps)):
    ri = np.random.permutation(train_images.shape[0])[:batch_size]
    Xb = [[Atom(x) for x in train_images[i]] for i in ri]
    yb = train_labels[ri]

    # Forward pass
    y_pred_atoms = [model(x) for x in Xb]
    y_pred = np.array([[y.data for y in pred] for pred in y_pred_atoms])
    
    # Calculate loss
    loss = cross_entropy_loss(yb, y_pred)

    # Backward pass
    probs = softmax(y_pred)
    for i in range(len(yb)):
        for j in range(len(yb[i])):
            y_pred_atoms[i][j].grad = probs[i][j] - yb[i][j]
    
    model.zero_grad()
    for y_pred_atom in y_pred_atoms:
        for y in y_pred_atom:
            y.backward()
    
    # Update parameters
    for p in model.parameters():
        p.data -= lr * p.grad

    if step % 100 == 0:
        print(f'Step {step}, Loss: {loss}')

print(f'Final Step, Loss: {loss}')

Model initialized with 3 layers and 52650 parameters


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 1/500 [00:06<57:40,  6.94s/it]

Step 0, Loss: 20.723265836644238


  0%|          | 2/500 [00:13<56:09,  6.77s/it]

In [None]:
from sklearn.metrics import accuracy_score
test_atoms = [[Atom(x) for x in test_images[i]] for i in range(test_images.shape[0])]
predictions = [model(x) for x in test_atoms]
pred_labels = np.array([[y.data for y in pred] for pred in predictions])
pred_labels = np.argmax(pred_labels, axis=1)

accuracy = accuracy_score(test_labels, pred_labels)
print(f'Accuracy on test data: {accuracy * 100:.2f}%')

Accuracy on test data: 9.00%
