In [None]:
import torch
from tqdm import tqdm
import json

In [None]:
# Locations of the IDX image file and the matching IDX label file.
# NOTE(review): the "t10k" prefix presumably denotes the 10,000-sample MNIST
# test set; this notebook carves both its train and test split out of this
# single file pair -- confirm that this is intended.
X_path = "./mnist_data/t10k-images.idx3-ubyte"
Y_path = "./mnist_data/t10k-labels.idx1-ubyte"

import numpy as np
import struct

def read_idx(filename):
    """Load an IDX-format file (the container format used by MNIST) as an array.

    The 4-byte header holds two zero bytes, a one-byte element type code and
    a one-byte dimension count. It is followed by one big-endian uint32 per
    dimension and then the raw element data.

    Args:
        filename: Path of the IDX file to read.

    Returns:
        A NumPy array with the shape declared in the header. Multi-byte
        element types are converted to the machine's native byte order;
        single-byte data is returned as a zero-copy view over the file bytes.

    Raises:
        ValueError: If the header does not start with two zero bytes, the
            element type code is unknown, or the payload size does not match
            the declared shape.
        struct.error: If the file is too short to contain the header.
    """
    # IDX element type codes -> NumPy type strings (byte order added below).
    idx_types = {
        0x08: "u1",  # unsigned byte
        0x09: "i1",  # signed byte
        0x0B: "i2",  # short
        0x0C: "i4",  # int
        0x0D: "f4",  # float
        0x0E: "f8",  # double
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError(f"{filename}: not an IDX file (bad magic number)")
        if data_type not in idx_types:
            raise ValueError(
                f"{filename}: unsupported IDX data type 0x{data_type:02X}"
            )
        shape = struct.unpack(f'>{dims}I', f.read(4 * dims))
        payload = f.read()

    type_code = idx_types[data_type]
    # IDX stores multi-byte values big-endian, so decode with an explicit
    # big-endian dtype instead of assuming one byte per element.
    data = np.frombuffer(payload, dtype=np.dtype('>' + type_code)).reshape(shape)
    # copy=False leaves data that is already in native layout (e.g. uint8)
    # untouched and only copies when a byte swap is actually required.
    return data.astype(np.dtype(type_code), copy=False)

X = read_idx(X_path)
y = read_idx(Y_path)
# Images and labels are split at the same index below, so they must line up.
assert len(X) == len(y), f"{len(X)} images but {len(y)} labels"

# Flatten every image into a single row and divide the pixel values by 255.
X = X.reshape(X.shape[0], -1)
X = X.astype(np.float32) / 255
X = torch.from_numpy(X)
# Copy the labels into a fresh int64 array before handing them to torch:
# read_idx can return a read-only view over the file bytes (torch.from_numpy
# warns about non-writable arrays), and int64 is the usual dtype for the
# class-index targets consumed by CrossEntropyLoss.
y = torch.from_numpy(y.astype(np.int64))
print(X.shape, y.shape)

train_percentage = 0.9

# Compute the split point once; the first part is used for training and the
# remainder is held out for evaluation.
n_train = int(len(X) * train_percentage)
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

In [68]:
def model_to_json(model):
    """Return the model's state dict with every tensor turned into nested lists.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).

    Returns:
        The mapping produced by ``model.state_dict()``, with each value
        replaced by the result of its ``tolist()`` so that it can be passed
        to ``json.dump``.
    """
    serializable = model.state_dict()
    # Snapshot the items first so the mapping can be rewritten while looping.
    for name, tensor in list(serializable.items()):
        serializable[name] = tensor.tolist()
    return serializable

In [81]:
# Very simple fully connected model: three Linear layers with ReLU
# activations between them (no activation after the output layer).
input_size = 28*28
n_labels = 10

class SimpleModel(torch.nn.Module):
    """Multi-layer perceptron mapping flattened images to class logits.

    Architecture: Linear(input_size, 128) -> ReLU -> Linear(128, 64) -> ReLU
    -> Linear(64, n_labels). ``input_size`` and ``n_labels`` are read from
    module scope when an instance is constructed.

    Attributes:
        layers: Plain Python list holding the layer modules in order.
        model: ``torch.nn.Sequential`` built from ``layers``. The parameters
            are registered through it, so state-dict keys start with
            ``model.``.
    """

    def __init__(self):
        super().__init__()

        self.layers = [
            torch.nn.Linear(input_size, 128),
            torch.nn.ReLU(),

            torch.nn.Linear(128, 64),
            torch.nn.ReLU(),

            # output layer
            torch.nn.Linear(64, n_labels)
        ]

        self.model = torch.nn.Sequential(*self.layers)

    def forward(self, x):
        """Return the logits for a single sample or a batch of samples.

        Args:
            x: Input whose last dimension matches the first layer's
                ``in_features``, e.g. shape ``(input_size,)`` or
                ``(batch, input_size)``.

        Returns:
            The output of the sequential stack applied to ``x``.

        Raises:
            AssertionError: If the last dimension of ``x`` has the wrong size.
        """
        # Check against the layer that was actually built rather than a
        # hard-coded number, and only constrain the feature dimension so that
        # batched inputs are accepted as well.
        expected = self.model[0].in_features
        assert tuple(x.shape[-1:]) == (expected,), (
            f"Expected input shape (..., {expected}) but got {tuple(x.shape)}"
        )
        return self.model(x)

# Seed PyTorch's RNG so the initial weights (and the random smoke-test input
# below) are identical on every fresh run of the notebook.
torch.manual_seed(0)
model = SimpleModel()
# Smoke test: call the module itself rather than .forward() so that
# nn.Module.__call__ (and any registered hooks) is exercised, and size the
# input from the shared constant instead of a literal.
model(torch.randn(input_size))

tensor([-0.1514, -0.0687,  0.0044,  0.0327,  0.2049, -0.1485,  0.0893,  0.0492,
         0.2144,  0.0345], grad_fn=<ViewBackward0>)

In [82]:
# Training: one optimiser step per sample (batch size 1) with Adam, then an
# accuracy check on the held-out split after every epoch. The trained weights
# are finally written to model_trained.json.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

n_epochs = 2

for epoch in range(n_epochs):
    model.train()
    for i in tqdm(range(len(X_train))):
        optimizer.zero_grad()
        y_pred = model(X_train[i])
        # Wrap the single sample's logits and label as a batch of one.
        loss = loss_fn(y_pred.unsqueeze(0), y_train[i].unsqueeze(0))
        loss.backward()
        optimizer.step()

    model.eval()
    correct = 0
    # Evaluation needs no gradients; no_grad avoids recording an autograd
    # graph for every test sample.
    with torch.no_grad():
        for i in range(len(X_test)):
            y_pred = model(X_test[i])
            if torch.argmax(y_pred) == y_test[i]:
                correct += 1
    print(f"Epoch {epoch+1}/{n_epochs} - Test accuracy: {correct/len(X_test)}")

with open("model_trained.json", "w") as f:
    json.dump(model_to_json(model), f, indent=4)


100%|██████████| 9000/9000 [00:06<00:00, 1444.76it/s]


Epoch 1/2 - Test accuracy: 0.899


100%|██████████| 9000/9000 [00:06<00:00, 1479.99it/s]


Epoch 2/2 - Test accuracy: 0.918
