In [1]:
import torch
from tqdm import tqdm
import json

In [2]:
X_path = "./mnist_data/t10k-images.idx3-ubyte"
Y_path = "./mnist_data/t10k-labels.idx1-ubyte"

import numpy as np
import struct

def read_idx(filename):
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for d in range(dims))
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

X = read_idx(X_path)
y = read_idx(Y_path)

X = X.reshape(X.shape[0], -1)
X = X.astype(np.float32) / 255
X = torch.from_numpy(X)
y = torch.from_numpy(y)
print(X.shape, y.shape)#

X_train, X_test = X[:int(len(X)*0.8)], X[int(len(X)*0.8):]
y_train, y_test = y[:int(len(y)*0.8)], y[int(len(y)*0.8):]

torch.Size([10000, 784]) torch.Size([10000])


  y = torch.from_numpy(y)


In [5]:
# very simple model
input_size = 28*28
n_labels = 10

class SimpleModel(torch.nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.layers = [
            torch.nn.Linear(input_size, 256),
            torch.nn.Linear(256, 512),
            torch.nn.Linear(512, 512),
            torch.nn.Linear(512, 256),
            torch.nn.Linear(256, n_labels),
        ]
        self.model = torch.nn.Sequential(*self.layers)

    def forward(self, x):
        return self.model(x)

model = SimpleModel()
print(model)
n_neurons = sum([layer.weight.numel() for layer in model.layers])
print(f"Number of neurons: {n_neurons:,}")

SimpleModel(
  (model): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=512, bias=True)
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): Linear(in_features=256, out_features=10, bias=True)
  )
)
Number of neurons: 727,552


In [6]:
# train
import torch.optim as optim

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 10
batch_size = 100

for epoch in range(n_epochs):
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}")
print("Finished training")

Epoch 1/10, Loss: 0.27356645464897156
Epoch 2/10, Loss: 0.20802274346351624
Epoch 3/10, Loss: 0.24634283781051636
Epoch 4/10, Loss: 0.2669011950492859
Epoch 5/10, Loss: 0.3040083646774292
Epoch 6/10, Loss: 0.3274763822555542
Epoch 7/10, Loss: 0.30881571769714355
Epoch 8/10, Loss: 0.2672855854034424
Epoch 9/10, Loss: 0.27544105052948
Epoch 10/10, Loss: 0.3405182361602783
Finished training


In [7]:
# test
n_correct = 0
n_total = 0
with torch.no_grad():
    for i in tqdm(range(0, len(X_test), batch_size)):
        X_batch = X_test[i:i+batch_size]
        y_batch = y_test[i:i+batch_size]

        y_pred = model(X_batch)
        n_correct += (torch.argmax(y_pred, dim=1) == y_batch).sum().item()
        n_total += len(y_batch)
        
print(f"Accuracy: {n_correct/n_total}")

100%|██████████| 20/20 [00:00<00:00, 1590.83it/s]

Accuracy: 0.8655





In [8]:
# save test data as json
with open("test_data.json", "w") as f:
    json.dump({"X": X_test.tolist(), "y": y_test.tolist()}, f)
print("Saved test data as json")

Saved test data as json


In [7]:
# save model as JSON
import json
model_dict = model.state_dict()
for key in model_dict.keys():
    model_dict[key] = model_dict[key].tolist()
with open("model.json", "w") as f:
    json.dump(model_dict, f)
print("Model saved as JSON")
n_neurons = sum([layer.weight.numel() for layer in model.layers])
print(f"Number of neurons: {n_neurons:,}")

Model saved as JSON
Number of neurons: 2,562,560
