In [1]:
import urllib.request
import gzip
import numpy as np
from tqdm import trange

from giagrad import Tensor
import giagrad.nn as nn
import giagrad.optim as optim 

from typing import List

## Load data

In [None]:
def fetch(url, type_data=None):
    """Download a gzip-compressed MNIST IDX file and decode it into a NumPy array.

    Parameters
    ----------
    url : str
        Location of the ``.gz`` archive, passed to ``urllib.request.urlopen``.
    type_data : str, optional
        ``'label'`` parses a label file: the 8-byte header is skipped and a
        flat array is returned. Any other value parses an image file: the
        16-byte header is skipped and the pixels are reshaped to
        ``(-1, 28, 28)``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array built with ``np.frombuffer`` over the decompressed bytes.
    """
    # gzip.open() does not close a file object handed to it, so the response
    # is managed explicitly to release the connection once the data is read.
    with urllib.request.urlopen(url) as response, gzip.open(response) as f:
        payload = f.read()

    if type_data == 'label':
        return np.frombuffer(payload, np.uint8, offset=8)
    return np.frombuffer(payload, np.uint8, offset=16).reshape(-1, 28, 28)

# Archive locations for the four MNIST files (images and labels, train and test)
mnist_urls = {
    "train_images": "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
    "train_labels": "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
    "test_images": "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
    "test_labels": "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
}

# Images use the default parser; label files need type_data='label'
X_train_all = fetch(mnist_urls["train_images"])
Y_train_all = fetch(mnist_urls["train_labels"], type_data='label')
X_test_all = fetch(mnist_urls["test_images"])
Y_test_all = fetch(mnist_urls["test_labels"], type_data='label')

# Show the shape of each image array as a quick sanity check
for images in (X_train_all, X_test_all):
    print(images.shape)

### Subsample and flatten

In [None]:
# Number of leading samples kept from the train and test splits
n_train, n_test = 3000, 500
# Length of one flattened 28x28 image
n_pixels = 28 * 28

# Flatten each kept image into a single row of n_pixels values
X_train = X_train_all[:n_train].reshape(-1, n_pixels)
X_test = X_test_all[:n_test].reshape(-1, n_pixels)

# Labels are truncated to the same leading samples
Y_train = Y_train_all[:n_train]
Y_test = Y_test_all[:n_test]

## MLP

In [None]:
from itertools import pairwise

class MLP(nn.Module):
    """Multilayer perceptron: a stack of ``nn.Linear`` layers with ReLU in between.

    ``dims`` lists the layer widths; every consecutive pair
    ``(dims[i], dims[i + 1])`` becomes one ``nn.Linear`` layer.
    """

    def __init__(self, dims: List[int]):
        """Create one linear layer per consecutive pair of widths in ``dims``."""
        linear_stack = []
        for n_in, n_out in pairwise(dims):
            linear_stack.append(nn.Linear(n_in, n_out))
        self.layers = linear_stack

    def __call__(self, x):
        """Run ``x`` through every layer, applying ReLU before each layer except the first."""
        first, rest = self.layers[0], self.layers[1:]
        out = first(x)
        for linear in rest:
            # The activation sits between layers, so the final output stays linear
            out = linear(out.relu())
        return out

    def parameters(self):
        """Return the parameters of all layers concatenated into a single list."""
        collected = []
        for linear in self.layers:
            collected = collected + linear.parameters()
        return collected

    def __repr__(self):
        """Return ``MLP`` followed by one tab-indented line per layer."""
        layer_lines = map(str, self.layers)
        return "MLP\n\t" + "\n\t".join(layer_lines)

In [None]:
# Layer widths: 784 inputs (one per pixel of a 28x28 image), 600 hidden units, 10 outputs
layer_dims = [784, 600, 10]
model = MLP(layer_dims)
model

## Train

In [None]:
# Training objective: cross-entropy averaged over the batch
criterion = nn.CrossEntropyLoss(reduction='mean')

# Plain SGD. Nesterov's look-ahead is defined through the momentum buffer, so
# with momentum=0. it cannot have any effect, and PyTorch-style SGD
# implementations reject nesterov=True without momentum. It is therefore
# disabled here; to use Nesterov, set a positive momentum together with
# nesterov=True.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.,
    nesterov=False
)

In [None]:
%%time

n_iterations = 40
prog_bar = trange(n_iterations)

for ite in prog_bar:
    # Clear the gradients left over from the previous iteration
    optimizer.zero_grad()

    # Forward pass over the whole training subset, then the loss on its labels
    output = model(X_train)
    loss = criterion(output, Y_train)

    # Backward pass followed by one optimizer update.
    # NOTE(review): debug=True looks like a leftover debugging flag; confirm
    # against giagrad's Tensor.backward whether it is needed in this loop.
    loss.backward(debug=True)
    optimizer.step()

    # Report the current iteration and loss on the progress bar
    prog_bar.set_description(f"It: {ite}, loss: {loss}")

## Evaluate

In [None]:
def accuracy(model, X, Y):
    """Return the percentage of rows in ``X`` whose predicted class matches ``Y``.

    Parameters
    ----------
    model : callable
        Maps a batch of flattened images to a tensor of per-class scores;
        its ``.data`` attribute is assumed to be a NumPy array with one row
        per sample, as the original per-sample evaluation already relied on.
    X : numpy.ndarray
        Batch of flattened images, one per row.
    Y : numpy.ndarray
        Integer class label for each row of ``X``.

    Returns
    -------
    float
        Accuracy expressed as a percentage.
    """
    # One batched forward pass, the same call shape the training loop uses,
    # instead of one forward pass per sample. Softmax is monotonic, so the
    # argmax of the raw scores is already the predicted class.
    predictions = model(X).data.argmax(axis=1)
    return (predictions == Y).sum() / len(Y) * 100


print(f'train accuracy: {accuracy(model, X_train, Y_train)} %')

print(f'test accuracy: {accuracy(model, X_test, Y_test)} %')