In [1]:
import urllib.request
import gzip
import numpy as np
from tqdm import trange

from giagrad import Tensor
import giagrad.nn as nn
import giagrad.optim as optim 
from giagrad.display import draw_dot

from typing import List

## Load data

In [2]:
def fetch(url, type_data = None):
    """Download a gzip-compressed MNIST IDX file and decode it into a uint8 array.

    Parameters
    ----------
    url : str
        Location of the ``.gz`` archive, passed to ``urllib.request.urlopen``.
    type_data : str, optional
        ``'label'`` decodes the payload as a label file: the first 8 bytes are
        skipped and a flat array is returned. Any other value decodes it as an
        image file: the first 16 bytes are skipped and the rest is reshaped
        to ``(-1, 28, 28)``.

    Returns
    -------
    numpy.ndarray
        The decoded ``uint8`` data.
    """
    is_label = type_data == 'label'
    # gzip.open() does not close a file object it was handed, so the HTTP
    # response gets its own context manager to release the connection.
    with urllib.request.urlopen(url) as response:
        with gzip.open(response) as f:
            raw = f.read()

    # Skip the header bytes that precede the payload.
    data = np.frombuffer(raw, np.uint8, offset=8 if is_label else 16)
    if not is_label:
        data = data.reshape(-1, 28, 28)
    return data

# Download the four MNIST archives; label files are flagged with
# type_data='label' so that fetch() decodes them as flat label vectors.
X_train_all = fetch(url="http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
Y_train_all = fetch(
    url="http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
    type_data='label',
)
X_test_all = fetch(url="http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
Y_test_all = fetch(
    url="http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
    type_data='label',
)

# Sanity check: shapes of the train and test image arrays, one per line.
print(X_train_all.shape, X_test_all.shape, sep="\n")

(60000, 28, 28)
(10000, 28, 28)


### Subsample and flatten

In [3]:
# Number of leading samples kept from the train and test splits
n_train, n_test = 10000, 250

# Labels for the kept samples
Y_train = Y_train_all[:n_train]
Y_test = Y_test_all[:n_test]

# Images for the kept samples: every row is a flattened image (28*28 values)
X_train = X_train_all[:n_train].reshape(-1, 28*28)
X_test = X_test_all[:n_test].reshape(-1, 28*28)

## MLP

In [4]:
class MLP(nn.Module):
    """Two-layer perceptron: 784 inputs -> 550 hidden units -> 10 outputs.

    A ReLU followed by dropout (p=0.2) sits between the two linear layers.
    """

    def __init__(self):
        super().__init__()

        # Hidden projection, output projection, then the dropout module.
        self.l1 = nn.Linear(784, 550)
        self.l2 = nn.Linear(550, 10)
        self.dropout = nn.Dropout(0.2)

    def __call__(self, x):
        """Run the forward pass on ``x`` and return the output of ``l2``."""
        hidden = self.l1(x).relu()
        return self.l2(self.dropout(hidden))

In [5]:
# Build the network; the bare `model` expression on the last line makes the
# notebook display its repr as the cell output.
model = MLP()
model

MLP
	Layer(in=784, out=550, bias=True)
	Layer(in=550, out=10, bias=True)
	Dropout(p=0.2)

## Train

In [6]:
# Loss: cross-entropy with reduction='mean'
criterion = nn.CrossEntropyLoss(reduction='mean')

# Optimizer: SGD over all model parameters, with Nesterov momentum enabled
sgd_settings = dict(lr=0.001, momentum=0.9, nesterov=True)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

In [7]:
# Put the model (and therefore its dropout layer) in train mode
model.train()

# 20 passes, each one over the whole training subset at once
prog_bar = trange(20)
for ite in prog_bar:
    # Clear the gradients left by the previous pass
    optimizer.zero_grad()

    # Forward pass and loss
    output = model(X_train)
    loss = criterion(output, Y_train)

    # Backward pass, then parameter update
    loss.backward()
    optimizer.step()

    # Show the current loss next to the progress bar
    prog_bar.set_description(f"loss: {loss.data}")

loss: 0.6017593145370483: 100%|█████████████████| 20/20 [00:07<00:00,  2.64it/s]


## Evaluate

In [8]:
def accuracy(model, X, Y):
    """Return the percentage of rows in ``X`` whose predicted class equals ``Y``.

    The predicted class is the argmax over axis 1 of the model output. No
    softmax is applied first: softmax is monotonic within each row, so it
    does not change which index holds the maximum.

    Parameters
    ----------
    model : callable
        Called as ``model(X)``; the result must expose its values as ``.data``.
    X : array-like
        One sample per row.
    Y : array-like
        Integer class label for each row of ``X``.

    Returns
    -------
    float
        Accuracy as a percentage between 0 and 100.
    """
    predictions = model(X).data.argmax(axis=1)
    return (predictions == Y).mean() * 100


# dropout layers in evaluation mode
model.eval()

# Two decimals avoid float artefacts such as "84.39999999999999 %".
print(f'train accuracy: {accuracy(model, X_train, Y_train):.2f} %')
print(f'test accuracy: {accuracy(model, X_test, Y_test):.2f} %')

train accuracy: 86.91 %
test accuracy: 84.39999999999999 %
