In [17]:
import urllib.request
import gzip
import numpy as np
from tqdm import trange

from giagrad import Tensor
import giagrad.nn as nn
import giagrad.optim as optim 
from giagrad.display import draw_dot

from collections import OrderedDict

from typing import List

## Load data

In [18]:
def fetch(url, type_data = None):
    """Download a gzip-compressed MNIST IDX file and decode it into a uint8 array.

    Parameters
    ----------
    url : str
        Location of the ``.gz`` archive to download.
    type_data : str or None, optional
        Pass ``'label'`` for a label file; any other value (including the
        default ``None``) decodes the payload as images.

    Returns
    -------
    numpy.ndarray
        For ``type_data == 'label'``: a 1-D ``uint8`` array holding every byte
        after the first 8 (the IDX label header).
        Otherwise: a ``uint8`` array of shape ``(-1, 28, 28)`` built from every
        byte after the first 16 (the IDX image header).
    """
    # gzip.open() never closes a file object that was handed to it, so the
    # HTTP response gets its own context manager; otherwise the underlying
    # connection stays open until garbage collection.
    with urllib.request.urlopen(url) as response, gzip.open(response) as f:
        raw = f.read()

    if type_data == 'label':
        return np.frombuffer(raw, np.uint8, offset=8)
    return np.frombuffer(raw, np.uint8, offset=16).reshape(-1, 28, 28)

# Download the four MNIST archives. Image archives use fetch's default
# decoding; label archives are requested explicitly with type_data='label'.
X_train_all = fetch(
    "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz"
)
Y_train_all = fetch(
    "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
    type_data='label',
)
X_test_all = fetch(
    "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz"
)
Y_test_all = fetch(
    "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
    type_data='label',
)

# Sanity check: show the shape of each image array (train first, then test).
for split_images in (X_train_all, X_test_all):
    print(split_images.shape)

(60000, 28, 28)
(10000, 28, 28)


### Resize and reshape

In [19]:
# Number of leading samples kept from each split.
n_train, n_test = 10000, 250

# Keep the first n_* samples and flatten every 28x28 image into one
# 784-element row; the label arrays are sliced to the same length.
X_train, Y_train = (
    X_train_all[:n_train].reshape(-1, 28 * 28),
    Y_train_all[:n_train],
)
X_test, Y_test = (
    X_test_all[:n_test].reshape(-1, 28 * 28),
    Y_test_all[:n_test],
)

## MLP - Using `Sequential()`

### Model Creation

In [20]:
# Named layers, in forward order: 784 inputs -> 550 hidden units -> 10 outputs,
# with ReLU and dropout (p=0.2) between the two linear layers.
layers = OrderedDict()
layers['Linear1'] = nn.Linear(784, 550)
layers['ReLu1'] = nn.ReLU()
layers['Dropout1'] = nn.Dropout(0.2)
layers['Linear2'] = nn.Linear(550, 10)

model = nn.Sequential(layers)

# Bare expression so the notebook displays the model summary.
model

Sequential
	Layer(in=784, out=550, bias=True)
	ReLU
	Dropout(p=0.2)
	Layer(in=550, out=10, bias=True)

### Model View

In [21]:
# List the names under which the layers were registered, one per line.
for layer_name in model.__odict__.keys():
    print(layer_name)

Linear1
ReLu1
Dropout1
Linear2


### Training

In [22]:
EPOCHS = 20

# Mean-reduced cross-entropy loss, minimised by SGD with Nesterov momentum.
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, nesterov=True)

# Presumably switches layers such as Dropout to training behaviour;
# confirm against the giagrad documentation.
model.train()

# Every iteration is one full pass over X_train (no mini-batching).
prog_bar = trange(EPOCHS)
for epoch in prog_bar:
    optimizer.zero_grad()              # clear gradients from the previous step
    output = model(X_train)            # forward pass
    loss = criterion(output, Y_train)  # compare predictions with labels
    loss.backward()                    # backpropagate
    optimizer.step()                   # apply the parameter update

    # Show the latest loss value next to the progress bar.
    prog_bar.set_description(f"loss: {loss.data}")

loss: 0.6196080446243286: 100%|██████████| 20/20 [00:19<00:00,  1.04it/s]


### Evaluation

In [23]:
def accuracy_percent(inputs, targets):
    """Return the percentage of rows in `inputs` whose highest-scoring class equals `targets`.

    Runs the notebook-level `model` on `inputs`, applies softmax along axis 1,
    takes the arg-max per row and compares it element-wise with `targets`.
    """
    predicted = model(inputs).softmax(axis=1).data.argmax(axis=1)
    return (predicted == targets).mean() * 100


# Presumably switches layers such as Dropout to inference behaviour;
# confirm against the giagrad documentation.
model.eval()

print(f'train accuracy: {accuracy_percent(X_train, Y_train)} %')
print(f'test accuracy: {accuracy_percent(X_test, Y_test)} %')

train accuracy: 86.67 %
test accuracy: 84.0 %
