In [1]:
import urllib.request
import gzip
import numpy as np
from tqdm import trange

from giagrad import Tensor
import giagrad.nn as nn
import giagrad.optim as optim 
from giagrad.display import draw_dot

from collections import OrderedDict

from typing import List

## Load data

In [2]:
def fetch(url, type_data = None):
    # Extract the dataset from the compressed file
    with gzip.open(urllib.request.urlopen(url)) as f:
        if type_data == 'label':
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        else:
            data = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28, 28)
    return data

X_train_all = fetch("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")
Y_train_all = fetch("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", 'label')
X_test_all = fetch("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")
Y_test_all = fetch("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", 'label')

print(X_train_all.shape)
print(X_test_all.shape)

(60000, 28, 28)
(10000, 28, 28)


### Resize and reshape

In [3]:
n_train = 5000
n_test = 250

# Every row is a flattened image
X_train = Tensor(X_train_all[:n_train].reshape(-1, 28*28))
Y_train = Tensor(Y_train_all[:n_train])

X_test = Tensor(X_test_all[:n_test].reshape(-1, 28*28))
Y_test = Tensor(Y_test_all[:n_test])

## CNN

### Model Creation

In [4]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        self.linear2 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear3 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear4 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear5 = nn.Linear(
            in_features=28*28,
            out_features=10
        )
        
#         self.dropout = nn.Dropout(p=0.4)
        
    def __call__(self, x):
        x = self.linear1(x).relu()
        x = self.linear2(x).relu()
        x = self.linear3(x).relu()
        x = self.linear4(x).relu()
        return self.linear5(x)


In [5]:
model = Model()
print(model)

Model
	linear1: Layer(in_features=784, out_features=784, bias=True)
	linear2: Layer(in_features=784, out_features=784, bias=True)
	linear3: Layer(in_features=784, out_features=784, bias=True)
	linear4: Layer(in_features=784, out_features=784, bias=True)
	linear5: Layer(in_features=784, out_features=10, bias=True)


### Training

In [6]:
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.Adam(
    model.parameters(),
    lr=0.0001,
    amsgrad=True
)

In [7]:
EPOCHS = 50

model.train()

for ite in (prog_bar := trange(EPOCHS)):
    # Zero gradient 
    optimizer.zero_grad() 
    # Pass data through the network
    output = model(X_train)
    # Calculate loss
    loss = criterion(output, Y_train)
    # Backpropagate
    loss.backward()
    # Update weights
    optimizer.step()

    prog_bar.set_description(f"loss: {loss.data}")

loss: 2.7076754570007324: 100%|█████████████████| 50/50 [00:48<00:00,  1.03it/s]


### Evaluation

In [18]:
model.eval()

print(f'train accuracy: {(model(X_train).softmax(axis=1).data.argmax(axis=1) == Y_train).mean() * 100} %')
print(f'test accuracy: {(model(X_test).softmax(axis=1).data.argmax(axis=1) == Y_test).mean() * 100} %')

train accuracy: tensor: 10.559999 fn: Mul %
test accuracy: tensor: 11.599999 fn: Mul %


## PyTorch

In [39]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [50]:
X_train = torch.from_numpy(X_train.data.copy().astype(np.float32))
Y_train = torch.from_numpy(Y_train.data.copy().astype(np.int64))

X_test = torch.from_numpy(X_test.data.copy().astype(np.float32))
Y_test = torch.from_numpy(Y_test.data.copy())

In [51]:
Y_train.dtype

torch.int64

In [52]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        self.linear2 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear3 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear4 = nn.Linear(
            in_features=28*28,
            out_features=28*28
        )
        
        self.linear5 = nn.Linear(
            in_features=28*28,
            out_features=10
        )
        
#         self.dropout = nn.Dropout(p=0.4)
        
    def __call__(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.linear4(x))
#         x = self.dropout(x)
        return self.linear5(x)

In [53]:
model = Model()
print(model)

Model(
  (linear1): Linear(in_features=784, out_features=784, bias=True)
  (linear2): Linear(in_features=784, out_features=784, bias=True)
  (linear3): Linear(in_features=784, out_features=784, bias=True)
  (linear4): Linear(in_features=784, out_features=784, bias=True)
  (linear5): Linear(in_features=784, out_features=10, bias=True)
)


In [54]:
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.Adam(
    model.parameters(),
    lr=0.0001,
    amsgrad=True
)

In [55]:
EPOCHS = 50

model.train()

for ite in (prog_bar := trange(EPOCHS)):
    # Zero gradient 
    optimizer.zero_grad() 
    # Pass data through the network
    output = model(X_train)
    # Calculate loss
    loss = criterion(output, Y_train)
    # Backpropagate
    loss.backward()
    # Update weights
    optimizer.step()

    prog_bar.set_description(f"loss: {loss.data}")

loss: 0.03736574575304985: 100%|████████████████| 50/50 [00:21<00:00,  2.30it/s]


In [15]:
model.eval()

print(f'train accuracy: {(F.softmax(model(X_train), dim=1).argmax(dim=1) == Y_train).double().mean() * 100} %')
print(f'test accuracy: {(F.softmax(model(X_test), dim=1).argmax(dim=1) == Y_test).double().mean() * 100} %')

train accuracy: 10.5 %
test accuracy: 4.8 %
