In [49]:
import torch
from torch import nn
from torchvision import datasets
import fastai 
from torchvision.transforms import ToTensor
# from fastai.data.core import DataLoader
from torch.utils.data import DataLoader
from fastai.data.core import DataLoaders
from fastai.vision.all import Learner, Metric
from fastai import optimizer


In [2]:
model = nn.Sequential(
    nn.Linear(28*28, 30),
    nn.ReLU(), 
    nn.Linear(28*28, 10)
)

In [3]:
training_data = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.MNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

In [4]:
batch_size = 256

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([256, 1, 28, 28])
Shape of y: torch.Size([256]) torch.int64


In [5]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [62]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(), 
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits 

model = NeuralNetwork().to(device)
model


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [7]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [63]:
class BasicOptimizer:
    def __init__(self, params, lr): 
        self.params,self.lr = list(params),lr
        self.state = {p: {} for p in self.params}
        self.hypers = [{'lr': lr}]
        
    def step(self, *args, **kwargs):
        for p in self.params: 
            p.data -= p.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        for p in self.params:
            p.grad = None

    def set_hypers(self, **kwargs):
        if 'lr' in kwargs:
            self.lr = kwargs['lr']
            self.hypers[0]['lr'] = kwargs['lr']
            
optimizer = BasicOptimizer(model.parameters(), lr=1)

In [9]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            print(f"BATCH: {batch} of {size/batch_size} batches")
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


In [11]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    

In [15]:
dls = DataLoaders(train_dataloader, test_dataloader)

In [80]:
class CustomAccuracy1(Metric):
    def __init__(self):
        self.correct = 0
        self.total = 0

    def reset(self):
        self.correct = 0
        self.total = 0

    def accumulate(self, learn):
        # Get max probability of the final dimension
        preds = learn.pred.argmax(dim=-1)
        self.correct += (preds == learn.y).sum().item()
        self.total += len(learn.y)

    @property
    def value(self):
        return (self.correct / self.total) * 100 if self.total > 0 else None

    @property
    def name(self):
        return "meowMeow1"

In [81]:
class CustomAccuracy2(Metric):
    def __init__(self):
        self.correct = 0
        self.total = 0

    def reset(self):
        self.correct = 0
        self.total = 0

    def accumulate(self, learn):
        # Get max probability of the final dimension
        preds = learn.pred.argmax(dim=-1)
        self.correct += (preds == learn.y).sum().item()
        self.total += len(learn.y)

    @property
    def value(self):
        return (self.correct / self.total) * 100 if self.total > 0 else None

    @property
    def name(self):
        return "meowMeow2"

In [82]:
class MixAccuracy(Metric):
    def __init__(self):
        self.c1 = CustomAccuracy1()
        self.c2 = CustomAccuracy2()

    def reset(self):
        self.c1.reset()
        self.c2.reset()

    def accumulate(self, learn):
        self.c1.accumulate(learn)
        self.c2.accumulate(learn)

    @property
    def value(self):
        return self.c1.value, self.c2.value

    @property
    def name(self):
        return f"{self.c1.name}_{self.c2.name}"

In [83]:
learner = Learner(dls, model, opt_func=BasicOptimizer, loss_func=loss_fn, metrics=MixAccuracy)

In [85]:
df = learner.fit(2, lr=1e-2)
df

epoch,train_loss,valid_loss,meowMeow1_meowMeow2,time
0,0.339499,0.337384,"(90.41, 90.41)",00:06
1,0.328377,0.326924,"(90.64999999999999, 90.64999999999999)",00:05


In [26]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
BATCH: 0 of 234.375 batches
loss: 0.000407  [  256/60000]
BATCH: 100 of 234.375 batches
loss: 0.000389  [25856/60000]
BATCH: 200 of 234.375 batches
loss: 0.000223  [51456/60000]
Test Error: 
 Accuracy: 98.3%, Avg loss: 0.091710 

Epoch 2
-------------------------------
BATCH: 0 of 234.375 batches
loss: 0.000387  [  256/60000]
BATCH: 100 of 234.375 batches
loss: 0.000370  [25856/60000]
BATCH: 200 of 234.375 batches
loss: 0.000211  [51456/60000]
Test Error: 
 Accuracy: 98.3%, Avg loss: 0.092077 

Epoch 3
-------------------------------
BATCH: 0 of 234.375 batches
loss: 0.000368  [  256/60000]
BATCH: 100 of 234.375 batches
loss: 0.000347  [25856/60000]
BATCH: 200 of 234.375 batches
loss: 0.000199  [51456/60000]
Test Error: 
 Accuracy: 98.3%, Avg loss: 0.092462 

Epoch 4
-------------------------------
BATCH: 0 of 234.375 batches
loss: 0.000351  [  256/60000]
BATCH: 100 of 234.375 batches
loss: 0.000332  [25856/60000]
BATCH: 200 of 234.375 batches
lo