In [1]:
# Install (or upgrade) the fastbook package quietly, then import it and run its
# setup helper. NOTE(review): the install is unpinned, so re-runs may pick up a
# different fastbook/fastai version.
!pip install -Uqq fastbook
import fastbook 
fastbook.setup_book()

[?25l[K     |▌                               | 10kB 30.2MB/s eta 0:00:01[K     |█                               | 20kB 33.5MB/s eta 0:00:01[K     |█▍                              | 30kB 36.6MB/s eta 0:00:01[K     |█▉                              | 40kB 40.6MB/s eta 0:00:01[K     |██▎                             | 51kB 41.2MB/s eta 0:00:01[K     |██▊                             | 61kB 42.5MB/s eta 0:00:01[K     |███▏                            | 71kB 42.9MB/s eta 0:00:01[K     |███▋                            | 81kB 28.1MB/s eta 0:00:01[K     |████                            | 92kB 28.9MB/s eta 0:00:01[K     |████▌                           | 102kB 29.4MB/s eta 0:00:01[K     |█████                           | 112kB 29.4MB/s eta 0:00:01[K     |█████▌                          | 122kB 29.4MB/s eta 0:00:01[K     |██████                          | 133kB 29.4MB/s eta 0:00:01[K     |██████▍                         | 143kB 29.4MB/s eta 0:00:01[K     |██████▉      

In [2]:
from fastai.vision.all import *
from fastbook import *
from time import time

In [3]:
# Fetch the MNIST dataset referenced by URLs.MNIST; `path` is its local root.
path = untar_data(URLs.MNIST)
# Show paths relative to the dataset root (see the `path.ls()` output below).
Path.BASE_PATH = path

In [4]:
# List the top-level folders of the dataset.
path.ls()

(#2) [Path('training'),Path('testing')]

In [27]:
# Data pipeline: black-and-white images in, digit category out.
# The train/validation split comes from the grandparent folder
# ('training' / 'testing') and the label from the parent folder name.
digits = DataBlock(
    blocks=(ImageBlock(cls=PILImageBW), CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=GrandparentSplitter(train_name='training', valid_name='testing'),
)

In [28]:
# Build the training/validation DataLoaders from the dataset root.
dls = digits.dataloaders(path)

In [29]:
# Draw ONE batch and inspect both parts of it. Calling `dls.one_batch()` twice
# would draw two separate batches, so the shape and the labels shown would not
# belong together.
first_batch = dls.one_batch()
first_batch[0].shape, first_batch[1]

((64, 1, 28, 28),
 TensorCategory([1, 7, 8, 3, 7, 5, 3, 5, 2, 5, 4, 2, 1, 4, 8, 0, 9, 8, 3, 5, 5, 7, 6, 3, 3, 6, 1, 8, 7, 7, 1, 8, 9, 0, 6, 0, 3, 4, 0, 8, 1, 1, 6, 9, 4, 5, 6, 8, 1, 1, 9, 7, 4, 1, 4, 8, 3, 6, 1, 5, 7, 5, 7, 6],
        device='cuda:0'))

In [30]:
# Shape of a single item (index 0) from one validation batch.
dls.valid.one_batch()[0][0].shape

(1, 28, 28)

# Model

In [31]:
def mnist_loss(preds, target):
  """Mean distance between each prediction and its binary target.

  Where `target` equals 1 the per-item loss is `1 - preds`; everywhere else it
  is `preds` itself. The result is the mean of those per-item losses.
  """
  is_positive = target == 1
  per_item = torch.where(is_positive, 1 - preds, preds)
  return per_item.mean()

# Sanity check: the per-item losses are 0.2, 1-0.6 and 1-0.3, whose mean is 0.4333.
preds = tensor([0.2, 0.6, 0.3])
target = tensor([0, 1, 1])
mnist_loss(preds, target)

tensor(0.4333)

In [32]:
def batch_accuracy(preds, yb):
  """Fraction of rows in `preds` whose highest-scoring column equals the label.

  `preds` is reduced with `argmax` over dim 1 and compared element-wise with
  `yb`; the mean of the resulting 0/1 hits is returned as a 0-dim tensor.
  """
  predicted_class = preds.argmax(dim=1)
  hits = (predicted_class == yb).float()
  return hits.mean()

# Sanity check with hand-made scores: rows 0 and 1 have their argmax at the
# label, row 2 does not (argmax 2, label 0), so the accuracy is 2/3.
test_xb = tensor([[0.2, 0.2, 0.6], [0.1, 0.8, 0.1], [0.3, 0.3, 0.4]])
test_yb = tensor([2, 1, 0])
batch_accuracy(test_xb, test_yb)

tensor(0.6667)

In [33]:
class BasicOptimiser:
  """Minimal gradient-descent optimiser.

  Holds a list of parameters and a learning rate. `step` moves every parameter
  against its gradient; `zero_grad` clears the gradients afterwards.

  Args:
    params: iterable of tensors with `.data` and `.grad` attributes.
    lr: learning rate that scales each gradient step.
  """

  def __init__(self, params, lr):
    # Materialise `params` so that a generator (e.g. `model.parameters()`)
    # can be iterated on every step, not just the first one.
    self.params,self.lr = list(params),lr

  def step(self):
    """Update each parameter in place: `p -= p.grad * lr`."""
    for p in self.params:
      # A parameter has no gradient when it took no part in the last backward
      # pass (or `zero_grad` has already run); leave it unchanged rather than
      # failing on `None.data`.
      if p.grad is None:
        continue
      p.data -= p.grad.data * self.lr

  def zero_grad(self):
    """Drop all gradients so the next backward pass starts from scratch."""
    for p in self.params:
      p.grad = None

In [34]:
class BasicSigmoid:
  """Parameter-free "model" that squashes its input with a sigmoid."""

  def parameters(self):
    """Return the trainable parameters; this model has none."""
    return list()

  def __call__(self, xb):
    """Apply the element-wise sigmoid of `xb`."""
    squashed = xb.sigmoid()
    return squashed

In [35]:
class BasicLearner:
  """Bare-bones training loop: forward pass, loss, backward pass, optimiser step.

  Args:
    dls: object exposing `train` and `valid` iterables of `(xb, yb)` batches.
    model: callable mapping a batch to predictions; must offer `parameters()`.
    opt_func: optimiser factory, called as `opt_func(model.parameters(), lr)`.
      The object it returns must offer `step()` and `zero_grad()`.
    loss_function: callable `(preds, yb)` returning a tensor with `.backward()`.
    batch_accuracy: callable `(preds, yb)` returning a tensor per batch; the
      per-batch results are stacked and averaged in `validate_epoch`.
    lr: learning rate handed to `opt_func` (defaults to 0.03).
  """

  def __init__(self, dls: "DataLoaders", model, opt_func, loss_function, batch_accuracy, lr=0.03):
    self.dls = dls
    self.model = model
    # The attribute keeps its original name but holds the optimiser INSTANCE.
    self.opt_func = opt_func(model.parameters(), lr)
    self.loss_function = loss_function
    self.batch_accuracy = batch_accuracy

  def validate_epoch(self):
    """Return the mean batch accuracy over `dls.valid`, rounded to 4 decimals."""
    # No gradients are needed for evaluation, so skip building autograd graphs.
    with torch.no_grad():
      accs = [self.batch_accuracy(self.model(xb), yb) for xb, yb in self.dls.valid]
    return round(torch.stack(accs).mean().item(), 4)

  def fit(self, epochs):
    """Train for `epochs` passes over `dls.train`, printing accuracy and time per epoch."""
    for epoch in range(epochs):
      start_time = time()
      for batch_counter, (xb, yb) in enumerate(self.dls.train, start=1):
        # "\r" rewrites the same console line, giving a lightweight progress display.
        print(f"Epoch {epoch}, batch {batch_counter}", end="\r")
        preds = self.model(xb)
        loss = self.loss_function(preds, yb)
        loss.backward()
        self.opt_func.step()
        # Clear gradients right after the update so they do not leak into the next batch.
        self.opt_func.zero_grad()
      # The reported time includes the validation pass.
      print(f"Epoch {epoch}, Accuracy: {self.validate_epoch()}, took {time() - start_time:.2f}s")

  def pred(self, xb):
    """Return the index of the highest-scoring class (dim 1) for each row of `xb`."""
    return self.model(xb).argmax(dim=1)

In [36]:
# Small fully connected net: flatten each 28x28 image, one hidden layer of
# 30 units with ReLU, then 10 outputs (one score per digit class).
simple_net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,10)
)
# Put the model on the same device as the batches produced by `dls`. Unlike
# `torch.cuda.current_device()`, this does not fail on a CPU-only runtime.
simple_net.to(dls.device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=30, bias=True)
  (2): ReLU()
  (3): Linear(in_features=30, out_features=10, bias=True)
)

Note: fastai's own `Learner` normally moves the model to the GPU for us; because we use a hand-written learner here, we have to do it manually.

In [37]:
# Train `simple_net` for two epochs with the hand-written optimiser and learner
# (BasicLearner's learning rate of 0.03), using cross-entropy as the loss.
learner = BasicLearner(dls, simple_net, BasicOptimiser, F.cross_entropy, batch_accuracy)
learner.fit(2)

Epoch 0, Accuracy: 0.9011, took 53.84s
Epoch 1, Accuracy: 0.9121, took 53.98s


In [17]:
# Show the name of CUDA device 0 and the index of the currently selected device.
torch.cuda.get_device_name(0), torch.cuda.current_device()

('Tesla T4', 0)

In [37]:
# Fetch the validation batch once and take sample 4 (image and label) from it,
# instead of loading a batch twice.
valid_batch = dls.valid.one_batch()
valid_im, valid_label = valid_batch[0][4], valid_batch[1][4]
# A single image has shape (1, 28, 28); `nn.Flatten` keeps dim 0, so the lone
# channel axis acts as a batch of one and `pred` returns a one-element tensor.
prediction = learner.pred(valid_im)
dls.vocab[prediction], dls.vocab[valid_label]

((#1) ['7'], '7')

# Experiments

What about using the median loss, rather than the mean?

In [None]:
def mnist_loss_median(preds, target):
  """Median (rather than mean) distance between predictions and binary targets.

  Where `target` equals 1 the per-item loss is `1 - preds`; everywhere else it
  is `preds`. The median of those per-item losses is returned.
  """
  is_positive = target == 1
  per_item = torch.where(is_positive, 1 - preds, preds)
  return per_item.median()

In [None]:
# NOTE(review): `SimpleNet` is not defined in any cell shown above; define it
# (or substitute another model) before running this cell.
model = SimpleNet(28*28, 1)
# BasicLearner builds the optimiser itself via `opt_func(model.parameters(), 0.03)`,
# so pass the optimiser CLASS. An already-built instance is not callable and
# would raise a TypeError.
learner_exp1 = BasicLearner(dls, model, BasicOptimiser, mnist_loss_median, batch_accuracy)
learner_exp1.fit(10)

Epoch 0, Accuracy: 0.5068
Epoch 1, Accuracy: 0.5068
Epoch 2, Accuracy: 0.5068
Epoch 3, Accuracy: 0.5068
Epoch 4, Accuracy: 0.5166
Epoch 5, Accuracy: 0.5552
Epoch 6, Accuracy: 0.6074
Epoch 7, Accuracy: 0.6519
Epoch 8, Accuracy: 0.7769
Epoch 9, Accuracy: 0.8369


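Answer: With the median loss the model just trains more slowly. Accuracy stays flat for the first few epochs before it starts to improve, but in this case it still converges eventually.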

## Bonus: Weights & Biases