<a href="https://colab.research.google.com/github/LukeWeidenwalker/ai-notebooks/blob/master/fastai_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install -Uqq fastbook
import fastbook 
fastbook.setup_book()

[K     |████████████████████████████████| 727kB 5.6MB/s 
[K     |████████████████████████████████| 194kB 29.9MB/s 
[K     |████████████████████████████████| 51kB 7.4MB/s 
[K     |████████████████████████████████| 1.2MB 29.1MB/s 
[K     |████████████████████████████████| 61kB 8.0MB/s 
[?25hMounted at /content/gdrive


In [51]:
from fastai.vision.all import *
from fastbook import *
from time import time


In [145]:
torch.random.manual_seed(42);
torch.set_printoptions(sci_mode=False)

In [146]:
path = untar_data(URLs.MNIST)
Path.BASE_PATH = path

In [147]:
path.ls()

(#2) [Path('training'),Path('testing')]

In [148]:
digits = DataBlock(blocks=(ImageBlock(cls=PILImageBW), CategoryBlock),
                   get_items=get_image_files,
                   splitter=GrandparentSplitter(train_name='training', valid_name='testing'),
                   get_y=parent_label)

In [149]:
dls = digits.dataloaders(path)

In [150]:
dls.one_batch()[0].shape, dls.one_batch()[1]

((64, 1, 28, 28),
 TensorCategory([3, 2, 1, 3, 7, 3, 5, 0, 2, 5, 2, 1, 1, 3, 7, 3, 1, 1, 2, 7, 6, 1, 9, 8, 3, 0, 1, 4, 0, 0, 0, 1, 2, 8, 3, 7, 3, 6, 7, 6, 5, 8, 2, 2, 8, 0, 7, 0, 7, 3, 1, 1, 1, 0, 4, 2, 1, 1, 0, 1, 8, 8, 1, 8],
        device='cuda:0'))

In [151]:
dls.valid.one_batch()[0][0].shape

(1, 28, 28)

# Model

In [152]:
def softmax(x): 
  return torch.exp(x) / torch.exp(x).sum(dim=1, keepdim=True)

In [153]:
def neg_log_likelihood(x, targ):
  return (-x[range(len(targ)), targ])

In [163]:
def cross_entropy_loss(acts, targ, reduction="mean"):
  preds = torch.log(softmax(acts))
  return neg_log_likelihood(preds, targ).mean() if reduction=='mean' else neg_log_likelihood(preds, targ)

In [155]:
acts = torch.randn((6,2))*2
target = tensor([0, 1, 1, 0, 0, 1])

print(neg_log_likelihood(log_sm, target).mean().item(), F.nll_loss(log_sm, target, reduction='mean').item())
print(cross_entropy_loss(acts, target).mean().item(), F.cross_entropy(acts, target, reduction='mean').item())

0.38729965686798096 0.38729965686798096
1.7651389837265015 1.7651389837265015


The torch [docs](https://pytorch.org/docs/stable/nn.functional.html#log-softmax) explain that in practise doing softmax() followed by log() is slower and numerically unstable. Note to myself to checkout F.log_softmax() to see what they are doing differently when I'm further into the course.

In [156]:
def batch_accuracy(preds, yb):
  preds = preds.argmax(dim=1)
  correct = preds == yb
  return correct.float().mean()

test_xb = tensor([[0.2, 0.2, 0.6], [0.1, 0.8, 0.1], [0.3, 0.3, 0.4]])
test_yb = tensor([2, 1, 0])
batch_accuracy(test_xb, test_yb)

tensor(0.6667)

In [157]:
class BasicOptimiser:
  def __init__(self, params, lr):
    self.params,self.lr = list(params),lr

  def step(self):
    for p in self.params:
      p.data -= p.grad.data * self.lr

  def zero_grad(self):
    for p in self.params:
      p.grad = None

In [158]:
class BasicSigmoid:
  def __call__(self, xb):
    return xb.sigmoid()

  def parameters(self):
    return []

In [159]:
class BasicLearner:
  def __init__(self, dls: DataLoaders, model, opt_func, loss_function, batch_accuracy):
    self.dls = dls # type: DataLoaders
    self.model = model
    self.opt_func = opt_func(model.parameters(), 0.03)
    self.loss_function = loss_function
    self.batch_accuracy = batch_accuracy

  def validate_epoch(self):
    accs = [self.batch_accuracy(self.model(xb), yb) for xb, yb in self.dls.valid]
    return round(torch.stack(accs).mean().item(), 4)

  def fit(self, epochs):
    for epoch in range(epochs):
      accs = []
      start_time = time()
      for xb, yb in self.dls.train:
        preds = self.model(xb)
        loss = self.loss_function(preds, yb)
        loss.backward()
        self.opt_func.step()
        self.opt_func.zero_grad()
      print(f"Epoch {epoch}, Accuracy: {self.validate_epoch()}, took {time() - start_time:.2f}s")

  def pred(self, xb):
    return self.model(xb).argmax(dim=1)

In [160]:
simple_net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,10)
)
simple_net.to(torch.cuda.current_device())

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=30, bias=True)
  (2): ReLU()
  (3): Linear(in_features=30, out_features=10, bias=True)
)

moving the model to GPU is usually done by fastai 

In [164]:
learner = BasicLearner(dls, simple_net, BasicOptimiser, cross_entropy_loss, batch_accuracy)
learner.fit(2)

Epoch 0, Accuracy: 0.9068, took 74.17s
Epoch 1, Accuracy: 0.9162, took 72.13s


In [37]:
valid_im, valid_label = dls.valid.one_batch()[0][4], dls.valid.one_batch()[1][4]
prediction = learner.pred(valid_im)
dls.vocab[prediction], dls.vocab[valid_label]

((#1) ['7'], '7')

# Experiments

What about using the median loss, rather than the mean?

In [None]:
def mnist_loss_median(preds, target):
  return torch.where(target==1, 1-preds, preds).median()

In [None]:
model = SimpleNet(28*28, 1)
learner_exp1 = BasicLearner(dls, model, BasicOptimiser(model.parameters(), 0.13), mnist_loss_median, batch_accuracy)
learner_exp1.fit(10)

Epoch 0, Accuracy: 0.5068
Epoch 1, Accuracy: 0.5068
Epoch 2, Accuracy: 0.5068
Epoch 3, Accuracy: 0.5068
Epoch 4, Accuracy: 0.5166
Epoch 5, Accuracy: 0.5552
Epoch 6, Accuracy: 0.6074
Epoch 7, Accuracy: 0.6519
Epoch 8, Accuracy: 0.7769
Epoch 9, Accuracy: 0.8369


Answer: It just trains slower (but still converges at some point in this case)

## Bonus: Weights & Biases