In [12]:
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np


In [13]:
data_path = '../data-unversioned/p1ch7/'

# Per-channel mean and standard deviation, calculated previously.
# Defined once so the training and validation pipelines cannot drift apart.
cifar10_mean = (0.4915, 0.4823, 0.4468)
cifar10_std = (0.2470, 0.2435, 0.2616)

# Convert each image to a tensor, then standardise it channel-wise.
cifar10_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar10_mean, cifar10_std),
])

# download=False: the dataset files are expected to already be under data_path.
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=cifar10_transform)

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=cifar10_transform)

In [14]:
# Restrict CIFAR-10 to two classes and relabel them 0 and 1:
# original label 0 -> 0 ('airplane'), original label 2 -> 1 ('bird').
class_names = ['airplane', 'bird']
label_map = {0: 0, 2: 1}

# Materialise each subset as a plain list of (image, new_label) pairs.
cifar2 = [
    (image, label_map[target])
    for image, target in cifar10
    if target in label_map
]
cifar2_val = [
    (image, label_map[target])
    for image, target in cifar10_val
    if target in label_map
]

Neural net:

In [15]:
import torch.nn as nn

n_out = 2  # number of output classes

# Fully connected classifier with one hidden layer, operating on flattened
# images (3072 = 3 * 32 * 32 input features).
hidden_layer = nn.Linear(3072, 512)  # 512 hidden features: an arbitrary choice
output_layer = nn.Linear(512, n_out)

model = nn.Sequential(
    hidden_layer,
    nn.Tanh(),
    output_layer,
    nn.Softmax(dim=1),  # normalise each row of scores into probabilities
)

We can try running the untrained model on an image:

In [18]:
# First (image, label) pair of the two-class training list; the label is discarded.
img, _ = cifar2[0]

# Displayed as the cell output: the shape of the image tensor.
img.shape

torch.Size([3, 32, 32])

Flatten the image into a 1-D vector, then add a leading batch dimension (the model expects input of shape `[batch, 3072]`):

In [20]:
# Step 1: flatten the image into a 1-D vector.
img_flat = img.view(-1)
# Step 2: prepend a batch axis of size 1.
img_batch = img_flat.unsqueeze(0)

# Forward pass; the result is displayed as the cell output.
model(img_batch)

tensor([[0.3697, 0.6303]], grad_fn=<SoftmaxBackward0>)

`nn.NLLLoss` expects log-probabilities as its input, so we replace the final `nn.Softmax` with `nn.LogSoftmax`:

In [None]:
# One-hidden-layer classifier whose final layer emits log-probabilities.
hidden_layer = nn.Linear(3072, 512)  # 512 hidden features: an arbitrary choice
output_layer = nn.Linear(512, 2)

model = nn.Sequential(
    hidden_layer,
    nn.Tanh(),
    output_layer,
    nn.LogSoftmax(dim=1),
)

# Negative log-likelihood loss, to be applied to the model's log-probabilities.
loss = nn.NLLLoss()

## Training

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# One-hidden-layer classifier ending in log-probabilities, paired with NLLLoss.
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1),
)

learning_rate = 1e-2
n_epochs = 100

loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# One optimizer step per sample, i.e. an effective batch size of 1.
for epoch in range(n_epochs):
    for img, label in cifar2:
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # view(1, -1): flatten the image and give it a batch axis of size 1.
        out = model(img.view(1, -1))
        loss = loss_fn(out, torch.tensor([label]))

        loss.backward()
        optimizer.step()

    # Reports the loss of the last sample seen in the epoch, not an average.
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

The loop above used a batch size of 1. With a `DataLoader` we can choose the batch size ourselves and shuffle the samples on every epoch:

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim

# Mini-batches of 64 samples, drawn in shuffled order.
train_loader = torch.utils.data.DataLoader(
    cifar2, batch_size=64, shuffle=True)

# Earlier, smaller architecture kept for reference; this bare string literal
# has no effect when the cell runs.
"""model = nn.Sequential(
            nn.Linear(3072, 512),
            nn.Tanh(),
            nn.Linear(512, 2),
            nn.LogSoftmax(dim=1))
"""

# Deeper fully connected classifier: 3072 -> 1024 -> 512 -> 128 -> 2,
# with Tanh between the linear layers and log-probabilities at the output.
model = nn.Sequential(
    nn.Linear(3072, 1024), nn.Tanh(),
    nn.Linear(1024, 512), nn.Tanh(),
    nn.Linear(512, 128), nn.Tanh(),
    nn.Linear(128, 2),
    nn.LogSoftmax(dim=1),
)

learning_rate = 1e-2
n_epochs = 100

loss_fn = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        # Clear the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Read the batch size from the data itself rather than assuming 64.
        batch_size = imgs.shape[0]
        # Flatten every image in the batch to a row vector.
        outputs = model(imgs.view(batch_size, -1))
        loss = loss_fn(outputs, labels)

        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch of the epoch, not an average.
    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))

Epoch: 0, Loss: 0.440426
Epoch: 1, Loss: 0.454341
Epoch: 2, Loss: 0.415200
Epoch: 3, Loss: 0.594881
Epoch: 4, Loss: 0.682169
Epoch: 5, Loss: 0.328828
Epoch: 6, Loss: 0.351497
Epoch: 7, Loss: 0.395594
Epoch: 8, Loss: 0.658749
Epoch: 9, Loss: 0.179220
Epoch: 10, Loss: 0.684045
Epoch: 11, Loss: 0.427414
Epoch: 12, Loss: 0.553806
Epoch: 13, Loss: 0.272633
Epoch: 14, Loss: 0.519202
Epoch: 15, Loss: 0.444048
Epoch: 16, Loss: 0.350012
Epoch: 17, Loss: 0.439343
Epoch: 18, Loss: 0.372027
Epoch: 19, Loss: 0.317420
Epoch: 20, Loss: 0.228976
Epoch: 21, Loss: 0.196509
Epoch: 22, Loss: 0.356990
Epoch: 23, Loss: 0.208268
Epoch: 24, Loss: 0.274760
Epoch: 25, Loss: 0.372228
Epoch: 26, Loss: 0.191447
Epoch: 27, Loss: 0.182893
Epoch: 28, Loss: 0.412579
Epoch: 29, Loss: 0.181992
Epoch: 30, Loss: 0.212724
Epoch: 31, Loss: 0.038079
Epoch: 32, Loss: 0.322014
Epoch: 33, Loss: 0.493585
Epoch: 34, Loss: 0.203102
Epoch: 35, Loss: 0.368076
Epoch: 36, Loss: 0.122943
Epoch: 37, Loss: 0.051233
Epoch: 38, Loss: 0.219

In [23]:
# Evaluate the current `model` on the two-class validation list. Shuffling is
# off: only aggregate counts are computed, so sample order does not matter.
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

correct = 0
total = 0

with torch.no_grad():  # inference only: no gradient tracking needed
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs = model(imgs.view(batch_size, -1))
        # Predicted class = index of the largest output in each row.
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())

# Fix: the ratio used to be passed as a second print() argument, so the "%f"
# placeholder was printed literally instead of being filled in.
print("Accuracy: %f" % (correct / total))

Accuracy: %f 0.8075


In [24]:
# Re-create the deeper classifier (3072 -> 1024 -> 512 -> 128 -> 2) with
# freshly initialised weights; this replaces whatever `model` held before.
# Hidden layers are generated pairwise (Linear followed by Tanh) in order.
model = nn.Sequential(
    *[layer
      for fan_in, fan_out in [(3072, 1024), (1024, 512), (512, 128)]
      for layer in (nn.Linear(fan_in, fan_out), nn.Tanh())],
    nn.Linear(128, 2),
    nn.LogSoftmax(dim=1),
)