In [1]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import torch

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x28e70923730>

In [2]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [3]:
from torchvision import datasets, transforms
data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

In [4]:
cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

In [5]:
label_map = {6: 0, 7: 1, 8: 2, 9:3}
class_names = ['frog', 'horse', 'ship', "truck"]
cifar2 = [(img, label_map[label])
          for img, label in cifar10 
          if label in [6, 7, 8, 9]]
cifar2_val = [(img, label_map[label])
              for img, label in cifar10_val
              if label in [6, 7, 8, 9]]

In [6]:
import torch.nn as nn

n_out = 2

model = nn.Sequential(
            nn.Linear(
                3072,  # <1>
                512,   # <2>
            ),
            nn.Tanh(),
            nn.Linear(
                512,   # <2>
                n_out, # <3>
            )
        )

In [7]:
def softmax(x):
    return torch.exp(x) / torch.exp(x).sum()

In [8]:
x = torch.tensor([1.0, 2.0, 3.0])

softmax(x)

tensor([0.0900, 0.2447, 0.6652])

In [9]:
softmax(x).sum()

tensor(1.)

In [10]:
softmax = nn.Softmax(dim=1)

x = torch.tensor([[1.0, 2.0, 3.0],
                  [1.0, 2.0, 3.0]])

softmax(x)

tensor([[0.0900, 0.2447, 0.6652],
        [0.0900, 0.2447, 0.6652]])

In [11]:
model = nn.Sequential(
            nn.Linear(3072, 1024),
            nn.Tanh(),
            nn.Linear(1024, 512),
            nn.Tanh(),
            nn.Linear(512, 128),
            nn.Tanh(),
            nn.Linear(128, 2))

loss_fn = nn.CrossEntropyLoss()

In [12]:
out = torch.tensor([
    [0.6, 0.4],
    [0.9, 0.1],
    [0.3, 0.7],
    [0.2, 0.8],
])
class_index = torch.tensor([0, 0, 1, 1]).unsqueeze(1)

truth = torch.zeros((4,2))
truth.scatter_(dim=1, index=class_index, value=1.0)
truth

tensor([[1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.]])

In [13]:
def mse(out):
    return ((out - truth) ** 2).sum(dim=1).mean()
mse(out)

tensor(0.1500)

In [14]:
out.gather(dim=1, index=class_index)

tensor([[0.6000],
        [0.9000],
        [0.7000],
        [0.8000]])

In [15]:
def likelihood(out):
    prod = 1.0
    for x in out.gather(dim=1, index=class_index):
        prod *= x
    return prod

likelihood(out) #skipped

tensor([0.3024])

In [16]:
def neg_log_likelihood(out):
    return -likelihood(out).log()

neg_log_likelihood(out) #skipped steps

tensor([1.1960])

In [17]:
softmax = nn.Softmax(dim=1)

log_softmax = nn.LogSoftmax(dim=1)

x = torch.tensor([[0.0, 104.0]])

softmax(x)

tensor([[0., 1.]])

In [18]:
torch.log(softmax(x))

tensor([[-inf, 0.]])

In [19]:
log_softmax(x)

tensor([[-104.,    0.]])

In [20]:
torch.exp(log_softmax(x))

tensor([[0., 1.]])

In [21]:
model = nn.Sequential(
            nn.Linear(3072, 1024),
            nn.Tanh(),
            nn.Linear(1024, 512),
            nn.Tanh(),
            nn.Linear(512, 128),
            nn.Tanh(),
            nn.Linear(128, 2))

loss_fn = nn.CrossEntropyLoss()

In [22]:
loss = nn.NLLLoss()

In [23]:
img, label = cifar2[0]

out = model(img.view(-1).unsqueeze(0))

loss(out, torch.tensor([label]))

tensor(0.0506, grad_fn=<NllLossBackward>)

In [24]:
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

In [25]:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import time

start= time.time()

train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=True)

model = nn.Sequential(
            nn.Linear(3072, 1024),
            nn.Tanh(),
            nn.Linear(1024, 512),
            nn.Tanh(),
            nn.Linear(512, 128),
            nn.Tanh(),
            nn.Linear(128, 4))



learning_rate = 1e-2

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

loss_fn = nn.CrossEntropyLoss()

n_epochs = 200
for epoch in range(n_epochs):
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("Epoch: %d, Loss: %f" % (epoch, float(loss)))
    
stop = time.time()
duration = stop - start
print(duration)

Epoch: 0, Loss: 0.763829
Epoch: 1, Loss: 0.612436
Epoch: 2, Loss: 0.582877
Epoch: 3, Loss: 1.117976
Epoch: 4, Loss: 1.109863
Epoch: 5, Loss: 0.617239
Epoch: 6, Loss: 0.773430
Epoch: 7, Loss: 0.664916
Epoch: 8, Loss: 0.581708
Epoch: 9, Loss: 0.515720
Epoch: 10, Loss: 0.697813
Epoch: 11, Loss: 0.768996
Epoch: 12, Loss: 0.485842
Epoch: 13, Loss: 0.315629
Epoch: 14, Loss: 0.409046
Epoch: 15, Loss: 0.496643
Epoch: 16, Loss: 0.628896
Epoch: 17, Loss: 0.290522
Epoch: 18, Loss: 0.298632
Epoch: 19, Loss: 0.555380
Epoch: 20, Loss: 0.691303
Epoch: 21, Loss: 0.268271
Epoch: 22, Loss: 0.381659
Epoch: 23, Loss: 0.327566
Epoch: 24, Loss: 0.180963
Epoch: 25, Loss: 0.142416
Epoch: 26, Loss: 0.097799
Epoch: 27, Loss: 0.285027
Epoch: 28, Loss: 0.068501
Epoch: 29, Loss: 0.134568
Epoch: 30, Loss: 0.193493
Epoch: 31, Loss: 0.102385
Epoch: 32, Loss: 0.047102
Epoch: 33, Loss: 0.168602
Epoch: 34, Loss: 0.053034
Epoch: 35, Loss: 0.093777
Epoch: 36, Loss: 0.016586
Epoch: 37, Loss: 0.052554
Epoch: 38, Loss: 0.012

In [27]:
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64,
                                           shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in train_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Accuracy: %f" % (correct / total))

Accuracy: 1.000000


In [28]:
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64,
                                         shuffle=False)

correct = 0
total = 0

with torch.no_grad():
    for imgs, labels in val_loader:
        outputs = model(imgs.view(imgs.shape[0], -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
        
print("Accuracy: %f" % (correct / total))

Accuracy: 0.764250


In [30]:
sum([p.numel() for p in model.parameters()])

3737732