In [1]:
## Input necessary from chapter 7

from torchvision import datasets

data_path = './data'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True)
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 30260747.37it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [2]:
from torchvision import transforms

transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                                                                (0.2470, 0.2435, 0.2616))])
)

transformed_cifar10_val = datasets.CIFAR10(data_path, train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.4942, 0.4851, 0.4504),
                                                                                                (0.2467, 0.2429, 0.2616))])
)

In [3]:
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']
cifar2 = [(img, label_map[label]) for img, label in cifar10 if label in [0, 2]]
cifar2_val = [(img, label_map[label]) for img, label in cifar10_val if label in [0, 2]]
transformed_cifar2 = [ (img, label_map[label]) for img, label in transformed_cifar10 if label in [0,2]]
transformed_cifar2_val = [ (img, label_map[label]) for img, label in transformed_cifar10_val if label in [0,2]]

In [4]:
## Build the custom nn.Module

import torch.nn as nn
import torch.nn.functional as F
import torch

class Net(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3,16, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(16,8, kernel_size=3, padding=1)
    self.fc1 = nn.Linear(8*8*8,32)
    self.fc2 = nn.Linear(32,2)

# Since the nn.MaxPool2d and the nn.Tanh dont have any parameters that need to be optimized,
# I can call them directly in the forawrd pass using their functional counterparts.

  def forward(self,x):
    out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
    out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
    out = out.view(-1,8*8*8)
    out = torch.tanh(self.fc1(out))
    out = self.fc2(out)
    return out


In [30]:
model = Net()
img, _ = transformed_cifar2[0]
model(img.unsqueeze(0))

tensor([[-2.4826,  2.1013]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [8]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
  for epoch in range(1,n_epochs+1):
    loss_train = 0.0
    for imgs, labels in train_loader:
      outputs = model(imgs)
      loss = loss_fn(outputs,labels)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      loss_train += loss.item()

    if epoch == 1 or epoch % 10 == 0:
      print('{} Epoch {}, Training Loss {}'.format(datetime.datetime.now(), epoch, loss_train / len(train_loader)))



In [9]:
import torch.optim as optim

train_loader = torch.utils.data.DataLoader(transformed_cifar2, batch_size=65, shuffle=True)
model = Net()
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 50,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader
)


2024-01-08 14:05:59.988477 Epoch 1, Training Loss 0.5617305110027264
2024-01-08 14:06:31.444145 Epoch 10, Training Loss 0.33483558912555894
2024-01-08 14:07:07.049864 Epoch 20, Training Loss 0.30141888094412816
2024-01-08 14:07:42.493731 Epoch 30, Training Loss 0.27469871580213695
2024-01-08 14:08:17.640753 Epoch 40, Training Loss 0.25466032872919914
2024-01-08 14:08:53.367949 Epoch 50, Training Loss 0.2362552567735895


In [10]:
val_loader = torch.utils.data.DataLoader(transformed_cifar2_val, batch_size=64, shuffle=False)

def validate(model, train_loader, val_loader):
  for name, loader in [("train", train_loader), ("val", val_loader)]:
    correct = 0
    total = 0
    with torch.no_grad():
      for imgs, labels in loader:
        outputs = model(imgs)
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
    print("Accuracy {}: {:.2f}".format(name, correct / total))

In [11]:
validate(model,train_loader, val_loader)

Accuracy train: 0.90
Accuracy val: 0.88


In [None]:
# Save the model to a file

# torch.save(model.state_dict(), data_path + 'birds_vs_airplanes.pt')

In [12]:
# Training on the GPU
import torch

device = (torch.device('cuda') if torch.cuda.is_available()
            else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [13]:
import datetime
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
  for epoch in range(1, n_epochs+1):
    loss_train = 0.0

    for imgs, labels in train_loader:
      imgs = imgs.to(device = device)
      labels = labels.to(device = device)
      outputs = model(imgs)
      loss = loss_fn(outputs, labels)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      loss_train += loss.item()
    if epoch == 1 or epoch % 10 == 0:
      print('{} Epoch {}, Training Loss {}'.format(datetime.datetime.now(), epoch, loss_train / len(train_loader)))


In [14]:
import torch.optim as optim

train_loader = torch.utils.data.DataLoader(transformed_cifar2, batch_size=64, shuffle=True)

model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr =1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader
)

2024-01-08 14:09:50.959421 Epoch 1, Training Loss 0.5767048466357456
2024-01-08 14:09:54.059671 Epoch 10, Training Loss 0.33085323832217295
2024-01-08 14:09:57.443540 Epoch 20, Training Loss 0.2915463439028734
2024-01-08 14:10:01.319048 Epoch 30, Training Loss 0.2676542077664357
2024-01-08 14:10:04.691325 Epoch 40, Training Loss 0.2469455377691111
2024-01-08 14:10:08.052451 Epoch 50, Training Loss 0.22989912131789383
2024-01-08 14:10:11.533698 Epoch 60, Training Loss 0.21591406016592768
2024-01-08 14:10:15.347576 Epoch 70, Training Loss 0.20158522262884554
2024-01-08 14:10:18.776917 Epoch 80, Training Loss 0.1893925824362761
2024-01-08 14:10:22.162182 Epoch 90, Training Loss 0.17792314547262375
2024-01-08 14:10:25.861618 Epoch 100, Training Loss 0.16499218147746317


In [15]:
val_loader = torch.utils.data.DataLoader(transformed_cifar2_val, batch_size=64, shuffle=False)

def validate(model, train_loader, val_loader):
  for name, loader in [("train", train_loader), ("val", val_loader)]:
    correct = 0
    total = 0
    with torch.no_grad():
      for imgs, labels in loader:
        imgs = imgs.to(device =device)
        labels = labels.to(device=device)
        outputs = model(imgs)
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())
    print("Accuracy {}: {:.2f}".format(name, correct / total))

In [16]:
validate(model,train_loader, val_loader)

Accuracy train: 0.93
Accuracy val: 0.88


NOTE: PyTorch will attempt
to load the weight to the same device it was saved from—that is, weights on the GPU will be restored to the GPU. As we don’t know whether we want the same device, we have two options: we could move the network to the CPU before saving it, or move it back after restoring. It is a bit more concise to instruct PyTorch to override the device information when loading weights. This is done by passing the map_location keyword argument to torch.load:

In [None]:
# loaded_model = Net().to(device=device)
# loaded_model.load_state_dict(torch.load(data_path + 'birds_vs_airplanes.pt', map_location=device))

In [None]:
# Regularization

def training_loop_l2reg(n_epochs, optimizer, model, loss_fn, train_loader):
  for epoch in range(1,n_epochs+1):
    loss_train = 0.0
    for imgs, labels in train_loader:
      imgs = imgs.to(device=device)
      labels = labels.to(device=device)
      outputs = model(imgs)
      loss = loss_fn(outputs,labels)

      l2_lambda = 0.001
      l2_norm = sum(p.pow(2.0).sum for p in model.parameters())
      loss = loss + l2_lambda * l2_norm

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      loss_train += loss.item()

    if epoch == 1 or epoch % 10 == 0:
      print('{} Epoch {}, Training Loss {}'.format(datetime.datetime.now(), epoch, loss_train / len(train_loader)))



However, the SGD optimizer in PyTorch already has a weight_decay parameter that corresponds to 2 * lambda, and it directly performs weight decay during the update as described previously. It is fully equivalent to adding the L2 norm of weights to the loss, without the need for accumulating terms in the loss and involving autograd

# Ex. 1

In [None]:
class MyNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3,16, kernel_size=5, padding=1)
    self.conv2 = nn.Conv2d(16,8, kernel_size=5, padding=1)
    self.fc1 = nn.Linear(8*6*6,32)
    self.fc2 = nn.Linear(32,2)

  def forward(self,x):
    out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
    out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
    out = out.view(-1,8*6*6)
    out = torch.tanh(self.fc1(out))
    out = self.fc2(out)
    return out

# Changing the kernel_size changes the output size of the images. In order to make the NN work I need to change the size of the input tensor to the linear fully connected layer.


In [None]:
model = MyNet()
img, _ = transformed_cifar2[0]
model(img.unsqueeze(0))

tensor([[ 0.0380, -0.0108]], grad_fn=<AddmmBackward0>)

In [None]:
import torch.optim as optim

train_loader = torch.utils.data.DataLoader(transformed_cifar2, batch_size=64, shuffle=True)

model = MyNet().to(device=device)
optimizer = optim.SGD(model.parameters(), lr =1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader
)


2024-01-08 08:25:17.903628 Epoch 1, Training Loss 0.5558322288428142
2024-01-08 08:25:21.109386 Epoch 10, Training Loss 0.32513423074202935
2024-01-08 08:25:24.660363 Epoch 20, Training Loss 0.2804817344257786
2024-01-08 08:25:28.216733 Epoch 30, Training Loss 0.25238952624380206
2024-01-08 08:25:32.055823 Epoch 40, Training Loss 0.22156331830533446
2024-01-08 08:25:35.529250 Epoch 50, Training Loss 0.20798351482790747
2024-01-08 08:25:38.971716 Epoch 60, Training Loss 0.18379476761362354
2024-01-08 08:25:42.834171 Epoch 70, Training Loss 0.16878397079410068
2024-01-08 08:25:46.352032 Epoch 80, Training Loss 0.1539767455475725
2024-01-08 08:25:49.840236 Epoch 90, Training Loss 0.13467178892961162
2024-01-08 08:25:53.472307 Epoch 100, Training Loss 0.12318216368650935


In [None]:
validate(model,train_loader, val_loader)
## Changing the kernel size seems to slightly worsen the validation accuracy

Accuracy train: 0.94
Accuracy val: 0.87


In [None]:
# Changing the kernel_size = (1,3) will lead to output tensors that are not squares. Basically the H_out and W_out will be different.
# Will need to change the input dim that the first fully connected layer is designed to receive.
class MyNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3,16, kernel_size=(1,3), padding=1)
    self.conv2 = nn.Conv2d(16,8, kernel_size=(1,3), padding=1)
    self.fc1 = nn.Linear(8*9*8,32)
    self.fc2 = nn.Linear(32,2)

  def forward(self,x):
    out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
    out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
    out = out.view(-1,8*9*8)
    out = torch.tanh(self.fc1(out))
    out = self.fc2(out)
    return out

In [None]:
model = MyNet()
img, label = transformed_cifar2[0]
model(img.unsqueeze(0))

tensor([[ 0.0964, -0.1005]], grad_fn=<AddmmBackward0>)

In [None]:
model = MyNet().to(device=device)
optimizer = optim.SGD(model.parameters(), lr =1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader
)

2024-01-08 08:44:15.619506 Epoch 1, Training Loss 0.5869740634966808
2024-01-08 08:44:18.874720 Epoch 10, Training Loss 0.3718656675451121
2024-01-08 08:44:22.375566 Epoch 20, Training Loss 0.32597980605568855
2024-01-08 08:44:26.330329 Epoch 30, Training Loss 0.306542256170777
2024-01-08 08:44:29.863299 Epoch 40, Training Loss 0.2884918229215464
2024-01-08 08:44:33.351786 Epoch 50, Training Loss 0.272084216877913
2024-01-08 08:44:37.064456 Epoch 60, Training Loss 0.25782877031215434
2024-01-08 08:44:40.808807 Epoch 70, Training Loss 0.24317903418070191
2024-01-08 08:44:44.328194 Epoch 80, Training Loss 0.23007660007970349
2024-01-08 08:44:47.862762 Epoch 90, Training Loss 0.2154793070664831
2024-01-08 08:44:51.811075 Epoch 100, Training Loss 0.2010889251710503


In [None]:
validate(model,train_loader, val_loader)
# it does better in validation accuracy ?!

Accuracy train: 0.91
Accuracy val: 0.88


# Ex. 2

In [20]:
class_names = cifar10.classes
print(class_names)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [23]:
class_names.remove('airplane')
class_names.remove('bird')
print(class_names)

['automobile', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


In [24]:
# Gather all images that are NOT birds or airplanes and then feed them through the network and check if anything comes back as more than 95% an airplane or bird
label_map = {1 : 0, 3:1, 4:2, 5:3 , 6:4, 7:5, 8:6, 9:7, 10:8}
cifar8 = [(img, label_map[label]) for img, label in cifar10 if label not in [0, 2]]
cifar8_val = [(img, label_map[label]) for img, label in cifar10_val if label not in [0, 2]]
transformed_cifar8 = [ (img, label_map[label]) for img, label in transformed_cifar10 if label not in [0,2]]
transformed_cifar8_val = [ (img, label_map[label]) for img, label in transformed_cifar10_val if label not in [0,2]]

In [25]:
img, label = transformed_cifar8[0]
img = img.to(device=device)
test_out = model(img.unsqueeze(0))
test_out, test_out.shape, test_out[0,0], test_out[0,1]

(tensor([[-0.7370, -0.0057]], device='cuda:0', grad_fn=<AddmmBackward0>),
 torch.Size([1, 2]),
 tensor(-0.7370, device='cuda:0', grad_fn=<SelectBackward0>),
 tensor(-0.0057, device='cuda:0', grad_fn=<SelectBackward0>))

In [26]:
for i in range(0,len(transformed_cifar8)+1):
  img, l = transformed_cifar8[i]
  img = img.to(device=device)
  out = model(img)
  if out[0,0] >= 0.95 or out[0,1] >= 0.95:
    print('Index {}, Label {}, Model output {} ' .format(i , l , out))
    break


Index 1, Label 7, Model output tensor([[-2.1637,  1.3977]], device='cuda:0', grad_fn=<AddmmBackward0>) 
