In [2]:
import imageio 
import torch
from PIL import Image
from torchvision import transforms
import numpy as np 
import matplotlib.pyplot as plt 
import torch.nn as nn
import torch.optim as optim

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
from torchvision import datasets
data_path = '../data-unversioned/p1ch7/'
cifar10 = datasets.CIFAR10(data_path, train=True, download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4915, 0.4823, 0.4468),(0.2470, 0.2435, 0.2616))]))
cifar10_val = datasets.CIFAR10(data_path, train=False, download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4915, 0.4823, 0.4468),(0.2470, 0.2435, 0.2616))]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ../data-unversioned/p1ch7/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/
Files already downloaded and verified


In [5]:
import torch.nn.functional as F
class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
            self.act1 = nn.Tanh()
            self.pool1 = nn.MaxPool2d(2)
            self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
            self.act2 = nn.Tanh()
            self.pool2 = nn.MaxPool2d(2)
            self.fc1 = nn.Linear(8*8*8, 32)
            self.act3 = nn.Tanh()
            self.fc2 = nn.Linear(32,10)
            
        def forward(self, x):
            out = self.pool1(self.act1(self.conv1(x)))
            out = self.pool2(self.act2(self.conv2(out)))
            out = out.view(-1, 8*8*8)
            out = self.act3(self.fc1(out))
            out = self.fc2(out)
            return out


In [6]:
import datetime
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
  for epoch in range(1, n_epochs + 1):
    loss_train = 0.0
    for imgs, labels in train_loader:
      imgs = imgs.to(device=device)
      labels = labels.to(device=device)
      outputs = model(imgs)
      loss = loss_fn(outputs, labels)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      loss_train += loss.item()
    print('{} Epoch {}, Training loss {}'.format(
    datetime.datetime.now(), epoch,
    loss_train / len(train_loader)))

In [7]:
def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0
        with torch.no_grad():
            for imgs, labels in loader:
                imgs, labels = imgs.to(device), labels.to(device)
                batchsize = imgs.shape[0]
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1)
                total += labels.shape[0]
                correct += int((predicted == labels).sum())
        print("Accuracy {}: {:.2f}".format(name, correct / total))

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64, shuffle=True)

model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(),lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 16:30:58.227099 Epoch 1, Training loss 2.042073919949934
2022-03-30 16:31:14.352645 Epoch 2, Training loss 1.7447231538460384
2022-03-30 16:31:30.294415 Epoch 3, Training loss 1.5877545703097682
2022-03-30 16:31:46.194301 Epoch 4, Training loss 1.4978074417699634
2022-03-30 16:32:02.224197 Epoch 5, Training loss 1.4370534296535775
2022-03-30 16:32:18.189360 Epoch 6, Training loss 1.3837433657835208
2022-03-30 16:32:34.196557 Epoch 7, Training loss 1.3324004486393746
2022-03-30 16:32:50.137744 Epoch 8, Training loss 1.286688561482198
2022-03-30 16:33:06.075392 Epoch 9, Training loss 1.2455877017639483
2022-03-30 16:33:21.943076 Epoch 10, Training loss 1.2086571809428428
2022-03-30 16:33:37.906456 Epoch 11, Training loss 1.1735460141583172
2022-03-30 16:33:54.107778 Epoch 12, Training loss 1.1465314299706608
2022-03-30 16:34:10.308463 Epoch 13, Training loss 1.120724389086599
2022-03-30 16:34:26.406269 Epoch 14, Training loss 1.0982416886502824
2022-03-30 16:34:42.483419 Epoch

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.75
Accuracy val: 0.60


In [None]:
########## Part 2 #############

In [8]:
import torch.nn.functional as F
class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
            self.act1 = nn.Tanh()
            self.pool1 = nn.MaxPool2d(2)
            self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
            self.act2 = nn.Tanh()
            self.pool2 = nn.MaxPool2d(2)
            self.conv3 = nn.Conv2d(8, 3, kernel_size=3, padding=1)
            self.act3 = nn.Tanh()
            self.pool3 = nn.MaxPool2d(2)
            self.fc1 = nn.Linear(8*8*8, 32)
            self.act4 = nn.Tanh()
            self.fc2 = nn.Linear(32,10)
            
        def forward(self, x):
            out = self.pool1(self.act1(self.conv1(x)))
            out = self.pool2(self.act2(self.conv2(out)))
            out = out.view(-1, 8*8*8)
            out = self.act3(self.fc1(out))
            out = self.fc2(out)
            return out


In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64, shuffle=True)

model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(),lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 19:57:46.639845 Epoch 1, Training loss 2.0342248437349753
2022-03-30 19:58:04.252934 Epoch 2, Training loss 1.752946306220101
2022-03-30 19:58:21.964344 Epoch 3, Training loss 1.584032882814822
2022-03-30 19:58:39.525112 Epoch 4, Training loss 1.4926144758148876
2022-03-30 19:58:57.294147 Epoch 5, Training loss 1.423058059209448
2022-03-30 19:59:14.961421 Epoch 6, Training loss 1.3588145898126276
2022-03-30 19:59:32.613245 Epoch 7, Training loss 1.3056890267850187
2022-03-30 19:59:50.221532 Epoch 8, Training loss 1.2642026870604366
2022-03-30 20:00:07.791012 Epoch 9, Training loss 1.2260863840427545
2022-03-30 20:00:25.309662 Epoch 10, Training loss 1.1931788312352223
2022-03-30 20:00:42.751348 Epoch 11, Training loss 1.1648307405316922
2022-03-30 20:01:00.266555 Epoch 12, Training loss 1.140716722859141
2022-03-30 20:01:17.825681 Epoch 13, Training loss 1.1203773072765917
2022-03-30 20:01:35.349738 Epoch 14, Training loss 1.1004196355867264
2022-03-30 20:01:53.096633 Epoch 

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)


Accuracy train: 0.81
Accuracy val: 0.61


In [None]:
########Problem 2###########

In [None]:
########Part 1 #############

In [9]:
class NetRes(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2, n_chans1 // 2,
                               kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out1 = out
        out = F.max_pool2d(torch.relu(self.conv3(out)) + out1, 2)
        out = out.view(-1, 4 * 4 * self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
model = NetRes(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 21:25:09.895866 Epoch 1, Training loss 2.064734585297382
2022-03-30 21:25:30.762045 Epoch 2, Training loss 1.6738504064662376
2022-03-30 21:25:51.572340 Epoch 3, Training loss 1.512226495596454
2022-03-30 21:26:12.514036 Epoch 4, Training loss 1.4071405790650937
2022-03-30 21:26:33.275505 Epoch 5, Training loss 1.328491179991866
2022-03-30 21:26:54.151956 Epoch 6, Training loss 1.262372285737406
2022-03-30 21:27:15.062363 Epoch 7, Training loss 1.2083584762290311
2022-03-30 21:27:35.931698 Epoch 8, Training loss 1.1575619627142806
2022-03-30 21:27:56.713547 Epoch 9, Training loss 1.115993541905947
2022-03-30 21:28:17.446053 Epoch 10, Training loss 1.0787508140896898
2022-03-30 21:28:38.167031 Epoch 11, Training loss 1.0478703122004829
2022-03-30 21:28:58.980263 Epoch 12, Training loss 1.0177687699989895
2022-03-30 21:29:19.764741 Epoch 13, Training loss 0.9897192240980885
2022-03-30 21:29:40.686425 Epoch 14, Training loss 0.967040217212399
2022-03-30 21:30:01.404424 Epoch 15

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.87
Accuracy val: 0.70


In [None]:
############## Part B ############

In [10]:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs+1):
      loss_train = 0.0
      for imgs, labels in train_loader:
          imgs = imgs.to(device=device)
          labels = labels.to(device=device)
          outputs = model(imgs)
          loss = loss_fn(outputs, labels)

          l2_lambda = 0.001
          l2_norm = sum(p.pow(2.0).sum()
                  for p in model.parameters())
          loss = loss + l2_lambda * l2_norm

          optimizer.zero_grad()
          loss.backward()
          optimizer.step()

          loss_train += loss.item()
      if epoch == 1 or epoch % 10 == 0:
          print('{} Epoch {}, Training loss {}'.format(
              datetime.datetime.now(), epoch,
              loss_train / len(train_loader)))

In [11]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
model = NetRes(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-30 23:21:47.844961 Epoch 1, Training loss 2.142087420539173
2022-03-30 23:23:29.516536 Epoch 10, Training loss 1.1538939296894366
2022-03-30 23:25:22.676706 Epoch 20, Training loss 0.9667551664592665
2022-03-30 23:27:14.761352 Epoch 30, Training loss 0.8843781549454955
2022-03-30 23:29:06.870703 Epoch 40, Training loss 0.837581584025222
2022-03-30 23:30:58.987060 Epoch 50, Training loss 0.80340772989156
2022-03-30 23:32:50.664595 Epoch 60, Training loss 0.7811093477489394
2022-03-30 23:34:42.870576 Epoch 70, Training loss 0.7634496121760219
2022-03-30 23:36:34.860892 Epoch 80, Training loss 0.7529852288534574
2022-03-30 23:38:26.240298 Epoch 90, Training loss 0.7438377430829246
2022-03-30 23:40:17.776408 Epoch 100, Training loss 0.7361932923193173
2022-03-30 23:42:09.253360 Epoch 110, Training loss 0.7312463601227002
2022-03-30 23:44:00.375576 Epoch 120, Training loss 0.7255655428027863
2022-03-30 23:45:51.811823 Epoch 130, Training loss 0.7176811651271933
2022-03-30 23:47:43.2

In [12]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.66
Accuracy val: 0.61


In [13]:
class NetDropout(nn.Module):
  def __init__(self, n_chans1=32):
      super().__init__()
      self.n_chans1 = n_chans1
      self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
      self.conv1_dropout = nn.Dropout2d(p=0.3)
      self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
      self.conv2_dropout = nn.Dropout2d(p=0.3)
      self.fc1 = nn.Linear(8*8*n_chans1 // 2, 32)
      self.fc2 = nn.Linear(32,2)

  def forward(self, x):
      out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
      out = self.conv1_dropout(out)
      out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
      out = self.conv2_dropout(out)
      out = out.view(-1, 8*8*self.n_chans1 // 2)
      out = torch.tanh(self.fc1(out))
      out = self.fc2(out)
      return out

In [14]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
model = NetRes(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-31 00:22:18.322547 Epoch 1, Training loss 2.0831853264128157
2022-03-31 00:22:29.251249 Epoch 2, Training loss 1.666690823672068
2022-03-31 00:22:40.198290 Epoch 3, Training loss 1.4858769315587894
2022-03-31 00:22:51.039498 Epoch 4, Training loss 1.3841847990022595
2022-03-31 00:23:02.083261 Epoch 5, Training loss 1.3068497464479998
2022-03-31 00:23:13.026683 Epoch 6, Training loss 1.2475846812243352
2022-03-31 00:23:23.868023 Epoch 7, Training loss 1.1911033019232933
2022-03-31 00:23:34.770645 Epoch 8, Training loss 1.140045927461151
2022-03-31 00:23:45.639474 Epoch 9, Training loss 1.0972352738270674
2022-03-31 00:23:56.486347 Epoch 10, Training loss 1.060932628562688
2022-03-31 00:24:07.450286 Epoch 11, Training loss 1.0286064064868576
2022-03-31 00:24:18.313123 Epoch 12, Training loss 1.0009670899347272
2022-03-31 00:24:29.295649 Epoch 13, Training loss 0.9783376996474498
2022-03-31 00:24:40.274626 Epoch 14, Training loss 0.9572183841939472
2022-03-31 00:24:51.038089 Epoch

In [15]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.84
Accuracy val: 0.69


In [16]:
class NetBatchNorm(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, padding=1)
        self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chans1 // 2)
        self.fc1 = nn.Linear(8*8*n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32,2)

    def forward(self, x):
        out = self.conv1_batchnorm(self.conv1(x))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = self.conv2_batchnorm(self.conv2(out))
        out = F.max_pool2d(torch.tanh(out), 2)
        out = out.view(-1, 8*8*self.n_chans1 //2)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [17]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
model = NetRes(n_chans1=32).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-31 01:15:47.432040 Epoch 1, Training loss 2.127211928672498
2022-03-31 01:15:57.936351 Epoch 2, Training loss 1.743445701763758
2022-03-31 01:16:08.470747 Epoch 3, Training loss 1.5428482315424459
2022-03-31 01:16:19.086349 Epoch 4, Training loss 1.4376540674882776
2022-03-31 01:16:29.596830 Epoch 5, Training loss 1.3556002857130203
2022-03-31 01:16:39.995035 Epoch 6, Training loss 1.2874773601284417
2022-03-31 01:16:50.369154 Epoch 7, Training loss 1.2281443213715273
2022-03-31 01:17:00.887716 Epoch 8, Training loss 1.1787294968772117
2022-03-31 01:17:11.287878 Epoch 9, Training loss 1.1350381022219158
2022-03-31 01:17:21.653178 Epoch 10, Training loss 1.0955347188598359
2022-03-31 01:17:32.104907 Epoch 11, Training loss 1.0598252476633663
2022-03-31 01:17:42.505809 Epoch 12, Training loss 1.02748018297393
2022-03-31 01:17:53.037361 Epoch 13, Training loss 1.0022700105786628
2022-03-31 01:18:03.566055 Epoch 14, Training loss 0.9815815787028779
2022-03-31 01:18:13.902169 Epoch 

In [18]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                          shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                        shuffle=False)
validate(model, train_loader, val_loader)

Accuracy train: 0.83
Accuracy val: 0.69
