In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
# Keep printed tensors short: show only 2 entries at each edge of a summarized dimension.
torch.set_printoptions(edgeitems=2)
# Seed PyTorch's default random number generator (used for weight init and shuffling).
torch.manual_seed(123)

<torch._C.Generator at 0x7f8bba4faf90>

In [None]:
##############################################################################################################################
#########Problem 1#########
##############################################################################################################################

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [None]:
from torchvision import datasets

data_path = '../data-unversioned/p1ch7/'

# Build the training split first, then the held-out split. Both are downloaded
# into `data_path` when missing and use the same preprocessing: conversion to a
# tensor followed by per-channel normalization with the mean / std given below.
cifar10, cifar10_val = (
    datasets.CIFAR10(
        data_path,
        train=is_train,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4915, 0.4823, 0.4468),
                                 (0.2470, 0.2435, 0.2616)),
        ]),
    )
    for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ../data-unversioned/p1ch7/cifar-10-python.tar.gz to ../data-unversioned/p1ch7/
Files already downloaded and verified


In [None]:
class Net(nn.Module):
    """Small CNN: two conv/tanh/max-pool stages followed by a two-layer classifier.

    The flatten size (8 * 8 * 8) assumes each stage halves the spatial size and
    the second stage emits 8 channels, i.e. 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels, spatial size halved by the pool.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        # Stage 2: 16 -> 8 channels, spatial size halved again.
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        # Classifier head: 512 features -> 32 hidden units -> 10 scores.
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for a batch of images."""
        feats = x
        stages = (
            (self.conv1, self.act1, self.pool1),
            (self.conv2, self.act2, self.pool2),
        )
        for conv, act, pool in stages:
            feats = pool(act(conv(feats)))
        # Collapse channels and spatial positions into one feature vector per row.
        flat = feats.view(-1, 8 * 8 * 8)
        hidden = self.act3(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
import datetime
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train `model` and print the mean batch loss after every epoch.

    Args:
        n_epochs: Number of full passes over `train_loader`.
        optimizer: Optimizer already bound to `model`'s parameters.
        model: Module to train; batches are moved to the device its parameters
            live on (CPU when it has no parameters).
        loss_fn: Callable `loss_fn(outputs, labels)` returning a scalar tensor.
        train_loader: Iterable yielding `(imgs, labels)` batches.
    """
    # Follow the model instead of a notebook-level global so the function works
    # wherever the caller placed the model.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure dropout / batch-norm layers (if any) are in training behaviour.
    model.train()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        n_batches = 0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=target)
            labels = labels.to(device=target)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
            n_batches += 1
        # Count batches while iterating so loaders without `len()` work and an
        # empty loader reports nan instead of raising ZeroDivisionError.
        mean_loss = loss_train / n_batches if n_batches else float('nan')
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss))

In [None]:
def validate(model, train_loader, val_loader):
    """Print the classification accuracy of `model` on both loaders.

    The model is switched to eval mode while scoring, so dropout and batch-norm
    layers behave deterministically, and its previous mode is restored on exit.

    Args:
        model: Module mapping a batch of images to per-class scores.
        train_loader: Iterable of `(imgs, labels)` batches, reported as "train".
        val_loader: Iterable of `(imgs, labels)` batches, reported as "val".
    """
    # Score on whatever device the model lives on (CPU if it has no parameters).
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()
    try:
        for name, loader in [("train", train_loader), ("val", val_loader)]:
            correct = 0
            total = 0
            with torch.no_grad():  # no gradients needed for scoring
                for imgs, labels in loader:
                    imgs, labels = imgs.to(target), labels.to(target)
                    outputs = model(imgs)
                    # The predicted class is the index of the largest score.
                    _, predicted = torch.max(outputs, dim=1)
                    total += labels.shape[0]
                    correct += int((predicted == labels).sum())
            # An empty loader reports nan rather than dividing by zero.
            accuracy = correct / total if total else float('nan')
            print("Accuracy {}: {:.2f}".format(name, accuracy))
    finally:
        model.train(was_training)

In [None]:
# Shuffled mini-batches of 64 images for training.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=True, batch_size=64)

# Fresh network on the selected device, plain SGD, cross-entropy objective.
model = Net().to(device=device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

training_loop(300, optimizer, model, loss_fn, train_loader)

2022-03-29 02:55:28.511502 Epoch 1, Training loss 2.098292909467312
2022-03-29 02:55:44.957787 Epoch 2, Training loss 1.785247363855162
2022-03-29 02:56:01.498808 Epoch 3, Training loss 1.6172291255363114
2022-03-29 02:56:18.106240 Epoch 4, Training loss 1.5265314772610774
2022-03-29 02:56:34.565004 Epoch 5, Training loss 1.460617965901904
2022-03-29 02:56:51.125967 Epoch 6, Training loss 1.3960864796967762
2022-03-29 02:57:07.374479 Epoch 7, Training loss 1.3315046275668132
2022-03-29 02:57:23.676630 Epoch 8, Training loss 1.2779924499866602
2022-03-29 02:57:40.115119 Epoch 9, Training loss 1.2345515409546435
2022-03-29 02:57:56.530338 Epoch 10, Training loss 1.1979004371044275
2022-03-29 02:58:13.287508 Epoch 11, Training loss 1.1683111435464582
2022-03-29 02:58:30.434091 Epoch 12, Training loss 1.1412167190895666
2022-03-29 02:58:47.213326 Epoch 13, Training loss 1.1183172301258273
2022-03-29 02:59:05.539046 Epoch 14, Training loss 1.0987535007774372
2022-03-29 02:59:23.375201 Epoch

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.79
Accuracy val: 0.60


In [None]:
##############################################################################################################################
#########Problem 1 PART B #############
##############################################################################################################################

In [None]:
class Net(nn.Module):
    """CNN with three conv/tanh/max-pool stages and a two-layer classifier.

    Each pool halves the spatial size, so a 32x32 input reaches the classifier
    as 3 channels of 4x4, i.e. 48 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        # NOTE(review): 3 output channels is a narrow bottleneck before the
        # classifier; kept as written, confirm it is intended.
        self.conv3 = nn.Conv2d(8, 3, kernel_size=3, padding=1)
        self.act3 = nn.Tanh()
        self.pool3 = nn.MaxPool2d(2)
        # Sized for the output of the third stage: 3 channels * 4 * 4.
        self.fc1 = nn.Linear(3 * 4 * 4, 32)
        # Separate name so the classifier activation no longer overwrites act3.
        self.act4 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for a batch of images."""
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        # The third stage was defined but previously skipped here.
        out = self.pool3(self.act3(self.conv3(out)))
        out = out.view(-1, 3 * 4 * 4)
        out = self.act4(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
# Shuffled mini-batches of 64 images for training.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=True, batch_size=64)

# Fresh network on the selected device, plain SGD, cross-entropy objective.
model = Net().to(device=device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

training_loop(300, optimizer, model, loss_fn, train_loader)

2022-03-29 04:44:05.192207 Epoch 1, Training loss 2.0476194139941573
2022-03-29 04:44:21.416536 Epoch 2, Training loss 1.7932819879573325
2022-03-29 04:44:37.600597 Epoch 3, Training loss 1.6093341150247227
2022-03-29 04:44:53.748188 Epoch 4, Training loss 1.504915327519712
2022-03-29 04:45:09.785505 Epoch 5, Training loss 1.4332176494171551
2022-03-29 04:45:25.983206 Epoch 6, Training loss 1.3671491766524742
2022-03-29 04:45:41.928765 Epoch 7, Training loss 1.310898762987093
2022-03-29 04:45:58.000601 Epoch 8, Training loss 1.262955849128001
2022-03-29 04:46:14.110734 Epoch 9, Training loss 1.2249980403486724
2022-03-29 04:46:30.077334 Epoch 10, Training loss 1.1931186087448578
2022-03-29 04:46:46.138664 Epoch 11, Training loss 1.1658514782290934
2022-03-29 04:47:02.169020 Epoch 12, Training loss 1.1414087233336077
2022-03-29 04:47:18.183312 Epoch 13, Training loss 1.1186294909328451
2022-03-29 04:47:34.181483 Epoch 14, Training loss 1.1012593538255033
2022-03-29 04:47:50.238129 Epoch

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.81
Accuracy val: 0.62


In [None]:
##############################################################################################################################
#########Problem 2 PART 1#########
##############################################################################################################################

In [None]:
class NetRes(nn.Module):
    """Three-stage CNN whose third convolution has an identity skip connection.

    Args:
        n_chans1: Output channels of the first convolution. The second and
            third convolutions use `n_chans1 // 2` channels.

    Three 2x max-pools precede the classifier, so the flatten size of
    4 * 4 * (n_chans1 // 2) assumes 32x32 inputs.
    """

    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        n_chans2 = n_chans1 // 2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans2, kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(n_chans2, n_chans2,
                               kernel_size=3, padding=1)
        # Parenthesised on purpose: `4 * 4 * n_chans1 // 2` evaluates as
        # `(16 * n_chans1) // 2`, which disagrees with the real channel count
        # whenever n_chans1 is odd.
        self.fc1 = nn.Linear(4 * 4 * n_chans2, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for a batch of images."""
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out1 = out
        # Skip connection: add the stage input back onto the activated output.
        out = F.max_pool2d(torch.relu(self.conv3(out)) + out1, 2)
        out = out.view(-1, self.fc1.in_features)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
# Shuffled mini-batches of 64 images for training.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=True, batch_size=64)

# Residual network on the selected device, plain SGD, cross-entropy objective.
model = NetRes(n_chans1=32).to(device=device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-2)

training_loop(300, optimizer, model, loss_fn, train_loader)

2022-03-30 14:56:48.653678 Epoch 1, Training loss 2.118773611915081
2022-03-30 14:56:59.732190 Epoch 2, Training loss 1.7503244863141834
2022-03-30 14:57:10.799833 Epoch 3, Training loss 1.5545217981728752
2022-03-30 14:57:21.884237 Epoch 4, Training loss 1.4413532382996797
2022-03-30 14:57:32.987184 Epoch 5, Training loss 1.3589966778864946
2022-03-30 14:57:43.981476 Epoch 6, Training loss 1.2896170318126678
2022-03-30 14:57:55.056163 Epoch 7, Training loss 1.2293713762022345
2022-03-30 14:58:06.258686 Epoch 8, Training loss 1.180406075151985
2022-03-30 14:58:17.535626 Epoch 9, Training loss 1.1376883311344839
2022-03-30 14:58:28.579210 Epoch 10, Training loss 1.0984396485568921
2022-03-30 14:58:39.589001 Epoch 11, Training loss 1.0637750482315298
2022-03-30 14:58:50.588554 Epoch 12, Training loss 1.034006581205846
2022-03-30 14:59:01.586133 Epoch 13, Training loss 1.00559051735017
2022-03-30 14:59:12.436030 Epoch 14, Training loss 0.9804429912658603
2022-03-30 14:59:23.493625 Epoch 1

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.76
Accuracy val: 0.64


In [None]:
##############################################################################################################################
#########Problem 2 PART 2######### 1/3 (BATCH NORMALIZATION)
##############################################################################################################################

In [None]:
class NetRes(nn.Module):
    """Residual block: 3x3 conv -> batch norm -> ReLU, added back onto the input.

    Args:
        n_chans1: Channel count of both the input and the output. The block
            preserves the shape of its input so the skip addition is valid.
    """

    def __init__(self, n_chans1=32):
        super(NetRes, self).__init__()
        # No conv bias: the batch norm that follows has its own shift term.
        self.conv = nn.Conv2d(n_chans1, n_chans1,
                              kernel_size=3, padding=1, bias=False)
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans1)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')
        # Start the normalized branch at scale 0.5 and shift 0.
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        """Return `relu(batch_norm(conv(x))) + x`."""
        out = self.conv(x)
        # The layer was created and initialised but previously never applied.
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10,
                        batch_size=64, shuffle=True)

# NOTE(review): this instantiates `Net`, not the `NetRes` class defined in the
# cell above, so the run below does not exercise batch normalization.
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

2022-03-30 22:43:00.080416 Epoch 1, Training loss 2.0433139910783304
2022-03-30 22:43:09.056253 Epoch 2, Training loss 1.7821431681323234
2022-03-30 22:43:18.483246 Epoch 3, Training loss 1.6077832204606526
2022-03-30 22:43:27.734464 Epoch 4, Training loss 1.5257196385232383
2022-03-30 22:43:37.226082 Epoch 5, Training loss 1.4694439521073686
2022-03-30 22:43:46.696314 Epoch 6, Training loss 1.4247727144099867
2022-03-30 22:43:56.168772 Epoch 7, Training loss 1.385382987196793
2022-03-30 22:44:05.417752 Epoch 8, Training loss 1.345282492735197
2022-03-30 22:44:14.700885 Epoch 9, Training loss 1.3018881581780855
2022-03-30 22:44:23.814807 Epoch 10, Training loss 1.2568906674452145
2022-03-30 22:44:33.032870 Epoch 11, Training loss 1.2209365199441495
2022-03-30 22:44:42.461687 Epoch 12, Training loss 1.1908568870990783
2022-03-30 22:44:51.618654 Epoch 13, Training loss 1.1631897738979906
2022-03-30 22:45:00.702190 Epoch 14, Training loss 1.1378452889907085
2022-03-30 22:45:10.113566 Epoc

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.81
Accuracy val: 0.61


In [None]:
##############################################################################################################################
#########Problem 2 PART 2######### 2/3 (Dropout)
##############################################################################################################################

In [None]:
class NetRes(nn.Module):
    """Two-stage CNN with channel dropout after each conv/tanh/max-pool stage.

    Args:
        n_chans1: Output channels of the first convolution. The second
            convolution uses `n_chans1 // 2` channels.
        n_blocks: Accepted for call compatibility and ignored. The previous
            constructor used it to instantiate `NetRes` inside itself, which
            recursed without end.

    Two 2x max-pools precede the classifier, so the flatten size of
    8 * 8 * (n_chans1 // 2) assumes 32x32 inputs.
    """

    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        n_chans2 = n_chans1 // 2
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        # `forward` uses conv2 / conv2_dropout, which were never created before.
        self.conv2 = nn.Conv2d(n_chans1, n_chans2, kernel_size=3, padding=1)
        self.conv2_dropout = nn.Dropout2d(p=0.3)
        # Parenthesised so the size matches conv2's channel count for odd n_chans1.
        self.fc1 = nn.Linear(8 * 8 * n_chans2, 32)
        # 10 outputs, matching the other classifiers in this notebook.
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        """Return the 10 unnormalized class scores for a batch of images."""
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = self.conv2_dropout(out)
        out = out.view(-1, self.fc1.in_features)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10,
                        batch_size=64, shuffle=True)

# NOTE(review): this instantiates `Net`, not the `NetRes` class defined in the
# cell above, so the run below does not exercise dropout.
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

2022-03-30 21:50:34.646271 Epoch 1, Training loss 2.0466181361461846
2022-03-30 21:50:43.828393 Epoch 2, Training loss 1.7477930401597181
2022-03-30 21:50:53.122481 Epoch 3, Training loss 1.5780409339748684
2022-03-30 21:51:02.230131 Epoch 4, Training loss 1.483592438118537
2022-03-30 21:51:11.453534 Epoch 5, Training loss 1.4116625601373365
2022-03-30 21:51:21.292715 Epoch 6, Training loss 1.3526329143577829
2022-03-30 21:51:30.920935 Epoch 7, Training loss 1.304327110500287
2022-03-30 21:51:40.374649 Epoch 8, Training loss 1.263481073443542
2022-03-30 21:51:49.802405 Epoch 9, Training loss 1.22599498413103
2022-03-30 21:51:58.818066 Epoch 10, Training loss 1.197358668307819
2022-03-30 21:52:07.997774 Epoch 11, Training loss 1.1683437597873572
2022-03-30 21:52:17.749909 Epoch 12, Training loss 1.145666810405224
2022-03-30 21:52:27.297721 Epoch 13, Training loss 1.1223295064228576
2022-03-30 21:52:36.651003 Epoch 14, Training loss 1.1002734790525168
2022-03-30 21:52:46.172385 Epoch 15,

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.81
Accuracy val: 0.61


In [None]:
##############################################################################################################################
#########Problem 2 PART 2######### 3/3 (Weight Decay)
##############################################################################################################################

In [None]:
def training_loop(n_epochs, optimizer, model, loss_fn,
                  train_loader, l2_lambda=0.001):
    """Train `model` with an explicit L2 penalty added to the loss.

    Every step minimizes `loss_fn(outputs, labels) + l2_lambda * sum(p ** 2)`
    taken over all model parameters, and the mean of that penalized loss is
    printed after each epoch. This definition replaces the earlier
    `training_loop` in this notebook.

    Args:
        n_epochs: Number of full passes over `train_loader`.
        optimizer: Optimizer already bound to `model`'s parameters.
        model: Module to train; batches are moved to the device its parameters
            live on (CPU when it has no parameters).
        loss_fn: Callable `loss_fn(outputs, labels)` returning a scalar tensor.
        train_loader: Iterable yielding `(imgs, labels)` batches.
        l2_lambda: Weight of the squared-parameter penalty (default 0.001).
    """
    # Follow the model instead of assuming 'cuda:0', which fails without a GPU.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure dropout / batch-norm layers (if any) are in training behaviour.
    model.train()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        n_batches = 0
        for imgs, labels in train_loader:
            outputs = model(imgs.to(target))
            loss = loss_fn(outputs, labels.to(target))

            # Sum of squared entries over every parameter tensor, biases included.
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()
            n_batches += 1

        # Count batches while iterating so loaders without `len()` work and an
        # empty loader reports nan instead of raising ZeroDivisionError.
        mean_loss = loss_train / n_batches if n_batches else float('nan')
        print('{} Epoch {}, Training Loss {}'.format(datetime.datetime.now(),
                                                     epoch, mean_loss))

In [None]:
# NOTE(review): the next code cell rebinds `model` to a fresh `Net()`, so this
# instance is only displayed here and never trained.
model = NetRes()
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
model.to(device)

NetRes(
  (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (batch_norm): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [None]:
train_loader = torch.utils.data.DataLoader(cifar10,
                    batch_size=64, shuffle=True)

# NOTE(review): this instantiates `Net`, replacing the `NetRes` instance created
# in the cell above, so the L2-penalized run below trains `Net`.
# Use the notebook-wide `device` so the cell also runs on CPU-only machines.
model = Net().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

2022-03-30 23:52:50.730233 Epoch 1, Training Loss 2.072790256394145
2022-03-30 23:53:00.858566 Epoch 2, Training Loss 1.7770396222543838
2022-03-30 23:53:10.865084 Epoch 3, Training Loss 1.616920314176613
2022-03-30 23:53:20.822348 Epoch 4, Training Loss 1.5275759218293992
2022-03-30 23:53:30.979104 Epoch 5, Training Loss 1.4489904951561443
2022-03-30 23:53:40.945324 Epoch 6, Training Loss 1.3861161008515321
2022-03-30 23:53:50.742502 Epoch 7, Training Loss 1.3383906567493058
2022-03-30 23:54:00.764503 Epoch 8, Training Loss 1.2991474222039323
2022-03-30 23:54:10.673615 Epoch 9, Training Loss 1.2664397182062155
2022-03-30 23:54:20.610602 Epoch 10, Training Loss 1.239240956047307
2022-03-30 23:54:30.505538 Epoch 11, Training Loss 1.2165156798746886
2022-03-30 23:54:40.277176 Epoch 12, Training Loss 1.1966671775216642
2022-03-30 23:54:50.195609 Epoch 13, Training Loss 1.1801723683124308
2022-03-30 23:55:00.284385 Epoch 14, Training Loss 1.165393960216771
2022-03-30 23:55:10.238828 Epoch 

In [None]:
# Scoring does not need shuffling: rebuild both loaders in dataset order.
train_loader = torch.utils.data.DataLoader(
    cifar10, shuffle=False, batch_size=64)
val_loader = torch.utils.data.DataLoader(
    cifar10_val, shuffle=False, batch_size=64)
validate(model, train_loader=train_loader, val_loader=val_loader)

Accuracy train: 0.77
Accuracy val: 0.68
