In [None]:
# from google.colab import drive
# drive.mount('/content/drive')
# file_path = "drive/MyDrive/ECGR 4106/HW_3/"

Problem 1

In [1]:
import torch
import numpy as np 
import pandas as pd 
import torch.optim as optim
import torch.nn as nn
import collections
import torch.nn.functional as F


class_names = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']

from torchvision import datasets, transforms
data_path = '/data/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /data/cifar-10-python.tar.gz to /data/
Files already downloaded and verified


In [2]:
cifar10_train = [(img, label) for img, label in cifar10]
cifar10_test = [(img, label) for img, label in cifar10_val]

In [3]:
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [4]:
class NetWidth(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(16 * 32 * 32, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = torch.tanh(self.conv1(x))
        out = torch.tanh(self.conv2(out))
        out = out.view(-1, 16 * 32 * 32)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out
        
class NetWidth2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(16 * 4 * 4, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.tanh(self.conv1(x)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = F.max_pool2d(torch.tanh(self.conv2(out)), 2)
        out = out.view(-1, 16 * 4 * 4)
        out = torch.tanh(self.fc1(out))
        out = self.fc2(out)
        return out

In [5]:
train_loader = torch.utils.data.DataLoader(cifar10_train, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_test, batch_size=64,
                                         shuffle=False)
all_acc_dict = collections.OrderedDict()

def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict


In [6]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))
            validate(model, train_loader, val_loader)

In [None]:
model = NetWidth().to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model, train_loader, val_loader)


2022-03-20 00:08:48.179095 Epoch 1, Training loss 1.823598206805451
Accuracy train: 0.42
Accuracy val: 0.42
2022-03-20 00:10:17.148176 Epoch 10, Training loss 1.0616125284558366
Accuracy train: 0.62
Accuracy val: 0.56
2022-03-20 00:11:56.026092 Epoch 20, Training loss 0.648580199007488
Accuracy train: 0.75
Accuracy val: 0.59
2022-03-20 00:13:34.694001 Epoch 30, Training loss 0.34556875762808353
Accuracy train: 0.84
Accuracy val: 0.58
2022-03-20 00:15:13.587998 Epoch 40, Training loss 0.1712465749391357
Accuracy train: 0.91
Accuracy val: 0.57
2022-03-20 00:16:52.440839 Epoch 50, Training loss 0.09240160246505914
Accuracy train: 0.95
Accuracy val: 0.57
2022-03-20 00:18:31.254326 Epoch 60, Training loss 0.055751043345655324
Accuracy train: 0.98
Accuracy val: 0.57
2022-03-20 00:20:10.507786 Epoch 70, Training loss 0.037175594836168585
Accuracy train: 0.99
Accuracy val: 0.57
2022-03-20 00:21:48.968865 Epoch 80, Training loss 0.02698798025446132
Accuracy train: 0.99
Accuracy val: 0.57
2022-0

{'train': 0.99994, 'val': 0.5708}

In [None]:
model2 = NetWidth2().to(device=device)
optimizer = optim.SGD(model2.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model2,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

validate(model2, train_loader, val_loader)

2022-03-20 00:58:15.778759 Epoch 1, Training loss 1.9827871834835433
Accuracy train: 0.39
Accuracy val: 0.39
2022-03-20 00:59:15.796367 Epoch 10, Training loss 1.067048784945627
Accuracy train: 0.62
Accuracy val: 0.60
2022-03-20 01:00:22.168403 Epoch 20, Training loss 0.8343745812278269
Accuracy train: 0.70
Accuracy val: 0.66
2022-03-20 01:01:28.384854 Epoch 30, Training loss 0.7350310712595425
Accuracy train: 0.74
Accuracy val: 0.69
2022-03-20 01:02:34.475215 Epoch 40, Training loss 0.6636746202588386
Accuracy train: 0.77
Accuracy val: 0.69
2022-03-20 01:03:40.683420 Epoch 50, Training loss 0.6044617405023112
Accuracy train: 0.79
Accuracy val: 0.69
2022-03-20 01:04:47.125925 Epoch 60, Training loss 0.5531179899221186
Accuracy train: 0.81
Accuracy val: 0.68
2022-03-20 01:05:53.133607 Epoch 70, Training loss 0.5068753277287459
Accuracy train: 0.82
Accuracy val: 0.68
2022-03-20 01:06:58.821882 Epoch 80, Training loss 0.4645010269701938
Accuracy train: 0.83
Accuracy val: 0.67
2022-03-20 0

{'train': 0.90468, 'val': 0.6211}

Problem 2

In [7]:
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)  # <1>
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [8]:
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

class NetResDeepDropout(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        self.dropout2d = nn.Dropout2d(p=0.3)
        self.dropout = nn.Dropout(p=0.3)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.dropout2d(out)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = self.dropout2d(out)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.dropout(out)
        out = self.fc2(out)
        return out

class NetResDeepBatchNorm(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_norm = nn.BatchNorm2d(n_chans1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc1_norm = nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1_norm(self.conv1(x))), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1_norm(self.fc1(out)))
        out = self.fc2(out)
        return out

In [None]:
model = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 200,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
# validate(model, train_loader, val_loader)

2022-03-22 00:12:53.464905 Epoch 1, Training loss 1.7507648257648243
Accuracy train: 0.41
Accuracy val: 0.40
2022-03-22 00:16:04.149449 Epoch 10, Training loss 0.8832015352480856
Accuracy train: 0.68
Accuracy val: 0.64
2022-03-22 00:19:35.255654 Epoch 20, Training loss 0.6757848311949264
Accuracy train: 0.76
Accuracy val: 0.67
2022-03-22 00:23:06.479471 Epoch 30, Training loss 0.5441320890851338
Accuracy train: 0.80
Accuracy val: 0.67
2022-03-22 00:26:37.489273 Epoch 40, Training loss 0.43137307158287835
Accuracy train: 0.82
Accuracy val: 0.66
2022-03-22 00:30:08.721498 Epoch 50, Training loss 0.3384645460054393
Accuracy train: 0.85
Accuracy val: 0.66
2022-03-22 00:33:39.842219 Epoch 60, Training loss 0.27541364037700933
Accuracy train: 0.85
Accuracy val: 0.65
2022-03-22 00:37:11.128845 Epoch 70, Training loss 0.2361571470042095
Accuracy train: 0.87
Accuracy val: 0.65
2022-03-22 00:40:42.243754 Epoch 80, Training loss 0.18915058438287444
Accuracy train: 0.88
Accuracy val: 0.64
2022-03-

In [None]:
model2 = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model2.parameters(), lr=3e-3,  weight_decay=1e-4)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 200,
    optimizer = optimizer,
    model = model2,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
# validate(model2, train_loader, val_loader)

2022-03-22 15:30:25.713659 Epoch 1, Training loss 1.736941417007495
Accuracy train: 0.34
Accuracy val: 0.34
2022-03-22 15:33:34.052711 Epoch 10, Training loss 0.9380189663613848
Accuracy train: 0.65
Accuracy val: 0.61
2022-03-22 15:37:02.250452 Epoch 20, Training loss 0.7215486439826239
Accuracy train: 0.74
Accuracy val: 0.66
2022-03-22 15:40:30.371824 Epoch 30, Training loss 0.5860441800044931
Accuracy train: 0.78
Accuracy val: 0.66
2022-03-22 15:43:58.280817 Epoch 40, Training loss 0.471880220543698
Accuracy train: 0.81
Accuracy val: 0.66
2022-03-22 15:47:26.161063 Epoch 50, Training loss 0.37581508036922007
Accuracy train: 0.83
Accuracy val: 0.64
2022-03-22 15:50:54.053254 Epoch 60, Training loss 0.30170636885153973
Accuracy train: 0.84
Accuracy val: 0.64
2022-03-22 15:54:21.894902 Epoch 70, Training loss 0.24483407838532076
Accuracy train: 0.84
Accuracy val: 0.63
2022-03-22 15:57:49.754775 Epoch 80, Training loss 0.1965501949791332
Accuracy train: 0.81
Accuracy val: 0.61
2022-03-22

In [9]:
model3 = NetResDeepDropout(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model3.parameters(), lr=9e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model3,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
# validate(model3, train_loader, val_loader)

2022-03-22 22:54:50.094471 Epoch 1, Training loss 2.144991959604766
Accuracy train: 0.24
Accuracy val: 0.24
2022-03-22 22:58:01.489169 Epoch 10, Training loss 1.4980751986393843
Accuracy train: 0.43
Accuracy val: 0.43
2022-03-22 23:01:33.987559 Epoch 20, Training loss 1.3424742673059253
Accuracy train: 0.52
Accuracy val: 0.50
2022-03-22 23:05:06.428735 Epoch 30, Training loss 1.2730646330072446
Accuracy train: 0.53
Accuracy val: 0.52
2022-03-22 23:08:38.710578 Epoch 40, Training loss 1.2343927220920163
Accuracy train: 0.55
Accuracy val: 0.52
2022-03-22 23:12:10.760015 Epoch 50, Training loss 1.1940795218243319
Accuracy train: 0.57
Accuracy val: 0.55
2022-03-22 23:15:42.983597 Epoch 60, Training loss 1.1613170257614702
Accuracy train: 0.57
Accuracy val: 0.55
2022-03-22 23:19:15.193591 Epoch 70, Training loss 1.1515594824500706
Accuracy train: 0.58
Accuracy val: 0.55
2022-03-22 23:22:47.534858 Epoch 80, Training loss 1.1194788520141026
Accuracy train: 0.59
Accuracy val: 0.56
2022-03-22 2

In [None]:
model4 = NetResDeepBatchNorm(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model4.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 200,
    optimizer = optimizer,
    model = model4,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
# validate(model4, train_loader, val_loader)

2022-03-22 01:23:20.041551 Epoch 1, Training loss 1.8864518004610105
Accuracy train: 0.45
Accuracy val: 0.45
2022-03-22 01:26:38.942740 Epoch 10, Training loss 0.9291007843468805
Accuracy train: 0.70
Accuracy val: 0.64
2022-03-22 01:30:23.637627 Epoch 20, Training loss 0.6740351446982845
Accuracy train: 0.79
Accuracy val: 0.66
2022-03-22 01:34:04.159802 Epoch 30, Training loss 0.521326186330727
Accuracy train: 0.84
Accuracy val: 0.66
2022-03-22 01:37:48.669873 Epoch 40, Training loss 0.4083817498782254
Accuracy train: 0.87
Accuracy val: 0.64
2022-03-22 01:41:29.185785 Epoch 50, Training loss 0.31791943403156214
Accuracy train: 0.89
Accuracy val: 0.63
2022-03-22 01:45:13.897219 Epoch 60, Training loss 0.2436674494591668
Accuracy train: 0.91
Accuracy val: 0.63
2022-03-22 01:48:54.429783 Epoch 70, Training loss 0.1840492696941966
Accuracy train: 0.93
Accuracy val: 0.62
2022-03-22 01:52:39.256938 Epoch 80, Training loss 0.13698897474562116
Accuracy train: 0.94
Accuracy val: 0.62
2022-03-22