In [1]:
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7fb28ca14910>

In [2]:
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
    'ship', 'truck'
]


In [3]:
from torchvision import datasets, transforms

data_path = './'
cifar = datasets.CIFAR10(data_path,
                         train=True,
                         download=True,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4915, 0.4823, 0.4468),
                                                  (0.2470, 0.2435, 0.2616))
                         ]))


Files already downloaded and verified


In [4]:
cifar_val = datasets.CIFAR10(data_path,
                             train=False,
                             download=True,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.4915, 0.4823, 0.4468),
                                                      (0.2470, 0.2435, 0.2616))
                             ]))


Files already downloaded and verified


In [5]:
label_map = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
    'ship', 'truck'
]
cifar10 = [(img, label_map[label]) for img, label in cifar]
cifar10_val = [(img, label_map[label]) for img, label in cifar_val]

In [6]:
# defining training device to be GPU if available
device = (torch.device('cuda')
          if torch.cuda.is_available() else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda.


In [7]:
class Net(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8)  # <1>
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

In [8]:
import datetime


def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [9]:
train_loader = torch.utils.data.DataLoader(cifar10,
                                           batch_size=64,
                                           shuffle=True)

model = Net()  # <1>
model.to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

2022-03-28 12:42:00.734665 Epoch 1, Training loss 2.0272156626672087
2022-03-28 12:42:32.193694 Epoch 10, Training loss 1.157125376145858
2022-03-28 12:43:07.705250 Epoch 20, Training loss 0.9849611330215279
2022-03-28 12:43:42.943194 Epoch 30, Training loss 0.9046245566414445
2022-03-28 12:44:18.849933 Epoch 40, Training loss 0.8509574451714831
2022-03-28 12:44:56.082982 Epoch 50, Training loss 0.8095788680531485
2022-03-28 12:45:33.139916 Epoch 60, Training loss 0.7747540147331975
2022-03-28 12:46:05.542413 Epoch 70, Training loss 0.7477211259743747
2022-03-28 12:46:38.476941 Epoch 80, Training loss 0.7259142484582598
2022-03-28 12:47:16.956521 Epoch 90, Training loss 0.7057621136803152
2022-03-28 12:47:51.604055 Epoch 100, Training loss 0.683885840808644
2022-03-28 12:48:25.457080 Epoch 110, Training loss 0.669882084326366
2022-03-28 12:48:59.248704 Epoch 120, Training loss 0.6544880609759285
2022-03-28 12:49:33.843939 Epoch 130, Training loss 0.6395354754936969
2022-03-28 12:50:06.

In [10]:
train_loader = torch.utils.data.DataLoader(cifar10,
                                           batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val,
                                         batch_size=64,
                                         shuffle=False)


def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                imgs = imgs.to(device=device)  # <1>
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1)  # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name, correct / total))


validate(model, train_loader, val_loader)

Accuracy train: 0.81
Accuracy val: 0.62


In [11]:
torch.save(model.state_dict(), data_path + 'cifar10_cnn.pt')

In [12]:
loaded_model = Net()  # <1>
loaded_model.load_state_dict(torch.load(data_path + 'cifar10_cnn.pt'))


<All keys matched successfully>

In [13]:
class Net2(nn.Module):

    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1,
                               n_chans1 // 2,
                               kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2,
                               n_chans1 // 2,
                               kernel_size=3,
                               padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out = F.max_pool2d(torch.relu(self.conv3(out)), 2)
        out = out.view(-1, 4 * 4 * self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [14]:
train_loader = torch.utils.data.DataLoader(cifar10,
                                           batch_size=64,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar10_val,
                                         batch_size=64,
                                         shuffle=False)
all_acc_dict = collections.OrderedDict()

model = Net2(n_chans1=32)  # <1>
model.to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)


def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1)  # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name, correct / total))
        accdict[name] = correct / total
    return accdict


all_acc_dict["depth"] = validate(model, train_loader, val_loader)

2022-03-28 12:58:17.893638 Epoch 1, Training loss 2.2579623297657196
2022-03-28 12:58:52.081774 Epoch 10, Training loss 1.1792650334822856
2022-03-28 12:59:30.364551 Epoch 20, Training loss 0.9259592535550637
2022-03-28 13:00:08.422976 Epoch 30, Training loss 0.8019527055113517
2022-03-28 13:00:46.621873 Epoch 40, Training loss 0.7234853408906771
2022-03-28 13:01:24.661155 Epoch 50, Training loss 0.6681999755103875
2022-03-28 13:02:02.484709 Epoch 60, Training loss 0.6303327808828305
2022-03-28 13:02:41.330534 Epoch 70, Training loss 0.599074250063323
2022-03-28 13:03:19.021301 Epoch 80, Training loss 0.5728257916620015
2022-03-28 13:03:56.914382 Epoch 90, Training loss 0.552716749917973
2022-03-28 13:04:35.650234 Epoch 100, Training loss 0.5328375514968277
2022-03-28 13:05:13.365527 Epoch 110, Training loss 0.5137770429939565
2022-03-28 13:05:51.451834 Epoch 120, Training loss 0.5001735438585586
2022-03-28 13:06:29.507044 Epoch 130, Training loss 0.48676098947939667
2022-03-28 13:07:0

In [15]:
torch.save(model.state_dict(), data_path + 'cifar10_cnn_2.pt')

loaded_model = Net2()  # <1>
loaded_model.load_state_dict(torch.load(data_path + 'cifar10_cnn_2.pt'))


<All keys matched successfully>

In [16]:
class ResBlock(nn.Module):

    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans,
                              n_chans,
                              kernel_size=3,
                              padding=1,
                              bias=False)  # <1>
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [17]:
class NetResDeep(nn.Module):

    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(*(n_blocks *
                                         [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [18]:
model = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)

all_acc_dict["res deep"] = validate(model, train_loader, val_loader)

2022-03-28 13:17:31.103375 Epoch 1, Training loss 1.6537252411513073
2022-03-28 13:19:10.543659 Epoch 10, Training loss 0.8952865375734657
2022-03-28 13:21:00.345388 Epoch 20, Training loss 0.7029523789272893
2022-03-28 13:22:50.262821 Epoch 30, Training loss 0.5760862308999767
2022-03-28 13:24:39.073742 Epoch 40, Training loss 0.47129336470152106
2022-03-28 13:26:27.061611 Epoch 50, Training loss 0.3821954912579883
2022-03-28 13:28:15.270538 Epoch 60, Training loss 0.3160720006908145
2022-03-28 13:30:06.502863 Epoch 70, Training loss 0.2563716293410267
2022-03-28 13:31:58.869709 Epoch 80, Training loss 0.21213952586283463
2022-03-28 13:33:51.045559 Epoch 90, Training loss 0.15849795307764006
2022-03-28 13:35:42.659678 Epoch 100, Training loss 0.13147679809719096
2022-03-28 13:37:34.322841 Epoch 110, Training loss 0.11758544699877234
2022-03-28 13:39:22.291453 Epoch 120, Training loss 0.10295337021989209
2022-03-28 13:41:11.840542 Epoch 130, Training loss 0.07277637570405669
2022-03-28

In [19]:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))


In [20]:
model = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
    n_epochs=300,
    optimizer=optimizer,
    model=model,
    loss_fn=loss_fn,
    train_loader=train_loader,
)
all_acc_dict["Resnet l2 reg"] = validate(model, train_loader, val_loader)

2022-03-28 14:12:34.924001 Epoch 1, Training loss 1.8529443972555877
2022-03-28 14:14:24.901975 Epoch 10, Training loss 1.0290287321485827
2022-03-28 14:16:27.023223 Epoch 20, Training loss 0.8332347395017629
2022-03-28 14:18:27.747870 Epoch 30, Training loss 0.7107355670855783
2022-03-28 14:20:28.783867 Epoch 40, Training loss 0.617741543580504
2022-03-28 14:22:27.785151 Epoch 50, Training loss 0.5388303382317429
2022-03-28 14:24:29.200062 Epoch 60, Training loss 0.478221152673292
2022-03-28 14:26:30.370353 Epoch 70, Training loss 0.4247900950901039
2022-03-28 14:28:32.745434 Epoch 80, Training loss 0.37776153782368316
2022-03-28 14:30:33.984261 Epoch 90, Training loss 0.3467440060947252
2022-03-28 14:32:35.735117 Epoch 100, Training loss 0.31754724077327784
2022-03-28 14:34:37.905049 Epoch 110, Training loss 0.28845322587529715
2022-03-28 14:36:40.149228 Epoch 120, Training loss 0.27854507338360446
2022-03-28 14:38:44.471415 Epoch 130, Training loss 0.24163195132599463
2022-03-28 14:

In [21]:
class NetResDeepDropout(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_dropout = nn.Dropout2d(p=0.3)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.conv1_dropout(out)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [22]:
model = NetResDeepDropout(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["NetResDeepdropout"] = validate(model, train_loader, val_loader)

2022-03-28 15:26:56.322412 Epoch 1, Training loss 1.8857117900458138
2022-03-28 15:28:39.937823 Epoch 10, Training loss 1.183483033991226
2022-03-28 15:30:33.458388 Epoch 20, Training loss 1.0019492818723859
2022-03-28 15:32:27.560152 Epoch 30, Training loss 0.9012617848413375
2022-03-28 15:34:20.417570 Epoch 40, Training loss 0.8300152822299991
2022-03-28 15:36:11.089144 Epoch 50, Training loss 0.772568424873035
2022-03-28 15:38:02.729024 Epoch 60, Training loss 0.721823562098586
2022-03-28 15:39:58.950338 Epoch 70, Training loss 0.6828312092577405
2022-03-28 15:41:53.166647 Epoch 80, Training loss 0.638541258738169
2022-03-28 15:43:47.100118 Epoch 90, Training loss 0.608074029273999
2022-03-28 15:45:41.468615 Epoch 100, Training loss 0.5768273579876136
2022-03-28 15:47:32.291132 Epoch 110, Training loss 0.5515036447845456
2022-03-28 15:49:22.896113 Epoch 120, Training loss 0.5282897266661725
2022-03-28 15:51:14.614294 Epoch 130, Training loss 0.50715550107648
2022-03-28 15:53:04.3005

In [25]:
# class NetBatchNorm(nn.Module):
#     def __init__(self, n_chans1=32):
#         super().__init__()
#         self.n_chans1 = n_chans1
#         self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
#         self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
#         self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3, 
#                                padding=1)
#         self.conv2_batchnorm = nn.BatchNorm2d(num_features=n_chans1 // 2)
#         self.fc1 = nn.Linear(8 * 8 * n_chans1 // 2, 32)
#         self.fc2 = nn.Linear(32, 2)
        
#     def forward(self, x):
#         out = self.conv1_batchnorm(self.conv1(x))
#         out = F.max_pool2d(torch.tanh(out), 2)
#         out = self.conv2_batchnorm(self.conv2(out))
#         out = F.max_pool2d(torch.tanh(out), 2)
#         out = out.view(-1, 8 * 8 * self.n_chans1 // 2)
#         out = torch.tanh(self.fc1(out))
#         out = self.fc2(out)
#         return out
    
    
class NetResDeepBatchNorm(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv1_batchnorm = nn.BatchNorm2d(num_features=n_chans1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1_batchnorm(self.conv1(x))),2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [26]:
model = NetResDeepBatchNorm(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["NetResDeepBatchNorm"] = validate(model, train_loader, val_loader)

2022-03-28 16:45:18.085264 Epoch 1, Training loss 1.631086176161266
2022-03-28 16:47:01.447299 Epoch 10, Training loss 0.8519168049859269
2022-03-28 16:48:57.264420 Epoch 20, Training loss 0.6804067243624221
2022-03-28 16:50:53.971188 Epoch 30, Training loss 0.5628214233824055
2022-03-28 16:52:49.838432 Epoch 40, Training loss 0.46462158471955667
2022-03-28 16:54:44.697111 Epoch 50, Training loss 0.3873430900561535
2022-03-28 16:56:42.383493 Epoch 60, Training loss 0.3126164870074643
2022-03-28 16:58:39.234902 Epoch 70, Training loss 0.24821490480009553
2022-03-28 17:00:37.020036 Epoch 80, Training loss 0.2034750313681486
2022-03-28 17:02:34.119584 Epoch 90, Training loss 0.1735704119967492
2022-03-28 17:04:29.142659 Epoch 100, Training loss 0.12463425856340876
2022-03-28 17:06:25.418819 Epoch 110, Training loss 0.10340226514984274
2022-03-28 17:08:23.074665 Epoch 120, Training loss 0.09345086091769683
2022-03-28 17:10:19.910265 Epoch 130, Training loss 0.08333995694632801
2022-03-28 1