In [17]:
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7f67c5634910>

In [18]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

In [19]:
from torchvision import datasets, transforms
data_path = './'
cifar = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [20]:
cifar_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified


In [21]:
label_map = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
    'ship', 'truck'
]
cifar10 = [(img, label_map[label]) for img, label in cifar]
cifar10_val = [(img, label_map[label]) for img, label in cifar_val]

In [22]:
# defining training device to be GPU if available
device = (torch.device('cuda:0') if torch.cuda.is_available()
          else torch.device('cpu'))
print(f"Training on device {device}.")

Training on device cuda:0.


In [23]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.act1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.act2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(8 * 8 * 8, 32)
        self.act3 = nn.Tanh()
        self.fc2 = nn.Linear(32, 10)

    def forward(self, x):
        out = self.pool1(self.act1(self.conv1(x)))
        out = self.pool2(self.act2(self.conv2(out)))
        out = out.view(-1, 8 * 8 * 8) # <1>
        out = self.act3(self.fc1(out))
        out = self.fc2(out)
        return out

In [24]:
import datetime

def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [25]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)

model = Net()  # <1>
model.to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

2022-03-26 14:40:03.701275 Epoch 1, Training loss 2.027220543845535
2022-03-26 14:40:31.382359 Epoch 10, Training loss 1.1571621502299443
2022-03-26 14:40:58.910190 Epoch 20, Training loss 0.9849083702582533
2022-03-26 14:41:24.144920 Epoch 30, Training loss 0.9045757724119879
2022-03-26 14:41:49.082989 Epoch 40, Training loss 0.8508908647253081
2022-03-26 14:42:19.924078 Epoch 50, Training loss 0.8095403331548662
2022-03-26 14:42:51.460231 Epoch 60, Training loss 0.7747487638460095
2022-03-26 14:43:23.016442 Epoch 70, Training loss 0.7476884175825607
2022-03-26 14:43:57.386794 Epoch 80, Training loss 0.7258681955239962
2022-03-26 14:44:24.808018 Epoch 90, Training loss 0.7056385786332133
2022-03-26 14:44:51.788576 Epoch 100, Training loss 0.683824774897312
2022-03-26 14:45:18.550083 Epoch 110, Training loss 0.6696787827536274
2022-03-26 14:45:45.399899 Epoch 120, Training loss 0.6542487403239741
2022-03-26 14:46:11.674735 Epoch 130, Training loss 0.6391846546355415
2022-03-26 14:46:37

In [26]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=False)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)

def validate(model, train_loader, val_loader):
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():  # <1>
            for imgs, labels in loader:
                imgs = imgs.to(device=device)  # <1>
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <2>
                total += labels.shape[0]  # <3>
                correct += int((predicted == labels).sum())  # <4>

        print("Accuracy {}: {:.2f}".format(name , correct / total))

validate(model, train_loader, val_loader)

Accuracy train: 0.81
Accuracy val: 0.62


In [27]:
torch.save(model.state_dict(), data_path + 'cifar10_cnn.pt')

In [28]:
loaded_model = Net()  # <1>
loaded_model.load_state_dict(torch.load(data_path
                                        + 'cifar10_cnn.pt'))

<All keys matched successfully>

In [29]:
class Net2(nn.Module):
    def __init__(self, n_chans1=32):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(n_chans1, n_chans1 // 2, kernel_size=3,
                               padding=1)
        self.conv3 = nn.Conv2d(n_chans1 // 2, n_chans1 // 2,
                               kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4 * 4 * n_chans1 // 2, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = F.max_pool2d(torch.relu(self.conv2(out)), 2)
        out = F.max_pool2d(torch.relu(self.conv3(out)), 2)
        out = out.view(-1, 4 * 4 * self.n_chans1 // 2)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [30]:
train_loader = torch.utils.data.DataLoader(cifar10, batch_size=64,
                                           shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar10_val, batch_size=64,
                                         shuffle=False)
all_acc_dict = collections.OrderedDict()

model = Net2(n_chans1=32)  # <1>
model.to(device=device)
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)
                labels = labels.to(device=device)
                outputs = model(imgs)
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

all_acc_dict["depth"] = validate(model, train_loader, val_loader)

2022-03-26 14:54:20.071006 Epoch 1, Training loss 2.257882063955907
2022-03-26 14:54:49.831032 Epoch 10, Training loss 1.1814646738417007
2022-03-26 14:55:22.800317 Epoch 20, Training loss 0.9264471422680809
2022-03-26 14:55:55.664531 Epoch 30, Training loss 0.8000339915228012
2022-03-26 14:56:28.251022 Epoch 40, Training loss 0.7217235721057028
2022-03-26 14:57:01.280099 Epoch 50, Training loss 0.6661967449938245
2022-03-26 14:57:34.403194 Epoch 60, Training loss 0.6293703287535006
2022-03-26 14:58:07.668683 Epoch 70, Training loss 0.6000132293957273
2022-03-26 14:58:41.569885 Epoch 80, Training loss 0.5732203877109396
2022-03-26 14:59:14.704272 Epoch 90, Training loss 0.5535791661695141
2022-03-26 14:59:47.318157 Epoch 100, Training loss 0.5342110112843002
2022-03-26 15:00:20.276174 Epoch 110, Training loss 0.5179338953874605
2022-03-26 15:00:53.029716 Epoch 120, Training loss 0.5045836665250761
2022-03-26 15:01:25.382817 Epoch 130, Training loss 0.49035039004843556
2022-03-26 15:01:

In [31]:
torch.save(model.state_dict(), data_path + 'cifar10_cnn_2.pt')

loaded_model = Net2()  # <1>
loaded_model.load_state_dict(torch.load(data_path
                                        + 'cifar10_cnn_2.pt'))

<All keys matched successfully>

In [32]:
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)  # <1>
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x

In [33]:
class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 10)
        
    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [34]:
model = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)

all_acc_dict["res deep"] = validate(model, train_loader, val_loader)

2022-03-27 17:13:50.595849 Epoch 1, Training loss 1.654699182418911
2022-03-27 17:15:22.048389 Epoch 10, Training loss 0.8983723106591598
2022-03-27 17:17:07.124595 Epoch 20, Training loss 0.707053209700243
2022-03-27 17:18:52.359878 Epoch 30, Training loss 0.5809508525120938
2022-03-27 17:20:38.285898 Epoch 40, Training loss 0.47596838512002965
2022-03-27 17:22:42.974417 Epoch 50, Training loss 0.387585387720019
2022-03-27 17:24:27.247967 Epoch 60, Training loss 0.3160270772245534
2022-03-27 17:26:18.784138 Epoch 70, Training loss 0.25759424888493154
2022-03-27 17:28:00.985377 Epoch 80, Training loss 0.20874030054892267
2022-03-27 17:29:43.020550 Epoch 90, Training loss 0.15959179033632473
2022-03-27 17:31:25.458214 Epoch 100, Training loss 0.13916741610716676
2022-03-27 17:33:13.496896 Epoch 110, Training loss 0.12278354054083453
2022-03-27 17:34:55.474209 Epoch 120, Training loss 0.08534812689056177
2022-03-27 17:36:35.846895 Epoch 130, Training loss 0.08421751653032421
2022-03-27 1

In [35]:
def training_loop_l2reg(n_epochs, optimizer, model, loss_fn,
                        train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)
            labels = labels.to(device=device)
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)

            l2_lambda = 0.001
            l2_norm = sum(p.pow(2.0).sum()
                          for p in model.parameters())  # <1>
            loss = loss + l2_lambda * l2_norm

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))


In [36]:
model = NetResDeep(n_chans1=32, n_blocks=10).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop_l2reg(
    n_epochs = 300,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["Resnet l2 reg"] = validate(model, train_loader, val_loader)

2022-03-27 18:43:47.136991 Epoch 1, Training loss 1.874712833967965


KeyboardInterrupt: 