# Task description:
- Modify the ResNet example from the book to resemble the
Network from the paper "Wide Residual Networks" by Zagoruyko et al.

- Use the same augmentation methods as in the paper
(RandomHorizontalFlip and RandomCrop)
- Compare the accuracy (detection rates) for training and validation data with the
validation data with the example from the book
- Train the network only for the classification of birds and airplanes.
- Use 28 B(3,3) layers with k=2

# Implementation:

In [16]:
from matplotlib import pyplot as plt
import numpy as np
import collections

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms

import datetime
import random

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)

<torch._C.Generator at 0x7fbfb858d210>

In [17]:
class_names = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']

data_path = '../data-unversioned/p1ch6/'
cifar10 = datasets.CIFAR10(
    data_path, train=True, download=True,
    transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=True,
    transform=transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.4915, 0.4823, 0.4468),
                             (0.2470, 0.2435, 0.2616))
    ]))

Files already downloaded and verified
Files already downloaded and verified


In [18]:
label_map = {0: 0, 2: 1}
class_names = ['airplane', 'bird']
cifar2 = [(img, label_map[label])
          for img, label in cifar10
          if label in [0, 2]]
cifar2_val = [(img, label_map[label])
              for img, label in cifar10_val
              if label in [0, 2]]


In [19]:
# define the training loop
def training_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0

        for imgs, labels in train_loader:
            imgs = imgs.to(device=device)  # <1>
            labels = labels.to(device=device)
            outputs = model(imgs)
            outputs = outputs[:len(labels)]
            loss = loss_fn(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()

        if epoch == 1 or epoch % 10 == 0:
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch,
                loss_train / len(train_loader)))

In [20]:
# validates the accuracy of a given model on training and validation data
# and stores the results in a dictionary
def validate(model, train_loader, val_loader):
    accdict = {}
    for name, loader in [("train", train_loader), ("val", val_loader)]:
        correct = 0
        total = 0

        with torch.no_grad():
            for imgs, labels in loader:
                imgs = imgs.to(device=device)  # <1>
                labels = labels.to(device=device)
                outputs = model(imgs)
                outputs = outputs[:len(labels)]
                _, predicted = torch.max(outputs, dim=1) # <1>
                total += labels.shape[0]
                correct += int((predicted == labels).sum())

        print("Accuracy {}: {:.2f}".format(name , correct / total))
        accdict[name] = correct / total
    return accdict

#### ResNetDeep()

In [21]:
# copied from the Book Deep-Learning with Pytorch by Eli Stevens for comparison to own implemented network WideResidualNetwork
class ResBlock(nn.Module):
    def __init__(self, n_chans):
        super(ResBlock, self).__init__()
        self.conv = nn.Conv2d(n_chans, n_chans, kernel_size=3,
                              padding=1, bias=False)  # <1>
        self.batch_norm = nn.BatchNorm2d(num_features=n_chans)
        torch.nn.init.kaiming_normal_(self.conv.weight,
                                      nonlinearity='relu')  # <2>
        torch.nn.init.constant_(self.batch_norm.weight, 0.5)
        torch.nn.init.zeros_(self.batch_norm.bias)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = torch.relu(out)
        return out + x


class NetResDeep(nn.Module):
    def __init__(self, n_chans1=32, n_blocks=10):
        super().__init__()
        self.n_chans1 = n_chans1
        self.conv1 = nn.Conv2d(3, n_chans1, kernel_size=3, padding=1)
        self.resblocks = nn.Sequential(
            *(n_blocks * [ResBlock(n_chans=n_chans1)]))
        self.fc1 = nn.Linear(8 * 8 * n_chans1, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        out = F.max_pool2d(torch.relu(self.conv1(x)), 2)
        out = self.resblocks(out)
        out = F.max_pool2d(out, 2)
        out = out.view(-1, 8 * 8 * self.n_chans1)
        out = torch.relu(self.fc1(out))
        out = self.fc2(out)
        return out

In [22]:
device = torch.device('cuda')
print("ResNetDeep from Book Deep-Learning with PyTorch by Eli Stevens:")
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)
all_acc_dict = collections.OrderedDict()

model = NetResDeep(n_chans1=32, n_blocks=28).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-3)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["res deep"] = validate(model, train_loader, val_loader)
print("Number of free parameters in the model:",sum(p.numel() for p in model.parameters()))

ResNetDeep from Book Deep-Learning with PyTorch by Eli Stevens:
2023-06-19 13:06:53.768692 Epoch 1, Training loss 0.6870595774832805
2023-06-19 13:07:18.718525 Epoch 10, Training loss 0.3088138674854473
2023-06-19 13:07:46.773646 Epoch 20, Training loss 0.21589484877267462
2023-06-19 13:08:14.338897 Epoch 30, Training loss 0.15515167994578932
2023-06-19 13:08:41.756228 Epoch 40, Training loss 0.12364577526926615
2023-06-19 13:09:09.126357 Epoch 50, Training loss 0.07961698998489482
2023-06-19 13:09:36.559698 Epoch 60, Training loss 0.11591702577165643
2023-06-19 13:10:04.195961 Epoch 70, Training loss 0.01145614955572791
2023-06-19 13:10:31.507238 Epoch 80, Training loss 0.08479479701809917
2023-06-19 13:10:58.840804 Epoch 90, Training loss 0.03849992383817199
2023-06-19 13:11:26.195553 Epoch 100, Training loss 0.18032954080374378
Accuracy train: 0.98
Accuracy val: 0.86
Number of free parameters in the model: 75810


#### WideResidualNetwork()

In [23]:
# Definieren der Modellarchitektur des Wide Residual Networks
class WideResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(WideResidualBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.downsample = None

        # Check if downsample is needed based on input and output channels and stride
        if in_channels != out_channels or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
            )

    def forward(self, x):
        identity = x
        # First convolutional block
        out = self.bn1(x)
        out = self.relu1(out)
        out = self.conv1(out)

        # Second convolutional block
        out = self.bn2(out)
        out = self.relu2(out)
        out = self.conv2(out)

        # Apply downsample if necessary
        # adjust the dimensions of the input feature map so that it matches the dimensions of the output feature map
        if self.downsample is not None:
            identity = self.downsample(x)

        # Add identity to output
        out += identity

        return out


class WideResidualNetwork(nn.Module):
    def __init__(self, num_blocks, width, num_classes=2):
        super(WideResidualNetwork, self).__init__()
        # Initial convolutional layer
        self.conv1 = nn.Conv2d(3, 16 * width, kernel_size=3, stride=1, padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)

        # Three blocks of WideResidualBlock modules
        self.block0 = self._make_block_seq(16 * width, 16 * width, num_blocks)
        self.block1 = self._make_block_seq(16 * width, 32 * width, num_blocks, stride=2)
        self.block2 = self._make_block_seq(32 * width, 64 * width, num_blocks, stride=2)
        self.blocks = [self.block0, self.block1, self.block2]

        # Batch normalization, average pooling, and fully connected layers
        self.bn = nn.BatchNorm2d(64 * width)
        self.avg_pool = nn.AvgPool2d(8, stride=1)
        self.fcl = nn.Linear(64 * width, num_classes)

    def _make_block_seq(self, in_channels, out_channels, num_blocks, stride=1):
        block_seq = []
        # Create the WideResidualBlock bock sequences
        block_seq.append(WideResidualBlock(in_channels, out_channels, stride))
        for _ in range(1, num_blocks):
            block_seq.append(WideResidualBlock(out_channels, out_channels))
        return nn.Sequential(*block_seq)

    def forward(self, x):
        # Input convolutional layer
        x = self.conv1(x)
        # Pass through each block in the WideResidualNetwork
        for block in self.blocks:
          x = block(x)
        # Batch normalization, activation, pooling, flattening, and fully connected layers
        x = self.bn(x)
        x = self.relu(x)
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)
        x = self.fcl(x)
        return x



In [25]:
print("WideResidualNetwork from Paper Wide Residual Networks by Zagoruyko et al.:")
train_loader = torch.utils.data.DataLoader(cifar2, batch_size=64, shuffle=True)
val_loader = torch.utils.data.DataLoader(cifar2_val, batch_size=64, shuffle=False)
all_acc_dict = collections.OrderedDict()

model = WideResidualNetwork(num_blocks=4, width=2, num_classes=2).to(device=device)
optimizer = optim.SGD(model.parameters(), lr=3e-1)
loss_fn = nn.CrossEntropyLoss()

training_loop(
    n_epochs = 100,
    optimizer = optimizer,
    model = model,
    loss_fn = loss_fn,
    train_loader = train_loader,
)
all_acc_dict["res deep"] = validate(model, train_loader, val_loader)
print("Number of free parameters in the model:",sum(p.numel() for p in model.parameters()))

WideResidualNetwork from Paper Wide Residual Networks by Zagoruyko et al.:
2023-06-19 13:24:48.440499 Epoch 1, Training loss 0.46060856208679785
2023-06-19 13:25:36.739702 Epoch 10, Training loss 0.16452496980500828
2023-06-19 13:26:29.759600 Epoch 20, Training loss 0.030695219507891756
2023-06-19 13:27:23.236152 Epoch 30, Training loss 0.0020471766195204375
2023-06-19 13:28:16.483703 Epoch 40, Training loss 0.01093795595121137
2023-06-19 13:29:09.644597 Epoch 50, Training loss 0.0036373017630038397
2023-06-19 13:30:02.857480 Epoch 60, Training loss 0.0003268259985764121
2023-06-19 13:30:56.091691 Epoch 70, Training loss 6.868609134355766e-05
2023-06-19 13:31:49.144783 Epoch 80, Training loss 7.751022358959768e-05
2023-06-19 13:32:42.302052 Epoch 90, Training loss 3.812436228342049e-05
2023-06-19 13:33:35.495770 Epoch 100, Training loss 3.922447275883291e-05
Accuracy train: 1.00
Accuracy val: 0.92
Number of free parameters in the model: 1471138


# Comparison:

The NetResDeep network uses simple residual blocks consisting of a convolutional layer and a batch normalization layer. These blocks are connected several times in series to achieve a deeper architecture. It has a comparatively small number of parameters (75,810) and consists of a single convolutional layer followed by residual blocks and a linear classification layer. The accuracy of the NetResDeep model is 98% on the training data and 86% on the validation data.

The WideResidualNetwork, on the other hand, uses Wide Residual Blocks, which have a wider network architecture with more channels. These blocks consist of two convolutional layers and are deeper than the simple residual blocks. The WideResidualNetwork has a much higher number of parameters (1,471,138) due to the larger number of channels in the blocks. It also has a Convolutional layer followed by three layers of Wide Residual Blocks, Batch Normalization, Pooling, and a Linear Classification layer. The WideResidualNetwork has a broader and deeper architecture that allows the model to capture more features and learn more complex relationships in the data. This leads to higher accuracy on the validation data, as can also be seen here: the accuracy of the WideResidualNetwork model is 100% on the training data and 92% on the validation data.