# Академия MADE
## Курс компьютерного зрения
### Семинар 2: реализация ResNet

#### План
1. (Recap) Рутина обучения в PyTorch
2. Реализация простой сети типа ResNet
3. Проверка эффекта от BatchNormalization
4. Проверка эффекта от residual-блоков
5. ДЗ для любознательных

In [2]:
import sys
sys.path.insert(1, "../input/seminar-2")

In [3]:
import tqdm
import numpy as np
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torchvision.datasets import MNIST, CIFAR10
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor, Normalize, Compose
from torch.optim import Adam
from torch.nn.functional import cross_entropy, relu

from utils import plot, show_kernels

In [4]:
# Dataset selection; the input size of each option is noted next to it.
# MNIST: 1x28x28
# DATASET = "MNIST"

# CIFAR10: 3x32x32
DATASET = "CIFAR10"

# Use CUDA when it is available, otherwise fall back to the CPU.
# DEVICE = "cpu"
# DEVICE = "cuda:0"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS = 2  # default number of passes over the training data
BATCH_SIZE = 256  # default mini-batch size
LR = 3e-4  # default learning rate handed to Adam in train()

In [6]:
# batch_size = 4

# the dataset defines the data
# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,  download=True, transform=transform)
# the dataloader loads it in batches
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable CIFAR10 class names, indexed by integer label
# (used when printing per-class accuracy).
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [7]:
# len(trainset), len(testset)

In [8]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# in place with mean 0.5 and std 0.25.
# NOTE: this name shadows the `torchvision.transforms` module that was imported
# as `transforms`; later cells rely on it being this Compose object.
transforms = Compose([
    ToTensor(),
    Normalize(0.5, 0.25, inplace=True),
])

# Pick the dataset (downloaded on demand) together with its channel and class
# counts. No `train` argument is passed, so the library's default split is used.
if DATASET == "MNIST":
    IMAGE_CHANNELS = 1
    NUM_CLASSES = 10
    dataset = MNIST("./dataset/mnist", download=True, transform=transforms)
elif DATASET == "CIFAR10":
    IMAGE_CHANNELS = 3
    NUM_CLASSES = 10
    dataset = CIFAR10("./dataset/cifar", download=True, transform=transforms)
else:
    # Any other dataset name is unsupported.
    raise NotImplementedError(DATASET)

In [10]:
import random

def show_images_dataset(dataset, n=5, collate_fn=lambda x: x[0]):
    """Show ``n`` randomly picked dataset items as one image grid.

    Args:
        dataset: indexable dataset with a length (sampled via ``random.choice``).
        n: int, number of items to draw (with replacement).
        collate_fn: callable extracting the image tensor from a dataset item;
            by default the first element of the item is taken.
    """
    samples = []
    for _ in range(n):
        samples.append(collate_fn(random.choice(dataset)))

    tiled = torchvision.utils.make_grid(samples)

    # Min-max rescale so that the normalized pixel values become displayable.
    shifted = tiled - tiled.min()
    scaled = shifted / shifted.max()

    # matplotlib expects channels last, the grid is channels first.
    plt.imshow(scaled.permute(1, 2, 0))
    plt.show()


# Visual sanity check: draw a few random samples from the selected dataset.
print("Dataset")
show_images_dataset(dataset)

#### (Recap) Рутина обучения в PyTorch

In [11]:
# Lower-case alias of the globally selected device.
device = DEVICE

In [107]:
# batch_size = 512
# trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
#                                           shuffle=True, num_workers=2)
# 
# testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
#                                          shuffle=False, num_workers=2)

In [12]:
def train(model, dataset=dataset, num_epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, lr=LR, device=DEVICE):
    """Fit ``model`` on ``dataset`` with the Adam optimizer and cross-entropy loss.

    The model is switched to training mode and moved to ``device``. Batches are
    reshuffled every epoch and the last incomplete batch is dropped, so every
    epoch contributes ``len(dataset) // batch_size`` loss values.

    Args:
        model: torch.nn.Module mapping an input batch to class logits.
        dataset: torch.utils.data.Dataset yielding ``(input, label)`` pairs.
        num_epochs: int, number of passes over the dataset.
        batch_size: int, number of samples per optimization step.
        lr: float, Adam learning rate.
        device: device (string or torch.device) to train on.

    Returns:
        list of float: the loss of every optimization step, in order;
        its length is ``num_epochs * len(dataloader)``.
    """
    model.train()
    model = model.to(device)

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, pin_memory=True)
    optimizer = Adam(model.parameters(), lr=lr)

    history = []
    for _ in range(num_epochs):
        for inputs, targets in tqdm.tqdm(loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            step_loss = cross_entropy(model(inputs), targets)

            # Standard update: clear stale gradients, backpropagate, step.
            optimizer.zero_grad()
            step_loss.backward()
            optimizer.step()

            history.append(step_loss.item())

    return history

In [13]:
# TESTS for train()

# Smoke-test train() with a single linear layer over the flattened image.
input_size = dataset[0][0].size()
fc = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(np.prod(input_size), NUM_CLASSES)
)
losses_fc = train(fc, dataset, num_epochs=1)

# One loss value per full batch (train() drops the last incomplete batch) ...
assert len(losses_fc) == len(dataset) // BATCH_SIZE
# ... and the loss should go down between the first and the last ten steps.
assert np.mean(losses_fc[:10]) > np.mean(losses_fc[-10:])

plot(losses_fc, "fc")

#### Реализация простой сети типа ResNet

Взглянем на примерную схему сети ResNet34:

![resnet34](res/resnet34.png)

Реализуем свой "ResNet10":

![resnet10](res/resnet10.png)

Пересчитайте число слоев с весами - их как раз 10.

Кроме того, для простоты все операции уменьшения размера сведем к пулингу и вынесем в отдельные слои (а не встроим в сами сверточные блоки, как, например, в [torchvision](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py)).

Для начала реализуем вспомогательную функцию `get_conv(...)`, которая приготовит нам последовательность (свертка + активация + батчнорм). Это очень распространенная комбинация операций, которая нам будет нужна неоднократно:

In [14]:
def get_conv(kernel_size, in_features, out_features, with_bn=True, with_relu=True):
    """Build a ``conv -> [relu] -> [bn]`` stack wrapped in ``torch.nn.Sequential``.

    The convolution uses stride 1 and zero padding of ``kernel_size // 2``,
    which keeps the spatial size unchanged for odd kernel sizes.

    Args:
        kernel_size: int, side of the square convolution kernel.
        in_features: int, number of input channels.
        out_features: int, number of output channels.
        with_bn: bool, append a BatchNorm2d layer.
        with_relu: bool, append a ReLU activation after the convolution.

    Returns:
        torch.nn.Sequential holding one to three layers in the order above.
    """
    same_padding = kernel_size // 2
    conv = nn.Conv2d(in_features, out_features, kernel_size, stride=1, padding=same_padding)

    # The activation works in place only when a BatchNorm layer follows it.
    activation = [nn.ReLU(inplace=(with_bn is True))] if with_relu else []
    norm = [nn.BatchNorm2d(out_features)] if with_bn else []

    return nn.Sequential(conv, *activation, *norm)

In [15]:
# TESTS for get_conv()
# Default configuration: conv -> relu -> bn.
conv = get_conv(3, 8, 16)

assert len(conv) == 3
assert isinstance(conv[0], torch.nn.Conv2d)
assert conv[0].in_channels == 8
assert conv[0].out_channels == 16

assert isinstance(conv[1], torch.nn.ReLU)

assert isinstance(conv[2], torch.nn.BatchNorm2d)
assert conv[2].num_features == 16


# With both optional layers switched off only the convolution remains.
conv = get_conv(3, 8, 16, with_bn=False, with_relu=False)

assert len(conv) == 1
assert isinstance(conv[0], torch.nn.Conv2d)
assert conv[0].in_channels == 8
assert conv[0].out_channels == 16

Также реализуем слой `GlobalAveragePooling`. Помните, зачем он нужен?

In [16]:
class GlobalAveragePooling2d(nn.Module):
    """Average every channel over its two spatial dimensions."""

    def forward(self, x):
        """Collapse the spatial dimensions by taking their mean.

        Args:
            x: torch.Tensor, size B x C x H x W.

        Returns:
            torch.Tensor, size B x C.
        """
        return x.mean(dim=(2, 3))

In [17]:
# TESTS for GAP

# The pooled output keeps only the batch and channel dimensions.
gap = GlobalAveragePooling2d()
x = torch.randn(4, 3, 16, 16)
y = gap(x)

assert y.size() == (4, 3)

Перейдем к главному в ResNet - собственно, residual-блоку:

![residual_block](res/residual_block.png)

Под "блоком" мы будем иметь в виду последовательность сверточных слоев (с активацией и BN), "вокруг" которых прокинут skip-connection. Таким образом, в нашей сети "ResNet10" будет 4 таких блока (найдите их на картинке c ResNet10 выше).

Важный момент: обратите внимание, что тензоры **x** и **F(x)** могут иметь разное число каналов (например, 64 и 128). Как же сложить два таких тензора?..

Ответ: сделать "проекцию", см. `projection` в коде.

In [18]:
class ResidualBlock(nn.Module):
    """Stack of 3x3 conv layers with a skip connection around it.

    Computes ``relu(F(x) + shortcut(x))`` where ``F`` is ``num_layers``
    consecutive ``get_conv`` stacks and ``shortcut`` is either the identity or,
    when the channel counts differ, a 1x1 convolution (``self.projection``).

    Args:
        num_input_features: int, channels of the block input.
        num_features: int, channels produced by every conv layer of the block.
        num_layers: int, number of conv layers in the main path.
        with_bn: bool, forwarded to ``get_conv``.
    """

    def __init__(self, num_input_features, num_features, num_layers, with_bn=True):
        super().__init__()

        # The skip path needs a 1x1 projection only when channel counts differ.
        if num_input_features != num_features:
            self.projection = nn.Conv2d(num_input_features, num_features, 1, 1, 0)
        else:
            self.projection = None

        # Track the running channel count in a separate variable so that the
        # constructor argument keeps its value for the attribute stored below.
        layers = []
        in_features = num_input_features
        for _ in range(num_layers):
            layers.append(get_conv(3, in_features, num_features, with_bn=with_bn))
            in_features = num_features
        self.layers = nn.Sequential(*layers)

        self.num_input_features = num_input_features
        self.num_features = num_features
        self.num_layers = num_layers
        self.with_bn = with_bn

    def forward(self, x):
        """Forward pass.
        Applies convolution layers and skip-connection; self.projection, if necessary.

        Args:
            x: torch.Tensor, size B x C x H x W.

        Returns:
            torch.Tensor, size B x num_features x H x W.
        """
        out = self.layers(x)
        shortcut = x if self.projection is None else self.projection(x)

        # Out-of-place addition: an in-place `+=` would overwrite a tensor that
        # autograd may still need (the ReLU output when with_bn=False) and, for
        # an empty main path, would modify the caller's input.
        return relu(out + shortcut)

    def __repr__(self):
        header = (
            f"ResidualBlock(num_input_features={self.num_input_features}, "
            f"num_features={self.num_features}, num_layers={self.num_layers}, "
            f"with_bn={self.with_bn})"
        )
        return header + "".join("\n\t" + repr(layer) for layer in self.layers)

In [19]:
# TESTS for ResidualBlock
# Two conv layers, each a full conv -> relu -> bn stack.
block = ResidualBlock(4, 4, 2)

assert len(block.layers) == 2
assert len(block.layers[0]) == 3
assert len(block.layers[1]) == 3
assert isinstance(block.layers[1][2], nn.BatchNorm2d)

print(block)

Дальше дело за малым - собрать из готовых блоков нашу сеть:

![resnet10](res/resnet10.png)


In [20]:
def create_resnet10(num_input_features=IMAGE_CHANNELS, num_classes=NUM_CLASSES, with_bn=True):
    """Assemble the small "ResNet10": a 7x7 stem, four residual blocks, GAP and a linear head.

    Every residual block has two conv layers and is preceded by a 2x2 max
    pooling; the block widths are 64, 128, 256 and 512 channels.

    Args:
        num_input_features: int, channels of the input image.
        num_classes: int, size of the output logits vector.
        with_bn: bool, whether the conv stacks include BatchNorm.

    Returns:
        torch.nn.Sequential
    """
    # A single pooling module is reused at every downsampling step.
    downsample = nn.MaxPool2d((2, 2))
    head = (GlobalAveragePooling2d(), nn.Linear(512, num_classes))

    modules = [get_conv(7, num_input_features, 64, with_bn=with_bn)]
    for width_in, width_out in ((64, 64), (64, 128), (128, 256), (256, 512)):
        modules.append(downsample)
        modules.append(ResidualBlock(width_in, width_out, 2, with_bn=with_bn))

    return nn.Sequential(*modules, *head)

In [21]:
# Instantiate the network with the default arguments and move it to the
# selected device.
resnet10 = create_resnet10()
resnet10.to(DEVICE)

Обучим полученную модель с помощью реализованной в начале функции `train()`:

In [22]:
# Train for more epochs than before. The value is passed explicitly because
# train()'s default for num_epochs was bound when the function was defined.
NUM_EPOCHS = 5
losses_resnet10 = train(resnet10, num_epochs=NUM_EPOCHS)

In [23]:
# Per-step training loss of resnet10.
plot(losses_resnet10, label="resnet10")

In [31]:
def all_accuracy(net, testloader):
    """Print the overall top-1 accuracy of ``net`` over ``testloader``.

    The network is evaluated in eval mode (so BatchNorm and Dropout use their
    inference behaviour) without gradient tracking; its previous
    training/eval mode is restored afterwards.

    Args:
        net: torch.nn.Module mapping an image batch to class logits.
        testloader: iterable of ``(images, labels)`` batches.
    """
    # Send the inputs to wherever the model lives; only a parameter-free model
    # falls back to the notebook-wide DEVICE.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in testloader:
                outputs = net(images.to(device)).to("cpu")
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        net.train(was_training)

    # An empty loader yields NaN instead of a ZeroDivisionError.
    accuracy = 100 * correct / total if total else float("nan")
    print('Accuracy of the network on the {} test images: {} %'.format(
        total, accuracy))

In [27]:
# Held-out CIFAR10 test split with the same preprocessing as the training data.
# NOTE: the root differs from the "./dataset/cifar" used for the training set,
# and CIFAR10 is used here regardless of the DATASET setting.
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transforms)

# Fixed order (no shuffling), loaded by two worker processes.
testloader = torch.utils.data.DataLoader(testset, batch_size=256,
                                         shuffle=False, num_workers=2)

In [32]:
# Overall test accuracy of the trained resnet10.
all_accuracy(resnet10, testloader)

In [34]:
def class_accuracy(net, testloader, class_names=None):
    """Print the top-1 accuracy of ``net`` separately for every class.

    Every sample of every batch is counted. The network is evaluated in eval
    mode (so BatchNorm and Dropout use their inference behaviour) without
    gradient tracking; its previous training/eval mode is restored afterwards.

    Args:
        net: torch.nn.Module mapping an image batch to class logits.
        testloader: iterable of ``(images, labels)`` batches; labels are
            integer class indices.
        class_names: optional sequence of class names indexed by label;
            defaults to the notebook-level ``classes``.
    """
    if class_names is None:
        class_names = classes
    num_classes = len(class_names)

    # Send the inputs to wherever the model lives; only a parameter-free model
    # falls back to the notebook-wide DEVICE.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    class_correct = torch.zeros(num_classes, dtype=torch.long)
    class_total = torch.zeros(num_classes, dtype=torch.long)

    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                labels = labels.view(-1)
                predicted = net(images.to(device)).to("cpu").argmax(dim=1)
                hits = predicted == labels
                # Per-class histograms over the whole batch.
                class_total += torch.bincount(labels, minlength=num_classes)
                class_correct += torch.bincount(labels[hits], minlength=num_classes)
    finally:
        net.train(was_training)

    for name, n_correct, n_total in zip(class_names, class_correct.tolist(), class_total.tolist()):
        # A class without any sample yields NaN instead of a ZeroDivisionError.
        accuracy = 100 * n_correct / n_total if n_total else float("nan")
        print('Accuracy of {} : {} %'.format(name, accuracy))

In [35]:
# Per-class test accuracy of the trained resnet10.
class_accuracy(resnet10, testloader)

#### Проверка эффекта от BatchNormalization


Теперь проведем небольшой эксперимент: создадим сеть с аналогичной нашему ResNet10 архитектурой, но без слоев BatchNormalization.

#### Проверка эффекта от Residual-блоков


Теперь построим модель, у которой будет аналогичное число параметров (и FLOPS), но у которой не будет skip-connections. Называться она будет... Net10?

Для этого напишем класс для обычного блока - это легко сделать, ощипав уже написанный класс `ResidualBlock` (да, все вот так наоборот).

In [38]:
class Block(nn.Module):
    """Plain stack of 3x3 conv layers: ``ResidualBlock`` without the skip connection.

    Args:
        num_input_features: int, channels of the block input.
        num_features: int, channels produced by every conv layer of the block.
        num_layers: int, number of conv layers.
        with_bn: bool, forwarded to ``get_conv``.
    """

    def __init__(self, num_input_features, num_features, num_layers, with_bn=True):
        super().__init__()

        # Track the running channel count in a separate variable so that the
        # constructor argument keeps its value for the attribute stored below.
        layers = []
        in_features = num_input_features
        for _ in range(num_layers):
            layers.append(get_conv(3, in_features, num_features, with_bn=with_bn))
            in_features = num_features
        self.layers = nn.Sequential(*layers)

        self.num_input_features = num_input_features
        self.num_features = num_features
        self.num_layers = num_layers
        self.with_bn = with_bn

    def forward(self, x):
        """Apply the conv layers one after another.

        Args:
            x: torch.Tensor, size B x C x H x W.

        Returns:
            torch.Tensor, output of the last conv layer.
        """
        return self.layers(x)

    def __repr__(self):
        header = (
            f"Block(num_input_features={self.num_input_features}, "
            f"num_features={self.num_features}, num_layers={self.num_layers}, "
            f"with_bn={self.with_bn})"
        )
        return header + "".join("\n\t" + repr(layer) for layer in self.layers)

Функция для создания модели - тоже аналогична:

In [39]:
def create_net10(num_input_features=IMAGE_CHANNELS, num_classes=NUM_CLASSES, with_bn=True):
    """Assemble "Net10": the ResNet10 layout built from plain blocks (no skip connections).

    A 7x7 stem is followed by four two-layer blocks of width 64, 128, 256 and
    512, each preceded by a 2x2 max pooling, then GAP and a linear head.

    Args:
        num_input_features: int, channels of the input image.
        num_classes: int, size of the output logits vector.
        with_bn: bool, whether the conv stacks include BatchNorm.

    Returns:
        torch.nn.Sequential
    """
    # A single pooling module is reused at every downsampling step.
    downsample = nn.MaxPool2d((2, 2))
    head = (GlobalAveragePooling2d(), nn.Linear(512, num_classes))

    modules = [get_conv(7, num_input_features, 64, with_bn=with_bn)]
    for width_in, width_out in ((64, 64), (64, 128), (128, 256), (256, 512)):
        modules.append(downsample)
        modules.append(Block(width_in, width_out, 2, with_bn=with_bn))

    return nn.Sequential(*modules, *head)

In [41]:
# Build the skip-connection-free counterpart of resnet10; the bare name on the
# last line displays the module structure as the notebook cell output.
net10 = create_net10()
net10

In [42]:
# Train net10 with the same number of epochs as resnet10.
losses_net10 = train(net10,  device=DEVICE, num_epochs=NUM_EPOCHS)

In [43]:
# Overall test accuracy of the trained net10.
all_accuracy(net10, testloader)

In [44]:
# Per-class test accuracy of the trained net10.
class_accuracy(net10, testloader)

In [45]:
# Compare the training loss with and without skip connections (shallow nets).
plot(losses_resnet10, label="resnet10")
plot(losses_net10, label="net10")

При обучении данных моделей разницы почти не видно... Почему?

Ок, сделаем сети поглубже:

In [46]:
def create_net34(num_input_features=IMAGE_CHANNELS, num_classes=NUM_CLASSES, with_bn=True):
    """Assemble the deep plain network "Net34" (no skip connections).

    A 7x7 stem is followed by four stages of two-layer blocks with widths
    64, 128, 256, 512 and depths 3, 3, 5, 3; every stage starts with a 2x2
    max pooling. GAP and a linear head finish the network.

    Args:
        num_input_features: int, channels of the input image.
        num_classes: int, size of the output logits vector.
        with_bn: bool, whether the conv stacks include BatchNorm.

    Returns:
        torch.nn.Sequential
    """
    # A single pooling module is reused at every downsampling step.
    downsample = nn.MaxPool2d((2, 2))
    head = (GlobalAveragePooling2d(), nn.Linear(512, num_classes))

    modules = [get_conv(7, num_input_features, 64, with_bn=with_bn)]
    width_in = 64
    for width_out, depth in ((64, 3), (128, 3), (256, 5), (512, 3)):
        modules.append(downsample)
        for _ in range(depth):
            # Only the first block of a stage changes the channel count.
            modules.append(Block(width_in, width_out, 2, with_bn=with_bn))
            width_in = width_out

    return nn.Sequential(*modules, *head)

In [47]:
def create_resnet34(num_input_features=IMAGE_CHANNELS, num_classes=NUM_CLASSES, with_bn=True):
    """Assemble the deep residual network "ResNet34" of this seminar.

    A 7x7 stem is followed by four stages of two-layer residual blocks with
    widths 64, 128, 256, 512 and depths 3, 3, 5, 3; every stage starts with a
    2x2 max pooling. GAP and a linear head finish the network.

    Args:
        num_input_features: int, channels of the input image.
        num_classes: int, size of the output logits vector.
        with_bn: bool, whether the conv stacks include BatchNorm.

    Returns:
        torch.nn.Sequential
    """
    # A single pooling module is reused at every downsampling step.
    downsample = nn.MaxPool2d((2, 2))
    head = (GlobalAveragePooling2d(), nn.Linear(512, num_classes))

    modules = [get_conv(7, num_input_features, 64, with_bn=with_bn)]
    width_in = 64
    for width_out, depth in ((64, 3), (128, 3), (256, 5), (512, 3)):
        modules.append(downsample)
        for _ in range(depth):
            # Only the first block of a stage changes the channel count.
            modules.append(ResidualBlock(width_in, width_out, 2, with_bn=with_bn))
            width_in = width_out

    return nn.Sequential(*modules, *head)

In [48]:
# Deep plain network; the trailing semicolon keeps the notebook from printing
# the (long) module structure.
net34 = create_net34()
net34;

In [49]:
# Train the deep plain network.
losses_net34 = train(net34,  device=DEVICE, num_epochs=NUM_EPOCHS)

In [50]:
# Deep residual network; the trailing semicolon keeps the notebook from
# printing the (long) module structure.
resnet34 = create_resnet34()
resnet34;

In [51]:
# Train the deep residual network with the same settings as net34.
losses_resnet34 = train(resnet34,  device=DEVICE, num_epochs=NUM_EPOCHS)

In [52]:
# Compare the training loss with and without skip connections (deep nets).
plot(losses_resnet34, "resnet34")
plot(losses_net34, "net34")

In [53]:
# Overall and per-class test accuracy of the deep plain network.
all_accuracy(net34, testloader)
class_accuracy(net34, testloader)

In [54]:
# Overall and per-class test accuracy of the deep residual network.
all_accuracy(resnet34, testloader)
class_accuracy(resnet34, testloader)

Gotcha!

#### (Bonus) Свертки первого слоя

Возьмем ResNet18, предобученный (долго и качественно, в отличие от наших поделок) на датасете ImageNet и отрисуем ядра сверток первого сверточного слоя:

In [55]:
# Load torchvision's ResNet18 with pretrained weights and visualize the kernels
# of its first convolution.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` — confirm against the installed version before changing it.
from torchvision.models import resnet18
resnet18_pt = resnet18(pretrained=True)
show_kernels(resnet18_pt.conv1)

#### Для любознательных

* Поучите сети подольше. Как соотносятся итоговые результаты с residual-блоками и без? С BN и без?
* Мы замеряли только train loss. Попробуйте замерить другие метрики, использовать validation set, ...
* Реализуйте SEBlock (Squeeze-and-Excitation), а затем соберите (с помощью кода из этого семинара) SE-Net. Как она себя ведет?
* Реализуйте Bottleneck для ResNet50/101/152. Подсмотреть можно [тут](https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py).