# 8.2. Networks Using Blocks (VGG)

In [ ]:
import torch
import torch.nn as nn
from d2l import torch as d2l
from d2l import plt as plt

## 8.2.1. VGG Blocks

The basic building block of CNNs is a sequence of the following:

1. a convolutional layer with padding to maintain the resolution
2. a nonlinearity such as a ReLU
3. a pooling layer such as max-pooling to reduce the resolution


In [ ]:
def vgg_block(num_convs, out_channels):
    """Build one VGG block as an ``nn.Sequential``.

    The block stacks ``num_convs`` pairs of (3x3 convolution with padding 1,
    ReLU) and ends with a single 2x2 max-pooling layer of stride 2.

    Args:
        num_convs: Number of convolution + ReLU pairs in the block.
        out_channels: Output channels of every convolution in the block.

    Returns:
        An ``nn.Sequential`` holding the layers in application order.
    """
    # Flatten the (conv, relu) pairs into one list, in application order.
    conv_relu_pairs = [
        layer
        for _ in range(num_convs)
        for layer in (
            nn.LazyConv2d(out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        )
    ]
    downsample = nn.MaxPool2d(kernel_size=2, stride=2)
    return nn.Sequential(*conv_relu_pairs, downsample)

### VGG block
3x3 Conv, padding 1

2x2 MaxPooling, stride 2

原始VGG网络有5个卷积块，其中前两个块各有一个卷积层，后三个块各包含两个卷积层。 第一个模块有64个输出通道，每个后续模块将输出通道数量翻倍，直到该数字达到512。由于该网络使用8个卷积层和3个全连接层，因此它通常被称为VGG-11。

In [ ]:
# VGG-11 convolutional layout: one (num_convs, out_channels) pair per block.
conv_arch = (
    (1, 64),
    (1, 128),
    (2, 256),
    (2, 512),
    (2, 512),
)

In [ ]:
def vgg(num_convs, num_out_channels):
    """Build one VGG block: ``num_convs`` x (3x3 conv + ReLU), then one pool.

    Args:
        num_convs: Number of convolution + ReLU pairs in the block.
        num_out_channels: Output channels of every convolution in the block.

    Returns:
        An ``nn.Sequential`` with the convolution/ReLU pairs followed by a
        single 2x2 max-pooling layer of stride 2.
    """
    net = []
    for _ in range(num_convs):
        # 3x3 conv, padding 1 keeps the spatial resolution unchanged.
        net.append(nn.LazyConv2d(num_out_channels, kernel_size=3, padding=1))
        # Nonlinearity after every convolution, as in the block definition.
        net.append(nn.ReLU())

    # 2x2 max-pooling, stride 2: applied once per block (not once per
    # convolution), so each block halves the resolution exactly once.
    net.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*net)

## 8.2.2. VGG Network

In [ ]:
class VGG(d2l.Classifier):
    """VGG-style classifier: a stack of VGG blocks plus a dense head.

    Args:
        arch: Iterable of ``(num_convs, out_channels)`` pairs, one per
            convolutional block, in the order the blocks are applied.
        lr: Learning rate, recorded via ``save_hyperparameters``.
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, arch, lr=0.1, num_classes=10):
        super().__init__()
        # Keep this call ahead of any new local variable in this method.
        self.save_hyperparameters()

        # Convolutional part: one vgg_block per (num_convs, out_channels) pair.
        feature_blocks = [
            vgg_block(num_convs, out_channels)
            for num_convs, out_channels in arch
        ]
        # Dense part: two 4096-unit hidden layers with ReLU and dropout 0.5,
        # followed by the output layer.
        classifier_head = [
            nn.Flatten(),
            nn.LazyLinear(4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.LazyLinear(4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.LazyLinear(num_classes),
        ]
        self.net = nn.Sequential(*feature_blocks, *classifier_head)
        self.net.apply(d2l.init_cnn)

In [ ]:
# Run layer_summary for the VGG-11 layout (conv_arch) with shape (1, 1, 224, 224).
VGG(arch=conv_arch).layer_summary((1, 1, 224, 224))

## 8.2.3. Training

In [ ]:
# Same five-block layout as the layer-summary example above, with every
# channel count cut to a quarter (16/32/64/128/128 instead of 64/.../512).
model = VGG(arch=((1, 16), (1, 32), (2, 64), (2, 128), (2, 128)), lr=0.01)
trainer = d2l.Trainer(max_epochs=10, num_gpus=1)
# Fashion-MNIST in batches of 128, resized to 224x224.
data = d2l.FashionMNIST(batch_size=128, resize=(224, 224))
# Pass element 0 of the first batch from get_dataloader(True) together with
# d2l.init_cnn to apply_init. NOTE(review): presumably these are the training
# inputs, used to let the lazy layers infer their shapes before init_cnn is
# applied -- confirm against the d2l API.
model.apply_init([next(iter(data.get_dataloader(True)))[0]], d2l.init_cnn)
trainer.fit(model, data)