# Part 3.4. VGG

## 1. VGG Net
Oxford VGG(Visual Geometry Group)에서 만든 네트워크이다. 아래 표처럼 다양하다.

![14-1.png](./img/14-1.png)

VGG의 `conv layer`는 모두 `3x3` 그리고 `stride=1`로 되어 있다.

![14-2.png](./img/14-2.png)

## 2. 모델 구현

In [0]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

In [0]:
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# 이미 pretraining된 모델들
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

In [0]:
# VGG 모듈
class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        
        # convolution layer
        self.features = features 
        # pooling layer
        # self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        # fc layer
        self.classifier = nn.Sequential(
            nn.Linear(512 * 4 * 4, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        
        # weight initialization
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)        # convolution layer
        # x = self.avgpool(x)       # avgpool layer
        x = x.view(x.size(0), -1)   # flattening
        x = self.classifier(x)      # fc layer
        return x

    def _initialize_weights(self):
        # 각 layer마다의 weight값들을 맞추어서 초기화
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [0]:
# cfg의 key값에 따라 layer를 생성해주는 함수
def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            
            # 다음 layer를 위해 input channel 업데이트
            in_channels = v   
                     
    return nn.Sequential(*layers)

In [0]:
cfg = {                                                                                                           # conv + fc
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],                                        # 8 + 3 = vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],                               # 10 + 3 = vgg 13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],                # 13 + 3 = vgg 16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # 16 + 3 =vgg 19
    'custom' : [64, 64, 64,'M',128, 128, 128,'M',256, 256, 256,'M']
}

In [8]:
CNN = VGG(make_layers(cfg['custom']), num_classes=10, init_weights=True)
print(CNN)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), paddin

## 3. CIFAR10 Example

### 3.1. 모듈 임포트 및 GPU 설정

In [0]:
import torch
import torch.nn as nn

import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

In [0]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device =='cuda':
    torch.cuda.manual_seed_all(777)

### 3.2. 데이터 전처리


In [11]:
# Tensor로 변환 및 정규화
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=512, shuffle=True, num_workers=0)

testset = torchvision.datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=0)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar10/cifar-10-python.tar.gz


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


Extracting ./cifar10/cifar-10-python.tar.gz to ./cifar10
Files already downloaded and verified


### 3.3. 모델 설계

In [17]:
cfg = [32, 32, 'M', 64, 64, 128, 128, 128, 'M', 256, 256, 256, 512, 512, 512, 'M']  # 13 + 3 = vgg16
vgg16 = VGG(make_layers(cfg), 10, True).to(device)
print(vgg16)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (10): ReLU(inplace=True)
    (11): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=

In [18]:
# 모델 확인
a = torch.Tensor(1,3,32,32).to(device)
out = vgg16(a)
print(out)

tensor([[-4.2243e+34, -1.1487e+35, -2.5790e+35,  3.4456e+33,  1.1356e+34,
         -1.2112e+35, -1.7755e+35,  2.3448e+35,  1.3895e+35,  2.3837e+34]],
       device='cuda:0', grad_fn=<AddmmBackward>)


### 3.4. Cost Function 및 Optimizer 설정

In [0]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(vgg16.parameters(), lr = 0.005,momentum=0.9)
# learning rate를 step 5번 마다 learning rate * 0.9를 해줌
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

### 3.5. 학습

In [22]:
print(len(trainloader))
epochs = 50

# loop over the dataset multiple times
for epoch in range(epochs):  
    running_loss = 0.0
    lr_sche.step()
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 30 == 29:    # print every 30 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 30))
            running_loss = 0.0
        

print('Finished Training')

98
[1,    30] loss: 2.299
[1,    60] loss: 2.292
[1,    90] loss: 2.264
[2,    30] loss: 2.133
[2,    60] loss: 2.059
[2,    90] loss: 1.924
[3,    30] loss: 1.815
[3,    60] loss: 1.762
[3,    90] loss: 1.696
[4,    30] loss: 1.645
[4,    60] loss: 1.599
[4,    90] loss: 1.578
[5,    30] loss: 1.512
[5,    60] loss: 1.500
[5,    90] loss: 1.469
[6,    30] loss: 1.436
[6,    60] loss: 1.446
[6,    90] loss: 1.398
[7,    30] loss: 1.352
[7,    60] loss: 1.368
[7,    90] loss: 1.326
[8,    30] loss: 1.308
[8,    60] loss: 1.252
[8,    90] loss: 1.218
[9,    30] loss: 1.198
[9,    60] loss: 1.175
[9,    90] loss: 1.146
[10,    30] loss: 1.147
[10,    60] loss: 1.087
[10,    90] loss: 1.083
[11,    30] loss: 1.084
[11,    60] loss: 1.058
[11,    90] loss: 1.028
[12,    30] loss: 0.989
[12,    60] loss: 0.984
[12,    90] loss: 0.988
[13,    30] loss: 0.954
[13,    60] loss: 0.984
[13,    90] loss: 0.943
[14,    30] loss: 0.906
[14,    60] loss: 0.878
[14,    90] loss: 0.890
[15,    30] loss

### 3.6. 모델 성능 테스트

In [23]:
correct = 0
total = 0

with torch.no_grad():
    for data in testloader:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)
        
        _, predicted = torch.max(outputs.data, 1)
        
        total += labels.size(0)
        
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 77 %
