# VGG-16 구현

### import libraries

In [7]:
import numpy as np
import torch
import torch.nn as nn
from torchsummary import summary

# pip install torchsummary
# https://deepbaksuvision.github.io/Modu_ObjectDetection/posts/03_04_torchsummary.html

## sequential로 묶어서 하기

### 1. sequential 1개

In [10]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
       
            nn.Conv2d(256, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
          
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU()
        )
        
        self.fc_layer=nn.Sequential(
            nn.Linear(9*9*512, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1000)
        )  
    
    def forward(self, x):
        out=self.layer(x)
        print(out.shape)
#         out=out.view(batch_size, -1)
        out=out.view(out.size(0), -1)
        out=self.fc_layer(out)
        print("최종 shape:", out.shape)
        return out
          
model = VGG16()

In [11]:
input_image = torch.randn(1, 3, 300, 300, dtype=torch.float)
print("input image:", input_image.shape)

vgg = VGG16()
vgg.__init__()
vgg.forward(input_image)

input image: torch.Size([1, 3, 300, 300])
torch.Size([1, 512, 9, 9])
최종 shape: torch.Size([1, 1000])


tensor([[ 1.1667e-02, -9.0218e-04,  1.8828e-02, -8.7136e-04, -8.8349e-03,
         -5.5008e-03,  1.0682e-02,  7.7926e-03,  9.9613e-03, -7.2921e-03,
         -1.1382e-02, -1.3517e-02,  4.3119e-03, -5.8750e-04, -3.9414e-03,
          2.4665e-03, -7.9875e-03, -8.6256e-04, -1.0061e-02, -1.5009e-02,
          5.1601e-03,  1.3538e-02, -4.6324e-03, -7.4240e-03,  4.8207e-03,
          1.7910e-02,  1.2387e-02, -7.0516e-03,  1.0797e-02,  1.0968e-02,
          8.7332e-03,  8.4310e-03,  1.6224e-03,  5.9640e-03, -4.9594e-03,
          1.7580e-03, -6.2760e-03,  5.7726e-03, -1.0735e-02, -2.2033e-03,
         -8.2752e-03, -1.9055e-03, -1.4209e-02,  8.2118e-03, -1.5375e-02,
         -3.8090e-03,  1.2895e-02,  1.2538e-02, -6.3928e-03, -1.4047e-02,
          1.0555e-02, -1.7609e-04,  4.3523e-03, -9.4448e-03,  6.1869e-03,
          2.0335e-03,  1.0966e-02,  4.1934e-03,  5.1101e-03, -7.7356e-03,
         -1.1596e-02,  1.3500e-02, -7.0593e-03, -6.4340e-03,  1.2341e-02,
         -2.8317e-03,  8.6287e-03,  7.

### 2. feature map을 추출할 수 있도록 sequential 3개로 구성

<img src="img/vgg16.png" width="700"/>

<!-- ![VGG-16](img/vgg16.png) -->

In [4]:
class VGG16_v2(nn.Module):
    def __init__(self):
        super(VGG16_v2, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            
            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU()
        )
        
        self.layer2 = nn.Sequential(            
            nn.Conv2d(256, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU()
        )
        
        self.layer3 = nn.Sequential(            
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.ReLU()
        )
        
        self.fc_layer=nn.Sequential(
            nn.Linear(9*9*512, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1000)
        )  

    def forward(self, x):
        out=self.layer1(x)
        print("1st feature map:", out.shape)
        out=self.layer2(out)
        print("2nd feature map:", out.shape)
        out=self.layer3(out)
        print("3rd feature map:", out.shape)
        out=out.view(out.size(0), -1)
        out=self.fc_layer(out)
        print("fc layer shape:", out.shape)
        return out
        
model = VGG16_v2()

In [5]:
summary(model, (3, 300, 300))

1st feature map: torch.Size([2, 256, 37, 37])
2nd feature map: torch.Size([2, 512, 18, 18])
3rd feature map: torch.Size([2, 512, 9, 9])
fc layer shape: torch.Size([2, 1000])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 300, 300]           1,792
              ReLU-2         [-1, 64, 300, 300]               0
            Conv2d-3         [-1, 64, 300, 300]          36,928
              ReLU-4         [-1, 64, 300, 300]               0
         MaxPool2d-5         [-1, 64, 150, 150]               0
              ReLU-6         [-1, 64, 150, 150]               0
            Conv2d-7        [-1, 128, 150, 150]          73,856
              ReLU-8        [-1, 128, 150, 150]               0
            Conv2d-9        [-1, 128, 150, 150]         147,584
             ReLU-10        [-1, 128, 150, 150]               0
        MaxPool2d-11          [-1, 128, 75, 75]          

In [27]:
input_image = torch.randn(1, 3, 300, 300, dtype=torch.float)
print("input image:", input_image.shape)

vgg = VGG16_v2()
vgg.__init__()
vgg.forward(input_image)

input image: torch.Size([1, 3, 300, 300])
1st feature map: torch.Size([1, 256, 37, 37])
2nd feature map: torch.Size([1, 512, 18, 18])
3rd feature map: torch.Size([1, 512, 9, 9])
fc layer shape: torch.Size([1, 1000])


tensor([[ 4.6627e-03, -5.4811e-03,  7.1940e-03,  2.8413e-03,  3.5779e-03,
          1.2086e-02, -1.0418e-02,  2.4029e-02,  5.7935e-03,  1.0993e-02,
          3.3750e-03,  5.5346e-03, -8.7306e-03, -8.7923e-04, -1.3079e-02,
          1.1408e-02,  7.0815e-03,  3.3063e-03, -1.8808e-03,  4.0098e-04,
          3.7818e-03,  7.5980e-04,  4.6099e-03, -4.4130e-03, -9.7615e-03,
         -2.6397e-03,  2.2819e-03, -1.5259e-02, -2.2287e-03,  1.3305e-03,
          7.6571e-03,  1.0547e-02,  1.0508e-03, -1.3387e-02,  1.9768e-02,
          1.1887e-02, -3.9477e-03, -6.9013e-03, -1.0001e-03,  1.8150e-02,
         -8.6054e-03,  1.0927e-02,  7.4750e-03,  9.7499e-03, -4.0206e-03,
         -4.3561e-03, -1.8670e-03,  1.8295e-03, -7.3207e-03, -2.9670e-03,
          4.4422e-03, -4.9124e-03,  1.4957e-02, -5.0116e-03,  1.2069e-02,
         -4.6408e-03, -1.2279e-02, -3.4053e-03,  9.6161e-03,  1.2978e-02,
          6.0171e-03,  4.9745e-03,  4.1003e-03, -1.5143e-02,  4.8552e-03,
         -1.2275e-03,  1.1815e-02,  1.

# ====================================================

# layer 하나씩 쌓기

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# layer 하나씩 만들기

class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv5 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv6 = nn.Conv2d(256, 256, 3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        self.conv7 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv8 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv9 = nn.Conv2d(512, 512, 3, padding=1)
        self.pool4 = nn.MaxPool2d(2, 2)
        
        self.conv10 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv11 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
        self.pool5 = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(9*9*512, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)
            
    def forward(self, x):
#         out = F.relu(self.conv2(x)), (2, 2)
#         out = F.relu(self.conv4(x)), (2, 2)
#         out = F.relu(self.conv6(x)), (2, 2)
#         out = F.relu(self.conv9(x)), (2, 2)
#         out = F.relu(self.conv12(x)), (2, 2)

        x = F.relu(self.pool1(x)), (2, 2)
        x = F.relu(self.pool2(x)), (2, 2)
        x = F.relu(self.pool3(x)), (2, 2)
        x = F.relu(self.pool4(x)), (2, 2)
        x = F.relu(self.pool5(x)), (2, 2)
        
        x=x.view(out.size(0), -1)
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=F.relu(self.fc3(x))
        print(x.shape)
        return x
        
model = VGG16()

In [3]:
in_image = torch.randn(1, 3, 300, 300, dtype=torch.float)
print("input image: ", in_image.shape)


vgg = VGG16()
vgg.__init__()
vgg.forward(in_image)
# out1 = layer1(in_image)
# print(out1.shape)

input image:  torch.Size([1, 3, 300, 300])


TypeError: max_pool2d(): argument 'input' (position 1) must be Tensor, not tuple

### 잘 모르겠어서 한줄씩 확인

In [None]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv2 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool1 = nn.MaxPool2d(3, 3)
        
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 128, 3, padding=1)
        self.pool2 = nn.MaxPool2d(3, 3)
        
        
        

In [53]:
in_image = torch.randn(1, 3, 300, 300, dtype=torch.float)
print("input image: ", in_image.shape)

conv1 = nn.Conv2d(3, 64, 3, padding=1)  # 입력, 출력, 필터크기, 
conv2 = nn.Conv2d(64, 64, 3, padding=1)
pool = nn.MaxPool2d(2,2)

feature_map1 = conv1(in_image)
print("conv1:", feature_map1.shape)
feature_map1 = conv2(feature_map1)
print("conv2:", feature_map1.shape)
feature_map2 = pool(feature_map1)
print("pool1: ", feature_map2.shape)
print("")


conv3 = nn.Conv2d(64, 128, 3, padding=1)  # 입력, 출력, 필터크기, 
conv4 = nn.Conv2d(128, 128, 3, padding=1)

feature_map3 = conv3(feature_map2)
print("conv3:", feature_map3.shape)
feature_map3 = conv4(feature_map3)
print("conv4:", feature_map3.shape)
feature_map4 = pool(feature_map3)
print("pool2: ", feature_map4.shape)
print("")

conv5 = nn.Conv2d(128, 256, 3, padding=1)  # 입력, 출력, 필터크기, 
conv6 = nn.Conv2d(256, 256, 3, padding=1)

feature_map5 = conv5(feature_map4)
print("conv5:", feature_map5.shape)
feature_map5 = conv6(feature_map5)
print("conv6:", feature_map5.shape)
feature_map6 = pool(feature_map5)
print("pool3: ", feature_map6.shape)
print("")

conv7 = nn.Conv2d(256, 512, 3, padding=1)  # 입력, 출력, 필터크기, 
conv8 = nn.Conv2d(512, 512, 3, padding=1)

feature_map7 = conv7(feature_map6)
print("conv7:", feature_map7.shape)
feature_map7 = conv8(feature_map7)
print("conv8:", feature_map7.shape)
feature_map8 = pool(feature_map7)
print("pool4: ", feature_map8.shape)
print("")


conv9 = nn.Conv2d(512, 512, 3, padding=1)  # 입력, 출력, 필터크기, 
conv10 = nn.Conv2d(512, 512, 3, padding=1)

feature_map9 = conv9(feature_map8)
print("conv9:", feature_map9.shape)
feature_map9 = conv10(feature_map9)
print("conv10:", feature_map9.shape)
feature_map10 = pool(feature_map9)
print("pool5: ", feature_map10.shape)
print("")

input image:  torch.Size([1, 3, 300, 300])
conv1: torch.Size([1, 64, 300, 300])
conv2: torch.Size([1, 64, 300, 300])
pool1:  torch.Size([1, 64, 150, 150])

conv3: torch.Size([1, 128, 150, 150])
conv4: torch.Size([1, 128, 150, 150])
pool2:  torch.Size([1, 128, 75, 75])

conv5: torch.Size([1, 256, 75, 75])
conv6: torch.Size([1, 256, 75, 75])
pool3:  torch.Size([1, 256, 37, 37])

conv7: torch.Size([1, 512, 37, 37])
conv8: torch.Size([1, 512, 37, 37])
pool4:  torch.Size([1, 512, 18, 18])

conv9: torch.Size([1, 512, 18, 18])
conv10: torch.Size([1, 512, 18, 18])
pool5:  torch.Size([1, 512, 9, 9])

