# VGG net

In [1]:
import time
import torch
from torch import nn, optim
import torchvision
from torchsummary import summary

import renyan_utils as ry

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

## VGG block

In [3]:
def vgg_block(num_convs, in_channels, out_channels):
    blk = []
    for i in range(num_convs):
        # double conv layer
        if i == 0:
            blk.append(nn.Conv2d(in_channels, out_channels, kernel_size = 3, padding = 1))
        else:
            blk.append(nn.Conv2d(out_channels, out_channels, kernel_size = 3, padding = 1))
    blk.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
    return nn.Sequential(*blk)

In [4]:
conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256 ,512), (2, 512, 512))
fc_features = 512 * 7 * 7
fc_hidden_units = 4096 # or other customized numbers

## VGG net

In [5]:
def vgg(conv_arch, fc_features, fc_hidden_units = 4096):
    net = nn.Sequential()
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        net.add_module("vgg_block_" + str(i+1),
                      vgg_block(num_convs, in_channels, out_channels))
    net.add_module("fc", nn.Sequential(ry.FlattenLayer(),
                                       nn.Linear(fc_features, fc_hidden_units),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(fc_hidden_units, fc_hidden_units),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(fc_hidden_units, 10)))
    
    return net

In [6]:
net = vgg(conv_arch, fc_features, fc_hidden_units)

In [7]:
net

Sequential(
  (vgg_block_1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (vgg_block_5): Sequential(
    (0): Conv2d(512, 512, kernel_

In [8]:
summary(net, (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]             640
         MaxPool2d-2         [-1, 64, 112, 112]               0
            Conv2d-3        [-1, 128, 112, 112]          73,856
         MaxPool2d-4          [-1, 128, 56, 56]               0
            Conv2d-5          [-1, 256, 56, 56]         295,168
            Conv2d-6          [-1, 256, 56, 56]         590,080
         MaxPool2d-7          [-1, 256, 28, 28]               0
            Conv2d-8          [-1, 512, 28, 28]       1,180,160
            Conv2d-9          [-1, 512, 28, 28]       2,359,808
        MaxPool2d-10          [-1, 512, 14, 14]               0
           Conv2d-11          [-1, 512, 14, 14]       2,359,808
           Conv2d-12          [-1, 512, 14, 14]       2,359,808
        MaxPool2d-13            [-1, 512, 7, 7]               0
     FlattenLayer-14                [-1

## Data

In [9]:
# downsize channel number, not the size of images
ratio = 8
small_conv_arch = [(1, 1, 64//ratio), (1, 64//ratio, 128//ratio), (2, 128//ratio, 256//ratio), (2, 256//ratio, 512//ratio), (2, 512//ratio, 512//ratio)]

In [10]:
net = vgg(small_conv_arch, fc_features//ratio, fc_hidden_units//ratio)

In [11]:
summary(net, (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 8, 224, 224]              80
         MaxPool2d-2          [-1, 8, 112, 112]               0
            Conv2d-3         [-1, 16, 112, 112]           1,168
         MaxPool2d-4           [-1, 16, 56, 56]               0
            Conv2d-5           [-1, 32, 56, 56]           4,640
            Conv2d-6           [-1, 32, 56, 56]           9,248
         MaxPool2d-7           [-1, 32, 28, 28]               0
            Conv2d-8           [-1, 64, 28, 28]          18,496
            Conv2d-9           [-1, 64, 28, 28]          36,928
        MaxPool2d-10           [-1, 64, 14, 14]               0
           Conv2d-11           [-1, 64, 14, 14]          36,928
           Conv2d-12           [-1, 64, 14, 14]          36,928
        MaxPool2d-13             [-1, 64, 7, 7]               0
     FlattenLayer-14                 [-

In [13]:
batch_size = 16
train_iter, test_iter = ry.load_data_fashion_mnist_resize(batch_size, resize = 224)

In [15]:
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr = lr)
ry.train_mnist_net(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)