## VGG 16-layer Implementation
Based on the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".

Plan: write the model modules first, then write the training and test code.

1. preprocessing 
2. convolutional layers
3. FC layer
4. soft-max layer

In [4]:
!pip install scipy

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install --upgrade pip' command.[0m


In [5]:
# import all needed modules 
import torch, torch.nn as nn, torch.nn.functional as F
import torchvision.datasets as dsets
import torch.optim as optim
import torch.utils as utils
import random
import torchvision.transforms as transforms
import numpy as np
import PIL
from torch.utils.data.sampler import SubsetRandomSampler

### Load the ImageNet dataset (the archives must be downloaded manually into ./data first)

In [6]:
# Data-loading configuration.
shuffle_dataset: bool = True      # shuffle sample indices before the train/validation split
validation_ratio: float = 0.1     # fraction of the training set held out for validation
batch_size: int = 256             # mini-batch size

In [7]:
# Seed the Python and PyTorch generators before anything random happens.
random.seed(111) # random seed
torch.manual_seed(777)

# Prefer the GPU when one is visible; in that case seed every CUDA device too.
if torch.cuda.is_available():
    device = 'cuda'
    torch.cuda.manual_seed_all(777)
else:
    device = 'cpu'

In [9]:
# Training pipeline.
# ImageNet images come in arbitrary sizes, so the shorter side is first
# rescaled to 256 px; without this, RandomCrop raises on any image that has a
# side smaller than 224 px.
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
])

# transform_valid = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(
#         mean = [0.485, 0.456, 0.406],
#         std = [0.229, 0.224, 0.225])])

# Evaluation pipeline.
# A deterministic resize + centre crop gives every sample the same 224x224
# shape; without it, images of different sizes cannot be stacked into a batch.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]),
])

In [11]:
# Imagenet Dataset
# torchvision's ImageNet class selects the subset with `split` ('train' or
# 'val'); it has no `train=` flag and cannot download the archives itself, so
# they must already be present under `path`.
path = './data' # data folder
trainset = dsets.ImageNet(root=path, split='train',
                          transform=transform_train)
# validset = dsets.ImageNet(root=path, split='train',
#                           transform=transform_valid)
testset = dsets.ImageNet(root=path, split='val',
                         transform=transform_test)

# Hold out the first `split` (optionally shuffled) indices for validation.
num_train = len(trainset)
indices = list(range(num_train))
split = int(np.floor(validation_ratio * num_train))

if shuffle_dataset:
    # Fixed seed so the train/validation split is reproducible between runs
    # (np.random.seed() without an argument reseeds from OS entropy).
    np.random.seed(777)
    np.random.shuffle(indices)

train_indices, valid_indices = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(valid_indices)



RuntimeError: The archive ILSVRC2012_devkit_t12.tar.gz is not present in the root directory or is corrupted. You need to download it externally and place it in ./data.

In [None]:
# preprocessing : mean subtraction from image pixel
# NOTE(review): X is not defined anywhere in the visible notebook, so this
# cell cannot run as written. np.mean(X) without an axis argument yields one
# scalar over all elements, not a per-channel mean -- TODO confirm which
# statistic is intended. The transforms defined earlier already apply a
# per-channel transforms.Normalize to every image.
X -= np.mean(X)

### parameters

In [2]:
# weight initialization for configuration A
def init_weights(net):
    """Initialise one layer in place; intended for use with ``nn.Module.apply``.

    Linear and Conv2d layers (including subclasses) get weights drawn from a
    normal distribution with zero mean and standard deviation 0.1 (variance
    1e-2) and a zero bias. Every other module type is left untouched.

    Args:
        net: The module to initialise.
    """
    if isinstance(net, (nn.Linear, nn.Conv2d)):
        nn.init.normal_(net.weight, mean=0.0, std=0.1)
        # Layers built with bias=False have no bias tensor to reset.
        if net.bias is not None:
            nn.init.zeros_(net.bias)

### TODO: a preprocessing stage is still needed

In [8]:
# VGG16 configuration A
class VGG16_A(nn.Module):
    """VGG configuration A: 8 convolutional layers followed by 3 FC layers.

    Every convolution is 3x3 with stride 1 and padding 1 and is followed by a
    ReLU; each of the five stages ends with a 2x2 max-pool, so the spatial
    size is divided by 32 before the classifier. ``forward`` returns softmax
    probabilities over 1000 classes.

    Args:
        dropout_ratio: Drop probability of the dropout layer used after the
            first two fully connected layers while training.
        input_size: Side length of the square input images. Must be a
            positive multiple of 32. The default of 32 reproduces the
            original ``Linear(512, 4096)`` first FC layer.

    Raises:
        ValueError: If ``input_size`` is not a positive multiple of 32.
    """

    def __init__(self, dropout_ratio=0.5, input_size=32):
        super(VGG16_A, self).__init__()
        if input_size <= 0 or input_size % 32 != 0:
            raise ValueError(
                "input_size must be a positive multiple of 32, got %r" % (input_size,))

        # Shape comments below assume the default 3@32x32 RGB input.
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = True
        self.dropout_ratio = dropout_ratio

        # convolutional layers (with ReLU and MaxPooling)
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # 64@32x32
            self.relu,
            self.maxpool)  # 64@16x16

        self.conv2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # 128@16x16
            self.relu,
            self.maxpool)  # 128@8x8

        self.conv3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),  # 256@8x8
            self.relu,
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),  # 256@8x8
            self.relu,
            self.maxpool)  # 256@4x4

        self.conv4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),  # 512@4x4
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@4x4
            self.relu,
            self.maxpool)  # 512@2x2

        self.conv5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@2x2
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@2x2
            self.relu,
            self.maxpool)  # 512@1x1

        # Fully Connected Layers
        # Five 2x2 max-pools shrink each side by 32, leaving
        # 512 x (input_size / 32)^2 features for the first FC layer.
        final_side = input_size // 32
        self.fc1 = nn.Linear(512 * final_side * final_side, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

        # Dropout Module
        self.drop_layer = nn.Dropout(p=self.dropout_ratio)

        # Classifier used while training: dropout after fc1 and fc2.
        self.fc_module_train = nn.Sequential(
            self.fc1,
            self.drop_layer,
            self.relu,
            self.fc2,
            self.drop_layer,
            self.relu,
            self.fc3
        )

        # Classifier used for evaluation: same (shared) layers, no dropout.
        self.fc_module_eval = nn.Sequential(
            self.fc1,
            self.relu,
            self.fc2,
            self.relu,
            self.fc3
        )

    def forward(self, x):
        """Return class probabilities of shape (batch, 1000) for images ``x``."""
        out = self.conv1(x)
        out = self.conv2(out)
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.conv5(out)

        # Collapse (batch, C, H, W) to (batch, C*H*W); nn.Linear only acts on
        # the last dimension.
        out = out.flatten(1)

        if self.dropout:
            out = self.fc_module_train(out)
        else:
            out = self.fc_module_eval(out)

        # soft-max layer
        out = F.softmax(out, dim=1)
        return out

    def train(self, mode=True):
        """Set training mode, keeping the ``dropout`` flag in sync.

        Overrides ``nn.Module.train`` with the same signature so that the
        standard ``model.train()`` / ``model.eval()`` calls keep working.
        """
        super(VGG16_A, self).train(mode)
        self.dropout = bool(mode)
        return self

    def evaluate(self):
        """Switch to evaluation mode (no dropout); same effect as ``eval()``."""
        return self.train(False)

In [7]:
# VGG16 configuration D
class VGG16_D(nn.Module):
    """VGG configuration D: 13 convolutional layers followed by 3 FC layers.

    Every convolution is 3x3 with stride 1 and padding 1 and is followed by a
    ReLU; each of the five stages ends with a 2x2 max-pool, so the spatial
    size is divided by 32 before the classifier. ``forward`` returns softmax
    probabilities over 1000 classes.

    Args:
        dropout_ratio: Drop probability of the dropout layer used after the
            first two fully connected layers while training.
        input_size: Side length of the square input images. Must be a
            positive multiple of 32. The default of 32 reproduces the
            original ``Linear(512, 4096)`` first FC layer.

    Raises:
        ValueError: If ``input_size`` is not a positive multiple of 32.
    """

    def __init__(self, dropout_ratio=0.5, input_size=32):
        super(VGG16_D, self).__init__()
        if input_size <= 0 or input_size % 32 != 0:
            raise ValueError(
                "input_size must be a positive multiple of 32, got %r" % (input_size,))

        # Shape comments below assume the default 3@32x32 RGB input.
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Both attributes are read later (drop_layer / forward) and therefore
        # have to exist before they are used.
        self.dropout = True
        self.dropout_ratio = dropout_ratio

        # convolutional layers (with ReLU and MaxPooling)
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),  # 64@32x32
            self.relu,
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),  # 64@32x32
            self.relu,
            self.maxpool)  # 64@16x16

        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # 128@16x16
            self.relu,
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),  # 128@16x16
            self.relu,
            self.maxpool)  # 128@8x8

        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),  # 256@8x8
            self.relu,
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),  # 256@8x8
            self.relu,
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),  # 256@8x8
            self.relu,
            self.maxpool)  # 256@4x4

        self.layer4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),  # 512@4x4
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@4x4
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@4x4
            self.relu,
            self.maxpool)  # 512@2x2

        self.layer5 = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@2x2
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@2x2
            self.relu,
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),  # 512@2x2
            self.relu,
            self.maxpool)  # 512@1x1

        # Fully Connected Layers
        # Five 2x2 max-pools shrink each side by 32, leaving
        # 512 x (input_size / 32)^2 features for the first FC layer.
        final_side = input_size // 32
        self.fc1 = nn.Linear(512 * final_side * final_side, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

        # Dropout Module
        self.drop_layer = nn.Dropout(p=self.dropout_ratio)

        # Classifier used while training: dropout after fc1 and fc2.
        self.fc_module_train = nn.Sequential(
            self.fc1,
            self.drop_layer,
            self.relu,
            self.fc2,
            self.drop_layer,
            self.relu,
            self.fc3
        )

        # Classifier used for evaluation: same (shared) layers, no dropout.
        self.fc_module_eval = nn.Sequential(
            self.fc1,
            self.relu,
            self.fc2,
            self.relu,
            self.fc3
        )

    def forward(self, x):
        """Return class probabilities of shape (batch, 1000) for images ``x``."""
        # The stages are registered as layer1..layer5 in __init__.
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)

        # Collapse (batch, C, H, W) to (batch, C*H*W); nn.Linear only acts on
        # the last dimension.
        out = out.flatten(1)

        if self.dropout:
            out = self.fc_module_train(out)
        else:
            out = self.fc_module_eval(out)

        # soft-max layer
        out = F.softmax(out, dim=1)
        return out

    def train(self, mode=True):
        """Set training mode, keeping the ``dropout`` flag in sync.

        Overrides ``nn.Module.train`` with the same signature so that the
        standard ``model.train()`` / ``model.eval()`` calls keep working.
        """
        super(VGG16_D, self).train(mode)
        self.dropout = bool(mode)
        return self

    def evaluate(self):
        """Switch to evaluation mode (no dropout); same effect as ``eval()``."""
        return self.train(False)

### set device to cuda and set random seed for reproducibility

use GPU

In [16]:
# Build configuration A and initialise it; Module.apply returns the module
# itself, so construction and initialisation can be chained.
model = VGG16_A().apply(init_weights)
# Replicate across GPUs only when more than one is visible.
model = nn.DataParallel(model) if torch.cuda.device_count() > 1 else model
# NOTE(review): VGG16_A's first FC layer takes 512 features, which matches
# 32x32 inputs, whereas transform_train crops images to 224x224 -- confirm
# the intended input resolution before training.
model.to(device)

VGG16_A(
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=

### Define a Loss Function and Optimizer

Training setup:
1. objective: multinomial logistic regression (cross-entropy loss)
2. optimizer: mini-batch gradient descent
3. batch size = 256
4. momentum = 0.9
5. weight decay (L2 penalty multiplier set to $5 \times 10^{-4}$) - first two fully-connected layers
6. dropout = 0.5 - first two fully-connected layers
7. learning rate: initially 0.01, decreased by a factor of 10 whenever the validation-set accuracy stops improving

In [None]:
# Optimisation hyper-parameters.
# batch_size = 256  (already assigned in the data-loading configuration cell)
epochs = 74          # total epoch number
init_lr = 1e-2       # initial learning rate
momentum = 0.9       # SGD momentum
dropout_ratio = 0.5  # dropout probability

# Training

In [None]:
# Multinomial logistic regression objective (cross-entropy over class indices).
# nn.MSELoss cannot be used here: the dataset yields integer class labels of
# shape (batch,), while the model outputs a (batch, 1000) tensor.
# The forward() of the VGG classes already applies softmax, so the loss is the
# negative log-likelihood of those probabilities; the clamp avoids log(0).
_nll_loss = nn.NLLLoss()


def loss_function(probs, target):
    """Return the mean cross-entropy between softmax outputs and class indices.

    Args:
        probs: (batch, num_classes) tensor of class probabilities.
        target: (batch,) tensor of integer class indices.
    """
    return _nll_loss(torch.log(probs.clamp_min(1e-12)), target)


optimizer = optim.SGD(model.parameters(), lr=init_lr, momentum=momentum, weight_decay=5e-4)
# mode='max': the monitored quantity is validation accuracy, so the learning
# rate is cut by 10x when it stops increasing.
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.1, verbose=True)

In [None]:
for epoch in range(epochs):
    # train

    # validation

    # lr_scheduler.step(accuracy)

    # TODO: the three steps above are not implemented yet. The explicit
    # `pass` keeps the cell syntactically valid, because a loop body that
    # contains only comments is a syntax error.
    pass
    

# Test