In [1]:
import os
import numpy as np
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch import Tensor
from typing import Any, Callable, List, Optional, Tuple
from easydict import EasyDict

## Model
Make Model(VGG-16, VGG-19) by PyTorch

In [7]:
class BasicConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()

        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, bias=False, stride = 1, padding="same", **kwargs),
            nn.ReLU()
        )
    
    def forward(self, x):
        return self.conv(x)

In [30]:
class VGGNet(nn.Module):
    def __init__(self, nblocks, num_classes=10, init_weights=True):
        # nblocks = number of conv3, 4, 5 layer
        # if nblocks = 3 -> VGGNet16, if nblocks=4 -> VGGNet19
        
        super(VGGNet, self).__init__()

        # conv1
        self.conv1 = nn.Sequential(
            BasicConv2d(3, 64),
            BasicConv2d(64, 64),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )

        # conv2
        self.conv2 = nn.Sequential(
            BasicConv2d(64, 128),
            BasicConv2d(128, 128),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )

        # conv3
        layers = []
        layers.append(BasicConv2d(128, 256))
        for i in range(nblocks-1):
            layers.append(BasicConv2d(256, 256))
        layers.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv3 = nn.Sequential(*layers)

        # conv4
        layers = []
        layers.append(BasicConv2d(256, 512))
        for i in range(nblocks-1):
            layers.append(BasicConv2d(512, 512))
        layers.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv4 = nn.Sequential(*layers)

        # conv5, number of channels don't change
        layers = []
        for i in range(nblocks):
            layers.append(BasicConv2d(512, 512))
        layers.append(nn.MaxPool2d(kernel_size = 2, stride = 2))
        
        self.conv5 = nn.Sequential(*layers)

        # classifier
        self.fc = nn.Sequential(
            # since 224x224 -> 7x7, 512x7x7 must be input
            # paper does not use average pooling, so I didn't use adaptive average pooling
            nn.Linear(512*7*7, 4096),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = nn.Flatten()(x)
        x = self.fc(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # according to paper, random initialization = (mean=0, variance=10^-2)
                nn.init.normal_(m.weight, mean=0.0, std=0.1)
                if m.bias is not None:
                    # if bias exist, bias = 0
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

In [31]:
def VGGNet16():
    return VGGNet(3)

def VGGNet19():
    return VGGNet(4)

In [32]:
# test
model = VGGNet16()
x = torch.randn(3, 3, 224, 224)
print(model(x).size())

torch.Size([3, 10])


In [33]:
from torchsummary import summary
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
summary(model, (3,224,224), device = device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,728
              ReLU-2         [-1, 64, 224, 224]               0
       BasicConv2d-3         [-1, 64, 224, 224]               0
            Conv2d-4         [-1, 64, 224, 224]          36,864
              ReLU-5         [-1, 64, 224, 224]               0
       BasicConv2d-6         [-1, 64, 224, 224]               0
         MaxPool2d-7         [-1, 64, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]          73,728
              ReLU-9        [-1, 128, 112, 112]               0
      BasicConv2d-10        [-1, 128, 112, 112]               0
           Conv2d-11        [-1, 128, 112, 112]         147,456
             ReLU-12        [-1, 128, 112, 112]               0
      BasicConv2d-13        [-1, 128, 112, 112]               0
        MaxPool2d-14          [-1, 128,

## Train
Train Model by CIFAR-10

In [34]:
def load_dataset():
    # preprocess
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        transforms.Resize((224, 224))
    ])

    # load data
    train = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True) 
    test = datasets.CIFAR10(root="./data", train=False, transform=transform, download=True)
    train_loader = DataLoader(train, batch_size=args.batch_size, shuffle = True)
    test_loader = DataLoader(test, batch_size=args.batch_size, shuffle=True)
    return train_loader, test_loader

In [35]:
# hyperparameter
args = EasyDict()
args.batch_size = 64
args.learning_rate = 0.01
args.n_epochs = 1

# functions
criterion = nn.CrossEntropyLoss(reduction = 'sum').to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

In [38]:
import time

# load dataset
train_loader, test_loader = load_dataset()

loss_hist = []
accuracy_hist = []

start_time = time.time()

# train
for epoch in range(args.n_epochs):
    model.train()
    train_loss = 0
    correct, count = 0, 0
    for batch_idx, (images, labels) in enumerate(train_loader, start=1):
        print(f"\rcurrent batch: {batch_idx} \t Total batch: {len(train_loader)}", end="")
        images, labels = images.to(device), labels.to(device)
        output = model(images)
        optimizer.zero_grad()
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, preds = torch.max(output, 1) # torch max output is (max, max_index)
        count += labels.size(0)
        correct += torch.sum(preds == labels)
        
    loss_hist.append(train_loss/count)
    accuracy_hist.append(correct/count)
    print(f"\n[*] Epoch: {epoch} \tTrain accuracy: {correct/count} \tTrain Loss: {train_loss/count}")

end_time = time.time()

print(f"Training time : {end_time - start_time}")

Files already downloaded and verified
Files already downloaded and verified
current batch: 782 	 Total batch: 782[*] Epoch: 0 	Train accuracy: 0.09865999966859818 	Train Loss: 2.3035429692077636
Training time : 730.4574925899506


In [43]:
# test
model.eval()
correct, count = 0, 0
test_loss = 0

with torch.no_grad():
    for batch_idx, (images, labels) in enumerate(test_loader, start=1):
        print(f"\rcurrent batch: {batch_idx} \t Total batch: {len(test_loader)}", end="")
        images, labels = images.to(device), labels.to(device)
        output = model(images)
        loss = criterion(output, labels)
        test_loss += loss.item()
        _, preds = torch.max(output, 1)
        count += labels.size(0)
        correct += torch.sum(preds == labels)

print(f"\nTest accuracy: {correct/count} \tTest Loss: {test_loss/count}")

current batch: 157 	 Total batch: 157
Test accuracy: 0.09999999403953552 	Test Loss: 2.302982970046997
