# VGGNet using MNIST Dataset

`Author: YUAN Yanzhe`

- This notebook is a reproduction of the [VGG paper](https://arxiv.org/abs/1409.1556).
  - If you want to tune hyperparameters (e.g. the number of FC layers, or the FC layer sizes such as (512*7*7, 4096)), it is recommended to expose them as arguments of the model's constructor.
    - e.g. def \_\_init\_\_(param) 
- The code runs on Google Colab, GPU mode
    

**一些细节：**
- VGG的结构：
  - vgg层，根据输入的conv_info(卷积层个数, 输入特征数, 输出特征数)循环累加vgg_block。**增加特征数**
    - vgg_block，根据卷积层个数循环
      - 每个循环的第一次用conv(c_in,c_out,3,1,1)。3,1,1分别是kernel_size、stride、padding，让特征数增加，image的size不变
      - 之后的循环用conv(c_out,c_out,3,1,1)，特征数和image的size都不变
      - 每个conv后面接上relu增加non-linearity
    - 循环结束加上(2，2)的pool，让image的height和width减半
  - fc层：**减少特征到10**
    - linear-relu-dropout：减少特征数+dropout
    - linear-relu-dropout：特征数不变+dropout
    - output：linear降特征数到10
      
- AlexNet在LeNet的基础上增加了3个卷积层。但AlexNet作者对它们的卷积窗口、输出通道数和构造顺序均做了大量的调整。而VGG提出了可以通过重复使用简单的基础块来构建深度模型的思路。
  
- nn.Sequential 的写法：
  - nn.Sequential(*list)
  - nn.Sequential(nn.Xxx(...), nn.Yyy(...), ...)
  - nn.Sequential(OrderedDict)
  - net = nn.Sequential()
    - net.add_module(name, module)

In [1]:
# Mount Google Drive at /content/drive inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
# Switch the working directory to the project folder on the mounted drive,
# so relative paths used later in the notebook resolve against it.
os.chdir('/content/drive/MyDrive/Colab Notebooks/d2dl_pytorch')

In [8]:
# Import Packages
import torch
from torch import nn as nn
from torch import optim as optim
from torch.utils import data as Data

import torchvision
from torchvision import datasets
from torchvision import transforms

import numpy as np
import pandas as pd 
import time

import d2lzh_pytorch as d2dl

# Show the installed PyTorch version.
print(torch.__version__)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device on:', device)

1.7.0+cu101
device on: cuda


In [9]:
# Hyperparameters
batch_size, num_epochs = 256, 5
learning_rate = 1e-3

# Original conv_info, kept for reference
#conv_info = ((1, 1, 64), (1, 64, 128), (2, 128, 256), (2, 256, 512), (2, 512, 512))

# Configuration actually used: one (conv_num, c_in, c_out) tuple per VGG block.
conv_info = [
    (1, 1, 8),
    (1, 8, 16),
    (2, 16, 32),
    (2, 32, 64),
    (2, 64, 64),
]

# Load Data
# Note: in a function signature, parameters without defaults (batch_size) must come before parameters with defaults (resize, root).
def load_data_from_mnist(batch_size, resize=None, root=''):
    """Build DataLoaders over the MNIST training and test splits.

    Args:
        batch_size: Number of samples per batch, used for both loaders.
        resize: Optional size handed to ``transforms.Resize``; when falsy the
            images are left at their stored size.
        root: Dataset root directory passed to ``torchvision.datasets.MNIST``.
            The dataset is not downloaded here (``download=False``).

    Returns:
        A ``(train_iterator, test_iterator)`` tuple of DataLoaders.
    """
    trans = []
    if resize:
        trans.append(transforms.Resize(resize))
    trans.append(transforms.ToTensor())
    transform = transforms.Compose(trans)

    train_data = torchvision.datasets.MNIST(root=root, train=True, transform=transform, download=False)
    test_data = torchvision.datasets.MNIST(root=root, train=False, transform=transform, download=False)

    train_iterator = Data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
    # Only the training split is shuffled: evaluation does not depend on
    # sample order, and a fixed order keeps test runs deterministic (this
    # matches load_data_fashion_mnist).
    test_iterator = Data.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=4)

    return train_iterator, test_iterator

def load_data_fashion_mnist(batch_size, resize=None, root=''):
    """Build DataLoaders over the Fashion-MNIST training and test splits.

    Args:
        batch_size: Number of samples per batch, used for both loaders.
        resize: Optional size handed to ``Resize``; when falsy no resizing
            step is added.
        root: Dataset root directory passed to ``FashionMNIST``
            (called with ``download=True``).

    Returns:
        A ``(train_iter, test_iter)`` tuple; only the training loader shuffles.
    """
    # Optional resize first, tensor conversion always last.
    steps = [torchvision.transforms.Resize(size=resize)] if resize else []
    steps.append(torchvision.transforms.ToTensor())
    pipeline = torchvision.transforms.Compose(steps)

    # Index 0 is the training split, index 1 the test split.
    splits = [
        torchvision.datasets.FashionMNIST(root=root, train=is_train, download=True, transform=pipeline)
        for is_train in (True, False)
    ]

    train_iter, test_iter = (
        torch.utils.data.DataLoader(split, batch_size=batch_size, shuffle=do_shuffle, num_workers=4)
        for split, do_shuffle in zip(splits, (True, False))
    )
    return train_iter, test_iter

# Fashion-MNIST alternative, kept for reference:
#train_iterator, test_iterator = load_data_fashion_mnist(batch_size,resize=224)
# MNIST loaders with resize=224.
train_iterator, test_iterator = load_data_from_mnist(batch_size=batch_size, resize=224)


In [10]:
# Peek at the first training batch (if any) and print the input tensor size.
first_batch = next(iter(train_iterator), None)
if first_batch is not None:
    X, y = first_batch
    print(X.size())

torch.Size([256, 1, 224, 224])


In [13]:
# Define Model
class vggNet(nn.Module):
    """VGG-style CNN: a stack of conv blocks followed by a 3-layer classifier.

    Args:
        conv_info: Sequence of ``(conv_num, c_in, c_out)`` tuples, one per
            block. Each block holds ``conv_num`` 3x3 convolutions (each
            followed by ReLU) and ends with a 2x2 max-pool that halves the
            height and width.
        input_size: Spatial size of the input images, either an int (square)
            or a ``(height, width)`` pair. It is only used to size the first
            linear layer.
        fc_hidden: Width of the two hidden linear layers.
        num_classes: Number of output logits.
        dropout: Dropout probability applied after each hidden linear layer.

    Raises:
        ValueError: If ``conv_info`` is empty, or if the pooling stages would
            shrink the feature map to nothing.

    With the defaults and a last block of 64 output channels the first linear
    layer has ``64*7*7`` inputs, matching the previously hard-coded value.
    """

    def __init__(self, conv_info, input_size=224, fc_hidden=512, num_classes=10, dropout=0.5):
        super().__init__()
        conv_info = list(conv_info)
        if not conv_info:
            raise ValueError('conv_info must describe at least one VGG block')

        self.vgg_layer = nn.Sequential()
        for i, (conv_num, c_in, c_out) in enumerate(conv_info):
            self.vgg_layer.add_module('vgg_block_' + str(i), self.vgg_block(conv_num, c_in, c_out))

        # Every block ends with one stride-2 pool (floor division), so the
        # spatial size after k blocks is size // 2**k.
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        shrink = 2 ** len(conv_info)
        height, width = height // shrink, width // shrink
        if height < 1 or width < 1:
            raise ValueError('input_size is too small for {} pooling stages'.format(len(conv_info)))

        # Channel count leaving the last block (assumes it has >= 1 conv).
        flat_features = conv_info[-1][2] * height * width

        self.fc_layer = nn.Sequential(
            # FNN Layer
            nn.Linear(flat_features, fc_hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(fc_hidden, fc_hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            # Output Layer
            nn.Linear(fc_hidden, num_classes)
        )

    def vgg_block(self, conv_num, c_in, c_out):
        """Return one block: ``conv_num`` x (3x3 conv + ReLU), then a 2x2 max-pool.

        The first convolution maps ``c_in`` to ``c_out`` channels; the others
        keep ``c_out``. Kernel 3 / stride 1 / padding 1 preserves the spatial
        size, so only the final pool changes it.
        """
        block = []
        in_channels = c_in
        for _ in range(conv_num):
            block.append(nn.Conv2d(in_channels, c_out, 3, 1, 1))
            block.append(nn.ReLU())
            in_channels = c_out
        block.append(nn.MaxPool2d(2, 2))
        return nn.Sequential(*block)

    def forward(self, x):
        """Run the conv stack, flatten per sample, and return the class logits."""
        features = self.vgg_layer(x)
        # Flatten everything except the batch dimension for the linear layers.
        return self.fc_layer(features.flatten(start_dim=1))

# Instantiate the network and print its layer layout.
net = vggNet(conv_info=conv_info)
print(net)

# Adam over all network parameters, with a cross-entropy objective.
optimizor = optim.Adam(params=net.parameters(), lr=learning_rate)
loss_func = nn.CrossEntropyLoss()

vggNet(
  (vgg_layer): Sequential(
    (vgg_block_0): Sequential(
      (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (vgg_block_1): Sequential(
      (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (vgg_block_2): Sequential(
      (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (vgg_block_3): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU()
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(

In [14]:
# Train Model
def evaluate_model(net, test_iterator, device):
    """Return the classification accuracy of ``net`` on ``test_iterator``, in percent.

    Args:
        net: Either an ``nn.Module`` or a plain callable mapping a batch to
            per-class scores. A plain callable is called on the batch as
            given (no device transfer) and, if it has an ``is_training``
            parameter, is called with ``is_training=False``.
        test_iterator: Iterable of ``(X, y)`` batches.
        device: Device an ``nn.Module`` and its batches are moved to.

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` when the iterator yields
        no samples.

    An ``nn.Module`` is switched to eval mode for the whole pass and then put
    back into whatever mode it was in before the call.
    """
    is_module = isinstance(net, nn.Module)
    if is_module:
        # Only modules have .to(); plain callables are used as they are.
        net = net.to(device)
    print('testing on:', device)

    correct, num_exp = 0.0, 0
    if is_module:
        was_training = net.training
        net.eval()  # eval mode will shut off dropout function
        try:
            with torch.no_grad():
                for X, y in test_iterator:
                    hits = net(X.to(device)).argmax(1) == y.to(device)
                    correct += hits.float().sum().cpu().item()
                    num_exp += y.size(0)
        finally:
            # Restore the caller's mode instead of forcing train mode.
            net.train(was_training)
    else:
        print('is this your self-defined nn module?? we are not considering GPU if so')
        code = getattr(net, '__code__', None)
        takes_flag = 'is_training' in getattr(code, 'co_varnames', ())
        with torch.no_grad():
            for X, y in test_iterator:
                output = net(X, is_training=False) if takes_flag else net(X)
                correct += (output.argmax(dim=1) == y).float().sum().item()
                num_exp += y.size(0)

    if num_exp == 0:
        return 0.0
    return correct / num_exp * 100

def train_model(num_epochs, train_iterator, test_iterator, loss_func, optimizor, net, device):
    """Train ``net`` for ``num_epochs`` epochs and print one summary line per epoch.

    Args:
        num_epochs: Number of passes over ``train_iterator``.
        train_iterator: Iterable of ``(X, y)`` training batches.
        test_iterator: Iterable of ``(X, y)`` batches passed to
            ``evaluate_model`` after every epoch.
        loss_func: Callable ``loss_func(output, y)`` returning a scalar loss.
        optimizor: Optimizer over ``net``'s parameters.
        net: The ``nn.Module`` to train; it is moved to ``device``.
        device: Device used for the model and every batch.

    Each epoch prints the mean per-batch loss, the training accuracy, the
    test accuracy and the elapsed time (which includes the evaluation pass).
    """
    net = net.to(device)
    print('training on:', device)
    for epoch in range(num_epochs):
        # Make sure dropout is active for this epoch, whatever mode the
        # caller or the previous evaluation left the net in.
        net.train()
        total_loss, total_batch, total_acc, total_num = 0.0, 0, 0.0, 0
        start_time = time.time()
        for X, y in train_iterator:
            X = X.to(device)
            y = y.to(device)

            output = net(X)
            loss = loss_func(output, y)
            optimizor.zero_grad()
            loss.backward()
            optimizor.step()

            total_loss += loss.cpu().item()
            total_batch += 1
            total_acc += (output.argmax(1) == y).sum().cpu().item()
            total_num += y.size(0)

        test_acc = evaluate_model(net, test_iterator, device)
        print(
            'Epoch: {}, Average loss: {:.4f}, Average accuracy: {:.2f}%, Test Accuracy: {:.2f}%, time: {:.1f}sec'.format(
                epoch + 1,
                total_loss / total_batch,
                total_acc / total_num * 100,
                test_acc,
                time.time() - start_time,
            )
        )

# Run the training loop with the objects configured above.
train_model(
    num_epochs=num_epochs,
    train_iterator=train_iterator,
    test_iterator=test_iterator,
    loss_func=loss_func,
    optimizor=optimizor,
    net=net,
    device=device,
)
        
# Prediction

training on: cuda
testing on: cuda
Epoch: 1, Average loss: 0.5010, Average accuracy: 82.78%, Test Accuracy: 98.31%, time: 48.8sec
testing on: cuda
Epoch: 2, Average loss: 0.0843, Average accuracy: 97.53%, Test Accuracy: 98.73%, time: 49.2sec
testing on: cuda
Epoch: 3, Average loss: 0.0585, Average accuracy: 98.31%, Test Accuracy: 99.05%, time: 49.6sec
testing on: cuda
Epoch: 4, Average loss: 0.0468, Average accuracy: 98.67%, Test Accuracy: 99.07%, time: 50.2sec
testing on: cuda
Epoch: 5, Average loss: 0.0384, Average accuracy: 98.88%, Test Accuracy: 99.08%, time: 50.2sec
