In [1]:
import time 
import sys
sys.path.insert(0,'../')

import torch
import torch.nn as nn 
import torch.optim as optim 

* VGG block (basic building block) 
* Simplified version


In [2]:
def vgg_block(num_convs, in_channels, out_channels):
    layers=[]
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    
    layers.append(nn.MaxPool2d(kernel_size=2,stride=2))
    
    blk = nn.Sequential(*layers)
    
    return blk

* the VGG Network can be partitioned into two parts:
    * (1st) consisting mostly of ```convolutional``` and ```pooling``` layers 
    * (2nd) consisting of ```fully-connected``` linear layers

* The convolutional portion of the net connects several ```vgg_block``` modules in succession. 

<br/> 

* the variable ```conv_arch``` consists of a list of tuples (one per block), where each contains two values:
    * (1) the number of convolutional layers
    * (2) the number of output channels

In [3]:
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

* Implement ```VGG-11```
* This is a simple matter of executing a for loop over ```conv_arch```

In [4]:
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x.view(x.shape[0], -1)
    
def vgg(conv_arch):
    # The convulational layer part
    conv_layers=[]
    in_channels=1
    
    for (num_convs, out_channels) in conv_arch:
        conv_layers.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels
    
    net=nn.Sequential(
                      *conv_layers,
                      # The fully connected layer part
                      Flatten(),
                      nn.Linear(in_features=512*7*7, out_features=4096),
                      nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(4096, 4096),
                      nn.ReLU(),
                      nn.Dropout(0.5),
                      nn.Linear(4096, 10)
                     )
    return net

In [5]:
net = vgg(conv_arch)

* In order to Check the model 
* Construct a single-channel data example with a height and width of 224 to observe the output shape of each layer

In [6]:
X = torch.randn(size=(1,1,224,224), dtype=torch.float32)

for blk in net:
    X = blk(X)
    print(blk.__class__.__name__,'output shape:\t',X.shape)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 10])


***

* Reading Data (Fashion-MNIST)
* Preprocess: Fashion-MNIST has 28x28 pixels -> upsample them to 244x244

In [7]:
import sys 
import os 

import torchvision 
from torchvision import transforms 
from torch.utils.data import DataLoader 

def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(os.getcwd(), 'datasets', 'fashion-mnist')):
    """Download the Fashion-MNIST dataset and then load into memory."""
    root = os.path.expanduser(root)
    transformer = []
    if resize:
        transformer += [transforms.Resize(resize)]
    transformer += [transforms.ToTensor()]
    transformer = transforms.Compose(transformer)

    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, transform=transformer, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, transform=transformer, download=True)
    num_workers = 0 if sys.platform.startswith('win32') else 4

    train_iter = DataLoader(mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    test_iter = DataLoader(mnist_test, batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter

In [8]:
batch_size = 128
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

* Model Training 

In [9]:
def try_gpu():
    """If GPU is available, return torch.device as cuda:0; else return torch.device as cpu."""
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    return device

In [10]:
def evaluate_accuracy(data_iter, net, device=torch.device('cpu')):
    """Evaluate accuracy of a model on the given data set."""
    net.eval()  # Switch to evaluation mode for Dropout, BatchNorm etc layers.
    acc_sum, n = torch.tensor([0], dtype=torch.float32, device=device), 0
    for X, y in data_iter:
        # Copy the data to device.
        X, y = X.to(device), y.to(device)
        with torch.no_grad():
            y = y.long()
            acc_sum += torch.sum((torch.argmax(net(X), dim=1) == y))
            n += y.shape[0]
    return acc_sum.item()/n

In [11]:
def train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr=None):
    """Train and evaluate a model with CPU or GPU."""
    print('training on', device)
    net.to(device)
    optimizer = optim.SGD(net.parameters(), lr=lr)
    for epoch in range(num_epochs):
        net.train() # Switch to training mode
        n, start = 0, time.time()
        train_l_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        train_acc_sum = torch.tensor([0.0], dtype=torch.float32, device=device)
        for X, y in train_iter:
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device) 
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()
            with torch.no_grad():
                y = y.long()
                train_l_sum += loss.float()
                train_acc_sum += (torch.sum((torch.argmax(y_hat, dim=1) == y))).float()
                n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net, device) 
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\
            % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc, time.time() - start))

In [12]:
lr, num_epochs, batch_size, device = 0.05, 5, 64, try_gpu()

def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.xavier_uniform_(m.weight)

In [13]:
net.apply(init_weights)
net = net.to(device)

In [14]:
train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
criterion = nn.CrossEntropyLoss()

train_ch5(net, train_iter, test_iter, criterion, num_epochs, batch_size, device, lr)

training on cuda:0
epoch 1, loss 0.0112, train acc 0.736, test acc 0.856, time 387.9 sec
epoch 2, loss 0.0052, train acc 0.879, test acc 0.836, time 284.0 sec
epoch 3, loss 0.0042, train acc 0.901, test acc 0.906, time 284.4 sec
epoch 4, loss 0.0037, train acc 0.914, test acc 0.908, time 284.8 sec
epoch 5, loss 0.0032, train acc 0.925, test acc 0.914, time 285.1 sec
