# Fine Tuning

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
import numpy as np
import torchvision
import torchvision.datasets as datasets
from torchvision import transforms
import torch.optim as optim
import time

## Hot Dog Recognition Dataset


### Download the Data Set

In [2]:
import requests, zipfile, io
r = requests.get('https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/dataset/hotdog.zip')
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("./data")
# list dataset folder as a tree
# source: https://stackoverflow.com/questions/3455625/linux-command-to-print-directory-structure-in-the-form-of-a-tree
! ls -R ./data/hotdog/ | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/   /' -e 's/-/|/'

   |-----
   |-----test
   |-------hotdog
   |-------not-hotdog
   |-----train
   |-------hotdog
   |-------not-hotdog


### Read the Dataset and Image Augmentation

In [3]:
def load_dataset(type = 'train'):
    normalize = transforms.Normalize(
        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    if type == 'train':
        data_path = './data/hotdog/train/'
        # We specify the mean and variance of the three RGB channels to normalize the image channel.
        transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize])
    elif type == 'test':
        data_path = './data/hotdog/test/'
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize])
    else:
        raise Exception('Undefined data type')

    train_dataset = torchvision.datasets.ImageFolder(
        root=data_path,
        transform=transform
    )
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=64,
        num_workers=0,
        shuffle=True
    )
    return train_loader

train_loader = load_dataset('train')
test_loader = load_dataset('test')


## Model 

### Train with pre-trained Models

*Note that in PyTorch model resnet18, we cannot modify the number of output classes directly, since it will fail to load the fc layer weights and bias from the checkpoint.*

Nevertheless, we can remove the last layer (the incorrect fc layer) of the pretrained model and then add a suitable fc layer and initialize it.

ref: [toch forum](https://discuss.pytorch.org/t/how-to-replace-last-layer-in-sequential/14422)



In [4]:
import torchvision.models as models

def fc_weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        # m.bias.data.normal_(0.0, 0.01)

class flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)

# pre-trained net with unwanted layer structures.
pretrained_net = models.resnet18(pretrained=True)

# remove last layer (fc layer) from new_net
removed = list(pretrained_net.children())[:-1]
# Now new_new come with desired fc layer but still does not come with 
# pre-trained weights and bias
pretrained_net= torch.nn.Sequential(*removed)

pretrained_net = torch.nn.Sequential(pretrained_net, 
                                     flatten(),
                                     torch.nn.Linear(512,2))

# here we only init the last fc layer
pretrained_net = pretrained_net.apply(fc_weights_init)
# print(pretrained_net)

In [5]:
def evaluate_accuracy(data_iter, net):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        # send data to the GPU if cuda is availabel
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        net.eval()
        with torch.no_grad():
            labels = labels.long()
            acc_sum += torch.sum((torch.argmax(net(imgs), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n

if torch.cuda.is_available():
    print('Training using GPU.')
    pretrained_net.cuda()
else:
    print('Training using CPU.')

lr, num_epochs, batch_size, weight_decay = 0.01, 5, 128, 0.001
optimizer = torch.optim.SGD(pretrained_net.parameters(), lr=lr, weight_decay=weight_decay)

criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    pretrained_net.train() # Switch to training mode
    n, start = 0, time.time()
    train_l_sum = torch.tensor([0.0], dtype=torch.float32)
    train_acc_sum = torch.tensor([0.0], dtype=torch.float32)
    train_iter = iter(train_loader)
    # for _, (X, y) in tqdm(enumerate(train_iter)):
    for X, y in train_iter:
        optimizer.zero_grad()
        if torch.cuda.is_available():
            X = X.cuda()
            y = y.cuda()
            train_l_sum = train_l_sum.cuda()
            train_acc_sum = train_acc_sum.cuda()
        y_hat = pretrained_net(X)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y = y.long()
            train_l_sum += loss.float()
            train_acc_sum += (torch.sum((torch.argmax(y_hat, dim=1) == y))).float()
            n += y.shape[0]

    test_acc = evaluate_accuracy(iter(test_loader), pretrained_net) 
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\
        % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc, time.time() - start))

Training using GPU.
epoch 1, loss 0.0066, train acc 0.821, test acc 0.886, time 13.3 sec
epoch 2, loss 0.0039, train acc 0.905, test acc 0.904, time 13.1 sec
epoch 3, loss 0.0031, train acc 0.924, test acc 0.927, time 13.3 sec
epoch 4, loss 0.0025, train acc 0.939, test acc 0.936, time 13.2 sec
epoch 5, loss 0.0024, train acc 0.945, test acc 0.944, time 13.2 sec


### Train with Non-trained Models

In [8]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)

pretrained_net = models.resnet18(num_classes=2, pretrained=False)
pretrained_net = pretrained_net.apply(weights_init)

if torch.cuda.is_available():
    print('Training using GPU.')
    pretrained_net.cuda()
else:
    print('Training using CPU.')

lr, num_epochs, batch_size, weight_decay = 0.01, 5, 128, 0.001
optimizer = torch.optim.SGD(pretrained_net.parameters(), lr=lr, weight_decay=weight_decay)

criterion = nn.CrossEntropyLoss()

for epoch in range(num_epochs):
    pretrained_net.train() # Switch to training mode
    n, start = 0, time.time()
    train_l_sum = torch.tensor([0.0], dtype=torch.float32)
    train_acc_sum = torch.tensor([0.0], dtype=torch.float32)
    train_iter = iter(train_loader)
    # for _, (X, y) in tqdm(enumerate(train_iter)):
    for X, y in train_iter:
        optimizer.zero_grad()
        if torch.cuda.is_available():
            X = X.cuda()
            y = y.cuda()
            train_l_sum = train_l_sum.cuda()
            train_acc_sum = train_acc_sum.cuda()
        y_hat = pretrained_net(X)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
        with torch.no_grad():
            y = y.long()
            train_l_sum += loss.float()
            train_acc_sum += (torch.sum((torch.argmax(y_hat, dim=1) == y))).float()
            n += y.shape[0]

    test_acc = evaluate_accuracy(iter(test_loader), pretrained_net) 
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\
        % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc, time.time() - start))

Training using GPU.
epoch 1, loss 0.0076, train acc 0.778, test acc 0.560, time 13.2 sec
epoch 2, loss 0.0066, train acc 0.812, test acc 0.789, time 13.2 sec
epoch 3, loss 0.0062, train acc 0.829, test acc 0.735, time 13.2 sec
epoch 4, loss 0.0058, train acc 0.845, test acc 0.845, time 13.2 sec
epoch 5, loss 0.0058, train acc 0.839, test acc 0.764, time 13.2 sec
