# fine_tuning.ipynb

### This code also uses a pre-trained VGG-16 network to classify ants and bees.
### This time the whole network is fine-tuned: the convolutional layers and all three fully-connected layers are retrained, each group with its own learning rate.

In [1]:
import numpy as np
import random

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import models

from tqdm import tqdm

In [2]:
# Seed every random number generator used in this notebook (PyTorch, NumPy and
# the standard library) with one shared value so that runs are repeatable.
SEED = 1234
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(SEED)

In [3]:
#
# Data preparation: file lists -> Datasets -> DataLoaders
#
from utils.dataloader_image_classification import ImageTransform, make_datapath_list, HymenopteraDataset

# Collect the image file paths of the ant / bee (hymenoptera) data set,
# one list per phase
train_list = make_datapath_list(phase="train")
val_list = make_datapath_list(phase="val")

# Settings handed to ImageTransform
# (presumably the input image size and per-channel normalization statistics
#  - confirm against utils.dataloader_image_classification)
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# One Dataset per phase, each with its own ImageTransform instance
train_transform = ImageTransform(size, mean, std)
train_dataset = HymenopteraDataset(
    file_list=train_list,
    transform=train_transform,
    phase='train',
)

val_transform = ImageTransform(size, mean, std)
val_dataset = HymenopteraDataset(
    file_list=val_list,
    transform=val_transform,
    phase='val',
)

# Mini-batch size (originally 32); reduce it further if the GPU runs out of memory
batch_size = 16

# Build the DataLoaders directly inside the phase -> loader dictionary:
# training batches are shuffled, validation batches keep the file order
dataloaders_dict = {
    "train": torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True),
    "val": torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False),
}

# Keep the individual loaders available under their own names as well
train_dataloader = dataloaders_dict["train"]
val_dataloader = dataloaders_dict["val"]

./data/hymenoptera_data/train/**/*.jpg
./data/hymenoptera_data/val/**/*.jpg


In [4]:
#
# Build the model: pre-trained VGG-16 with a new 2-class output layer
#

# Instantiate VGG-16; use_pretrained=True requests the pre-trained weights
use_pretrained = True
net = models.vgg16(pretrained=use_pretrained)

# Swap the last classifier layer (index 6) for a fresh 4096 -> 2 linear layer,
# giving one output per class (ant, bee)
ant_bee_head = nn.Linear(4096, 2)
net.classifier[6] = ant_bee_head

# Put the network into training mode
net.train()

print('network setting done： load pretrained model and set it as the training mode')

network setting done： load pretrained model and set it as the training mode


In [5]:
# Loss function: cross-entropy between the network outputs and the class labels.
# reduction="mean" is the PyTorch default; it is spelled out here because the
# training loop relies on the loss being a per-batch average.
criterion = nn.CrossEntropyLoss(reduction="mean")

In [6]:
# Sort the network parameters into three groups (params_to_update_1..3) so that
# each group can later be given its own learning rate.
params_to_update_1, params_to_update_2, params_to_update_3 = [], [], []

# Which parameters belong to which group:
#   group 1 - every parameter whose name contains "features" (the conv layers)
update_param_names_1 = ["features"]
#   group 2 - weights and biases of classifier.0 and classifier.3
update_param_names_2 = [
    "classifier.0.weight",
    "classifier.0.bias",
    "classifier.3.weight",
    "classifier.3.bias",
]
#   group 3 - weight and bias of classifier.6 (the replaced output layer)
update_param_names_3 = ["classifier.6.weight", "classifier.6.bias"]

# (message, destination list, membership test) for each group, checked in order;
# group 1 matches by substring, groups 2 and 3 by exact parameter name
update_rules = (
    ("stored in params_to_update_1:", params_to_update_1,
     lambda param_name: update_param_names_1[0] in param_name),
    ("stored in params_to_update_2:", params_to_update_2,
     lambda param_name: param_name in update_param_names_2),
    ("stored in params_to_update_3:", params_to_update_3,
     lambda param_name: param_name in update_param_names_3),
)

for name, param in net.named_parameters():
    for message, destination, belongs_to_group in update_rules:
        if belongs_to_group(name):
            # first matching group wins: enable gradients and remember the parameter
            param.requires_grad = True
            destination.append(param)
            print(message, name)
            break
    else:
        # parameter is in none of the groups: freeze it
        param.requires_grad = False
        print("No calculation of gradients, no updates", name)

stored in params_to_update_1: features.0.weight
stored in params_to_update_1: features.0.bias
stored in params_to_update_1: features.2.weight
stored in params_to_update_1: features.2.bias
stored in params_to_update_1: features.5.weight
stored in params_to_update_1: features.5.bias
stored in params_to_update_1: features.7.weight
stored in params_to_update_1: features.7.bias
stored in params_to_update_1: features.10.weight
stored in params_to_update_1: features.10.bias
stored in params_to_update_1: features.12.weight
stored in params_to_update_1: features.12.bias
stored in params_to_update_1: features.14.weight
stored in params_to_update_1: features.14.bias
stored in params_to_update_1: features.17.weight
stored in params_to_update_1: features.17.bias
stored in params_to_update_1: features.19.weight
stored in params_to_update_1: features.19.bias
stored in params_to_update_1: features.21.weight
stored in params_to_update_1: features.21.bias
stored in params_to_update_1: features.24.weight

In [7]:
# Optimizer: SGD (stochastic gradient descent) with momentum 0.9.
# Every parameter group gets its own learning rate, increasing from the
# "features" group (1e-4) to the replaced output layer (1e-3).
group_learning_rates = (
    (params_to_update_1, 1e-4),
    (params_to_update_2, 5e-4),
    (params_to_update_3, 1e-3),
)
optimizer = optim.SGD(
    [{'params': group, 'lr': group_lr} for group, group_lr in group_learning_rates],
    momentum=0.9,
)

In [8]:
# define a function to train the model

def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train and validate ``net``, printing loss and accuracy for every phase.

    Each epoch runs a 'train' phase followed by a 'val' phase. In the very
    first epoch the 'train' phase is skipped, so the first reported numbers
    show the validation performance before any weight update; with
    ``num_epochs=N`` the network is therefore trained for ``N - 1`` epochs.

    Args:
        net: the network to train; it is moved to the GPU ("cuda:0") when one
            is available, otherwise it stays on the CPU.
        dataloaders_dict: mapping with the keys 'train' and 'val'; each value
            is iterated to obtain ``(inputs, labels)`` mini-batches and must
            expose a ``.dataset`` whose length is the number of samples.
        criterion: loss function called as ``criterion(outputs, labels)``.
            The per-epoch loss is computed as
            ``sum(loss * batch_size) / len(dataset)``, which assumes the
            criterion returns the mean loss of the batch.
        optimizer: optimizer holding the parameters of ``net`` to be updated.
        num_epochs: number of epochs to run (including the validation-only
            first epoch).

    Returns:
        None. Progress is reported through ``print`` only.
    """
    # use the GPU if one is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Use device", device)

    # move the network to the selected device
    net.to(device)

    # let the cudnn auto-tuner pick the fastest algorithms for this configuration
    # (usually speeds up training when the input size is fixed)
    torch.backends.cudnn.benchmark = True

    # [epoch] loop
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-------------')

        # training and validation phase of each epoch
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                net.train()  # set the model to training mode
            else:
                net.eval()   # set the model to evaluation mode

            # to see the performance before any training,
            # skip the training phase of the first epoch
            if (epoch == 0) and is_training:
                continue

            # Both accumulators are plain Python numbers. Keeping the correct
            # count as an int (instead of a tensor that only comes into
            # existence after the first batch) means a phase whose loader
            # yields no batches is still reported instead of crashing.
            epoch_loss = 0.0    # sum of the per-sample losses of this phase
            epoch_corrects = 0  # number of correct predictions of this phase

            # [per-iteration process]
            # draw mini-batches from the data loader
            for inputs, labels in tqdm(dataloaders_dict[phase]):

                # send inputs and labels to the selected device
                inputs = inputs.to(device)
                labels = labels.to(device)

                # reset the gradients accumulated by the previous step
                optimizer.zero_grad()

                # forward pass; gradients are only tracked while training
                with torch.set_grad_enabled(is_training):
                    outputs = net(inputs)
                    loss = criterion(outputs, labels)  # calc loss
                    _, preds = torch.max(outputs, 1)   # predicted label

                    # back-propagation and weight update while training
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss is the batch mean, so weight it by the batch size
                epoch_loss += loss.item() * inputs.size(0)
                # count the correct predictions of this batch
                epoch_corrects += (preds == labels).sum().item()

            # display loss and accuracy of this phase
            num_samples = len(dataloaders_dict[phase].dataset)
            epoch_loss = epoch_loss / num_samples
            epoch_acc = epoch_corrects / num_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

In [9]:
# Run the training: 10 epochs in total (the first epoch only validates)
num_epochs = 10
train_model(
    net=net,
    dataloaders_dict=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

Use device cuda:0


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1/10
-------------


100%|██████████| 10/10 [00:05<00:00,  1.79it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.7704 Acc: 0.4444
Epoch 2/10
-------------


100%|██████████| 16/16 [00:09<00:00,  1.73it/s]
 10%|█         | 1/10 [00:00<00:01,  6.04it/s]

train Loss: 0.3606 Acc: 0.7984


100%|██████████| 10/10 [00:01<00:00,  6.14it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1407 Acc: 0.9477
Epoch 3/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
 10%|█         | 1/10 [00:00<00:01,  6.64it/s]

train Loss: 0.1274 Acc: 0.9342


100%|██████████| 10/10 [00:01<00:00,  6.08it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1646 Acc: 0.9477
Epoch 4/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.28it/s]
 10%|█         | 1/10 [00:00<00:01,  6.68it/s]

train Loss: 0.0511 Acc: 0.9835


100%|██████████| 10/10 [00:01<00:00,  6.11it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1235 Acc: 0.9412
Epoch 5/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
 10%|█         | 1/10 [00:00<00:01,  6.73it/s]

train Loss: 0.0676 Acc: 0.9712


100%|██████████| 10/10 [00:01<00:00,  6.06it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1114 Acc: 0.9346
Epoch 6/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
 10%|█         | 1/10 [00:00<00:01,  6.39it/s]

train Loss: 0.0461 Acc: 0.9877


100%|██████████| 10/10 [00:01<00:00,  6.10it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1215 Acc: 0.9281
Epoch 7/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.26it/s]
 10%|█         | 1/10 [00:00<00:01,  6.82it/s]

train Loss: 0.0223 Acc: 0.9918


100%|██████████| 10/10 [00:01<00:00,  6.11it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1294 Acc: 0.9281
Epoch 8/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.26it/s]
 10%|█         | 1/10 [00:00<00:01,  6.31it/s]

train Loss: 0.0194 Acc: 0.9918


100%|██████████| 10/10 [00:01<00:00,  6.15it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1355 Acc: 0.9346
Epoch 9/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
 10%|█         | 1/10 [00:00<00:01,  6.77it/s]

train Loss: 0.0292 Acc: 0.9877


100%|██████████| 10/10 [00:01<00:00,  6.15it/s]
  0%|          | 0/16 [00:00<?, ?it/s]

val Loss: 0.1433 Acc: 0.9281
Epoch 10/10
-------------


100%|██████████| 16/16 [00:04<00:00,  3.27it/s]
 10%|█         | 1/10 [00:00<00:01,  6.87it/s]

train Loss: 0.0363 Acc: 0.9877


100%|██████████| 10/10 [00:01<00:00,  6.04it/s]

val Loss: 0.1437 Acc: 0.9412





In [10]:
# Hand the cached, currently unused GPU memory back to the driver
# (nothing to release when no CUDA device is present)
if torch.cuda.is_available():
    torch.cuda.empty_cache()