# File preparation

In [None]:
import os
import urllib.request
import zipfile

In [None]:
# Directory that receives every file downloaded by the cells below.
data_dir = './data'
# exist_ok=True removes the check-then-create race and tolerates re-runs;
# makedirs also creates missing parent directories.
os.makedirs(data_dir, exist_ok=True)

In [None]:
# Fetch the class-index JSON that a later cell loads for label lookup.
url = "https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json"
save_path = os.path.join(data_dir, "imagenet_class_index.json")

# Download only when the file is not already present in data_dir.
if not os.path.exists(save_path):
    urllib.request.urlretrieve(url, save_path)

In [None]:
# Download and unpack the ants/bees (hymenoptera) dataset.
url = "https://download.pytorch.org/tutorial/hymenoptera_data.zip"
save_path = os.path.join(data_dir, 'hymenoptera_data.zip')
# The archive is deleted after extraction, so the extracted folder -- not the
# zip file -- is what tells a re-run that there is nothing left to do.
extract_dir = os.path.join(data_dir, 'hymenoptera_data')

if not os.path.isdir(extract_dir):
    # Reuse an archive left behind by an interrupted earlier run.
    if not os.path.exists(save_path):
        urllib.request.urlretrieve(url, save_path)

    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(save_path) as zip_file:
        zip_file.extractall(data_dir)

    os.remove(save_path)

# Version check

In [None]:
import numpy as np
import json
from PIL import Image
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision
from torchvision import models, transforms

In [None]:
# Display the installed torchvision version (the recorded run shows '0.7.0+cu101').
torchvision.__version__

'0.7.0+cu101'

# Classification of image, using VGG-16

## loading pre-trained model

In [None]:
# Build VGG-16 with pre-trained weights (the recorded output shows them being
# downloaded on first use) and put the model in evaluation mode.
# NOTE(review): newer torchvision releases may prefer a `weights=` argument
# over `pretrained=` -- confirm before upgrading from the recorded 0.7.0.
use_pretrained = True
model = models.vgg16(pretrained=use_pretrained)
model.eval()

# Print the layer structure of the network.
print(model)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=

## pre-processing

In [None]:
"""pre-processing of input image"""


class BaseTransform:
    """Callable that resizes, center-crops and normalizes an image.

    Attributes:
        base_transform: the composed torchvision pipeline built in
            ``__init__`` (Resize -> CenterCrop -> ToTensor -> Normalize).
    """

    def __init__(self, resize, mean, std):
        """Build the pipeline.

        Args:
            resize (int): size passed to both Resize and CenterCrop.
            mean (R, G, B): per-channel mean passed to Normalize.
            std (R, G, B): per-channel standard deviation passed to Normalize.
        """
        steps = [
            transforms.Resize(resize),      # shorter side becomes `resize`
            transforms.CenterCrop(resize),  # cut resize x resize from the center
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        self.base_transform = transforms.Compose(steps)

    def __call__(self, img):
        """Run ``img`` through the pipeline and return the result."""
        pipeline = self.base_transform
        return pipeline(img)


In [None]:
class BaseTransform:
    """
    Resize an image and standardize its colors.

    Parameters
    ----------
    resize : int
        Size of the image after resizing.
    mean : (R, G, B)
        Mean value of each color channel.
    std : (R, G, B)
        Standard deviation of each color channel.

    Attributes
    ----------
    base_transform : transforms.Compose
        The composed pipeline applied by ``__call__``.
    """

    def __init__(self, resize, mean, std):
        resize_step = transforms.Resize(resize)  # shorter side becomes `resize`
        crop_step = transforms.CenterCrop(resize)  # central resize x resize crop
        tensor_step = transforms.ToTensor()  # convert to a Torch tensor
        normalize_step = transforms.Normalize(mean, std)  # standardize colors
        self.base_transform = transforms.Compose(
            [resize_step, crop_step, tensor_step, normalize_step]
        )

    def __call__(self, img):
        """Return ``img`` after the composed pipeline has been applied."""
        transformed = self.base_transform(img)
        return transformed

In [None]:
"""check pre-processing"""

# load image
# NOTE(review): none of the visible cells downloads this file -- presumably it
# has to be placed in ./data by hand; confirm.
img_file_path = './data/goldenretriever-3724972_640.jpg'
# convert('RGB') guarantees three channels, matching the 3-element mean/std
# used for normalization in the next cell.
img = Image.open(img_file_path).convert('RGB')

# print original image
plt.imshow(img)
plt.show()

In [None]:
# Pre-process the image with the mean/std below, then display the result.
resize = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
transform = BaseTransform(resize, mean, std)
img_transformed = transform(img)  # torch.Size([3, 224, 224])

# change (color, height, width) to (height, width, color) and hand NumPy a
# real ndarray instead of relying on np.clip to coerce a torch tensor
img_transformed = img_transformed.permute(1, 2, 0).numpy()
# clip to [0, 1] so the normalized values are displayable by imshow
img_transformed = np.clip(img_transformed, 0, 1)
plt.imshow(img_transformed)
plt.show()

## post-processing

In [None]:
# Load the class-index table; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('./data/imagenet_class_index.json', 'r') as f:
    ILSVRC_class_index = json.load(f)
# Last expression of the cell: displays the loaded table.
ILSVRC_class_index

In [None]:
# post-processing class predicting label from output result

class ILSVRCPredictor():
    """Translate a model output into an ILSVRC label name.

    Attributes:
        class_index (dict): maps a class id given as a string (e.g. ``"0"``)
            to a sequence whose element ``[1]`` is the label name.
    """

    def __init__(self, class_index):
        self.class_index = class_index

    def predict_max(self, out):
        """Return the label name of the highest-scoring class.

        Args:
            out (torch.Tensor): model output, torch.Size([1, 1000]) in this
                notebook. The argmax is taken over the flattened tensor.

        Returns:
            str: label name stored for the index of the largest value.

        Raises:
            KeyError: if the winning index is missing from ``class_index``.
        """
        # detach() drops the autograd graph; cpu() makes the conversion work
        # for outputs that live on a GPU, which numpy() cannot read directly.
        scores = out.detach().cpu().numpy()
        maxid = np.argmax(scores)
        predicted_label_name = self.class_index[str(maxid)][1]

        return predicted_label_name

## predict image using trained VGG-16

In [None]:
# Load the class-index table (dict); `with` closes the file handle.
with open('./data/imagenet_class_index.json', 'r') as f:
    ILSVRC_class_index = json.load(f)

# generate instance
predictor = ILSVRCPredictor(ILSVRC_class_index)

# load input image; force 3-channel RGB so Normalize always receives the
# channel count that the 3-element mean/std expect
img_file_path = './data/goldenretriever-3724972_640.jpg'
img = Image.open(img_file_path).convert('RGB')

# pre-processing
transform = BaseTransform(resize, mean, std)
img_transformed = transform(img)  # torch.Size([3, 224, 224])
# add the batch dimension without mutating img_transformed in place
inputs = img_transformed.unsqueeze(0)  # torch.Size([1, 3, 224, 224])

# input to model, then transform model_output to label
# (inference only, so no autograd graph needs to be recorded)
with torch.no_grad():
    out = model(inputs)  # torch.Size([1, 1000])
result = predictor.predict_max(out)

print(f'predicted label: {result}')

# Transfer learning

In [None]:
import glob
import os
import random
import numpy as np
import json
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision
from torchvision import models, transforms
from torch.utils.data import DataLoader

In [None]:
# Seed the torch, NumPy and Python random number generators with one value.
SEED = 1234
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

## Create Dataset

### pre-processing

In [None]:
# Pre-processing of input images, with one pipeline per phase.
class ImageTransform:
    """Phase-aware image pre-processing.

    The 'train' pipeline applies RandomResizedCrop and RandomHorizontalFlip;
    the 'val' pipeline applies Resize and CenterCrop. Both finish with
    ToTensor and Normalize.

    Parameters
    ----------
    resize : int
        Size passed to the crop / resize transforms.
    mean : (R, G, B)
        Per-channel mean passed to Normalize.
    std : (R, G, B)
        Per-channel standard deviation passed to Normalize.

    Attributes
    ----------
    data_transform : dict
        Maps 'train' and 'val' to the composed pipeline for that phase.
    """

    def __init__(self, resize, mean, std):
        train_steps = [
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        val_steps = [
            transforms.Resize(resize),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        self.data_transform = {
            'train': transforms.Compose(train_steps),
            'val': transforms.Compose(val_steps),
        }

    def __call__(self, img, phase='train'):
        """Apply the pipeline registered for ``phase`` to ``img``.

        Parameters
        ----------
        img
            Image handed to the selected pipeline.
        phase : str
            'train' or 'val'; any other key raises KeyError.
        """
        pipeline = self.data_transform[phase]
        return pipeline(img)

In [None]:
# Check operation of pre-processing when training

img_file_path = './data/goldenretriever-3724972_640.jpg'
# Force 3-channel RGB so the 3-element mean/std of Normalize always match.
img = Image.open(img_file_path).convert('RGB')

plt.imshow(img)
plt.show()

size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

transform = ImageTransform(size, mean, std)
img_transformed = transform(img, phase='train')

# (color, height, width) -> (height, width, color), converted explicitly to an
# ndarray instead of relying on np.clip to coerce a torch tensor
img_transformed = img_transformed.permute(1, 2, 0).numpy()
# clip to [0, 1] so the normalized values are displayable by imshow
img_transformed = np.clip(img_transformed, 0, 1)
plt.imshow(img_transformed)
plt.show()

### Create file_path_list

In [None]:
def make_data_path_list(phase='train'):
    """Collect the image paths of one dataset split.

    Parameters
    ----------
    phase : str
        Sub-directory of ``./data/hymenoptera_data/`` to scan
        ('train' or 'val' in this notebook).

    Returns
    -------
    list of str
        Paths matching ``./data/hymenoptera_data/<phase>/*/*.jpg``, sorted so
        the order does not depend on how the file system lists directories.
        Empty when nothing matches.
    """
    root_path = './data/hymenoptera_data/'
    # Plain concatenation keeps the exact './data/...' layout of the returned
    # strings; HymenopteraDataset in this file slices labels out of them.
    target_path = root_path + phase + '/**/*.jpg'
    print(target_path)

    # glob is called without recursive=True, so '**' matches exactly one
    # directory level here (the class folder).
    return sorted(glob.glob(target_path))

In [None]:
# Collect the image paths for the training and validation splits.
train_list = make_data_path_list('train')
val_list = make_data_path_list('val')

In [None]:
# Display the collected training paths.
train_list

### Create Dataset of hymenoptera

In [None]:
class HymenopteraDataset(data.Dataset):
    """Dataset of ant and bee (hymenoptera) images.

    Attributes:
        file_list (list): paths of the image files.
        transform (callable or None): pre-processing object called as
            ``transform(img, phase)``; when None the RGB PIL image is
            returned unchanged.
        phase (str): 'train' or 'val'; forwarded to ``transform``.
    """

    # Name of the folder that directly contains an image -> integer label.
    _LABELS = {'ants': 0, 'bees': 1}

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of images."""
        return len(self.file_list)

    @classmethod
    def _label_from_path(cls, img_path):
        """Return the integer label given by the image's parent folder name.

        Reading the folder name instead of fixed character offsets keeps the
        lookup independent of the dataset root and of the phase name.

        Raises:
            ValueError: if the parent folder is not a known class name.
        """
        class_name = os.path.basename(os.path.dirname(img_path))
        try:
            return cls._LABELS[class_name]
        except KeyError:
            raise ValueError(
                f'cannot derive a label from {img_path!r}: '
                f'parent folder {class_name!r} is not one of {sorted(cls._LABELS)}'
            ) from None

    def __getitem__(self, index):
        """Return ``(pre-processed image, label)`` for the index-th file.

        The label is 0 for images in an ``ants`` folder and 1 for images in a
        ``bees`` folder.
        """
        # load index_th image; force RGB so every sample has three channels
        img_path = self.file_list[index]
        img = Image.open(img_path).convert('RGB')

        # pre-process image (skipped when no transform was supplied)
        if self.transform is not None:
            img_transformed = self.transform(img, self.phase)
        else:
            img_transformed = img

        label = self._label_from_path(img_path)

        return img_transformed, label

In [None]:
# Build one dataset per split; each gets its own ImageTransform instance and
# the phase name that selects the matching pipeline.
train_dataset = HymenopteraDataset(
    file_list=train_list, transform=ImageTransform(size, mean, std), phase='train')
val_dataset = HymenopteraDataset(
    file_list=val_list, transform=ImageTransform(size, mean, std), phase='val')

In [None]:
# Sanity check: print the tensor size and the label of one training sample.
# The sample is fetched once per print, via the indexing syntax.
index = 0
print(train_dataset[index][0].size())
print(train_dataset[index][1])

## Create DataLoader

In [None]:
# Number of samples per mini-batch.
batch_size = 32

# Create DataLoader (only the training loader shuffles its samples)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Keyed by phase name so train_model can select the loader with dataloaders_dict[phase].
dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}

In [None]:
# Check operation: pull the first mini-batch from the training loader
batch_iter = iter(dataloaders_dict['train'])
inputs, labels = next(batch_iter)

# Print the size of the input batch and its labels.
print(f'1st input_size: {inputs.size()}')
print(f'1st label: {labels}')

## Create network model

In [None]:
# Load pre-trained VGG-16
# Generate instance of VGG-16
use_pretrained = True
model = models.vgg16(pretrained=use_pretrained)

# Change output_unit of last layer to out_features=2
# (the dataset labels are 0 for ants and 1 for bees)
model.classifier[6] = nn.Linear(in_features=4096, out_features=2)

# Setting training mode
model.train()
print('Complete setting network! Loaded pre-trained parameter, set training mode.')

## Setting loss function, optimizer

In [None]:
# Define loss function
# Loss passed to train_model, which calls it as criterion(outputs, labels).
criterion = nn.CrossEntropyLoss()

In [None]:
# Parameters that transfer learning is allowed to update.
params_to_update = []

# Names of the parameters to train: the weight and bias of classifier[6].
update_params_name = ['classifier.6.weight', 'classifier.6.bias']

# Enable gradients only for the listed parameters and freeze all the others.
for name, param in model.named_parameters():
    param.requires_grad = name in update_params_name
    if not param.requires_grad:
        continue
    params_to_update.append(param)
    print(name)

print('################')
print(params_to_update)

In [None]:
# Set optimizer: SGD with momentum over the parameters collected in params_to_update
optimizer = optim.SGD(params=params_to_update, lr=0.001, momentum=0.9)

## Do training and validation

In [None]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train and validate ``model``, printing loss and accuracy per phase.

    In the first epoch the training phase is skipped, so the first validation
    line reports the model before any update made by this call. Batches are
    fed to the model exactly as the loader delivers them; no device transfer
    is performed here.

    Args:
        model: network to train; switched between train() and eval() per phase.
        dataloaders_dict (dict): maps 'train' and 'val' to a loader that
            yields ``(inputs, labels)`` batches and exposes ``.dataset``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs (int): number of epochs to run.

    Returns:
        None. Results are printed.
    """
    for epoch in range(num_epochs):
        print(f'epoch{epoch+1}/{num_epochs}')
        print('----------------------------')

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            # to check validation performance when un-learning
            if epoch == 0 and is_training:
                continue

            epoch_loss = 0.0  # sum of the per-sample losses of this phase
            # Kept as a plain int so the accuracy below also works when the
            # loader yields no batch at all (e.g. drop_last=True on a small set).
            epoch_corrects = 0

            for inputs, labels in tqdm(dataloaders_dict[phase]):

                # initialize optimizer
                optimizer.zero_grad()

                # calculate forward; gradients are recorded only when training
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    # when training, do backpropagation
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss.item() is scaled by the batch size so that dividing by
                # the dataset size below yields a per-sample value
                epoch_loss += loss.item() * inputs.size(0)
                epoch_corrects += (preds == labels).sum().item()

            dataset_size = len(dataloaders_dict[phase].dataset)
            epoch_loss /= dataset_size
            epoch_acc = epoch_corrects / dataset_size

            print(f'{phase} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

In [None]:
# Run transfer learning; the training phase of the first epoch is skipped by train_model.
num_epochs = 3
train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

# Fine-tuning

In [30]:
# Rebuild the path lists for the fine-tuning section (the glob patterns are printed).
train_list = make_data_path_list('train')
val_list = make_data_path_list('val')

./data/hymenoptera_data/train/**/*.jpg
./data/hymenoptera_data/val/**/*.jpg


In [None]:
# Values handed to ImageTransform below: size plus per-channel mean and std.
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Create Dataset
train_dataset = HymenopteraDataset(train_list, transform=ImageTransform(size, mean, std), phase='train')
val_dataset = HymenopteraDataset(val_list, transform=ImageTransform(size, mean, std), phase='val')

In [None]:
# Create DataLoader (only the training loader shuffles its samples)
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Keyed by phase name so train_model can select the loader with dataloaders_dict[phase].
dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}

In [None]:
# Start again from a freshly loaded pre-trained VGG-16.
use_pretrained = True
model = models.vgg16(pretrained=use_pretrained)

# Replace the last classifier layer with a 2-output layer, then set training mode.
model.classifier[6] = nn.Linear(in_features=4096, out_features=2)
model.train()

In [None]:
# Loss passed to train_model, which calls it as criterion(outputs, labels).
criterion = nn.CrossEntropyLoss()

In [None]:
# Parameter groups for fine-tuning; the optimizer cell gives each group its
# own learning rate.
params_to_update_1 = []
params_to_update_2 = []
params_to_update_3 = []

# Group 1 is matched by substring ('features' anywhere in the parameter name);
# groups 2 and 3 are matched by exact parameter name.
update_params_name_1 = ['features']
update_params_name_2 = ['classifier.0.weight', 'classifier.0.bias', 'classifier.3.weight', 'classifier.3.bias']
update_params_name_3 = ['classifier.6.weight', 'classifier.6.bias']

for name, param in model.named_parameters():
    # Pick the group this parameter belongs to; None means "keep it frozen".
    if update_params_name_1[0] in name:
        group = params_to_update_1
    elif name in update_params_name_2:
        group = params_to_update_2
    elif name in update_params_name_3:
        group = params_to_update_3
    else:
        group = None

    param.requires_grad = group is not None
    if group is not None:
        group.append(param)

In [None]:
# Setting optimizer: one SGD parameter group per list, each with its own
# learning rate (1e-4, 5e-4, 1e-3) and a shared momentum of 0.9
optimizer = optim.SGD([
    {'params': params_to_update_1, 'lr': 1e-4},
    {'params': params_to_update_2, 'lr': 5e-4},
    {'params': params_to_update_3, 'lr': 1e-3}
], momentum=0.9)

In [36]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train and validate ``model`` on GPU when available, else on CPU.

    The model is moved to the selected device and every batch is moved there
    before the forward pass. In the first epoch the training phase is skipped,
    so the first validation line reports the model before any update made by
    this call. Loss and accuracy are printed per phase.

    Args:
        model: network to train; moved to the device in place.
        dataloaders_dict (dict): maps 'train' and 'val' to a loader that
            yields ``(inputs, labels)`` batches and exposes ``.dataset``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs (int): number of epochs to run.

    Returns:
        None. Results are printed.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f'using device: {device}')

    model.to(device)

    # Accelerate GPU speed
    torch.backends.cudnn.benchmark = True

    for epoch in range(num_epochs):
        print(f'Epoch:{epoch+1}/{num_epochs}')
        print('-----------------------------')

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            # Skip training in the first epoch so the initial validation score
            # is reported before any weight update.
            if epoch == 0 and is_training:
                continue

            epoch_loss = 0.0
            # Kept as a plain int so the accuracy below also works when the
            # loader yields no batch at all (e.g. drop_last=True on a small set).
            epoch_corrects = 0

            for inputs, labels in dataloaders_dict[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Gradients are recorded only in the training phase.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # loss.item() is scaled by the batch size so that dividing by
                # the dataset size below yields a per-sample value
                epoch_loss += loss.item() * inputs.size(0)
                epoch_corrects += (preds == labels).sum().item()

            dataset_size = len(dataloaders_dict[phase].dataset)
            epoch_loss /= dataset_size
            epoch_acc = epoch_corrects / dataset_size

            print(f'{phase} Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}')

In [38]:
# Run fine-tuning; the training phase of the first epoch is skipped by train_model.
num_epochs = 10
train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

using device: cuda
Epoch:1/10
-----------------------------
val Loss: 0.1654, Acc: 0.9608
Epoch:2/10
-----------------------------
train Loss: 0.1643, Acc: 0.9424
val Loss: 0.1201, Acc: 0.9542
Epoch:3/10
-----------------------------
train Loss: 0.0982, Acc: 0.9588
val Loss: 0.1100, Acc: 0.9608
Epoch:4/10
-----------------------------
train Loss: 0.0741, Acc: 0.9712
val Loss: 0.1019, Acc: 0.9608
Epoch:5/10
-----------------------------
train Loss: 0.0533, Acc: 0.9835
val Loss: 0.0989, Acc: 0.9608
Epoch:6/10
-----------------------------
train Loss: 0.0585, Acc: 0.9835
val Loss: 0.0982, Acc: 0.9608
Epoch:7/10
-----------------------------
train Loss: 0.0339, Acc: 0.9918
val Loss: 0.1019, Acc: 0.9542
Epoch:8/10
-----------------------------
train Loss: 0.0444, Acc: 0.9918
val Loss: 0.1106, Acc: 0.9542
Epoch:9/10
-----------------------------
train Loss: 0.0315, Acc: 0.9918
val Loss: 0.1138, Acc: 0.9542
Epoch:10/10
-----------------------------
train Loss: 0.0162, Acc: 1.0000
val Loss: 0.

In [40]:
# Save the fine-tuned parameters (state_dict only, not the model object).
# NOTE(review): train_model moved the model to the training device ('cuda' in
# the recorded run), so the saved tensors presumably live on that device;
# loading on a CPU-only machine may need map_location -- confirm.
save_path = './param_fine_tuning.pth'
torch.save(model.state_dict(), save_path)