# [ LG전자_DX_Intensive_Course  ]<br><br> 딥러닝 기반 시계열 분석 3: CNN 주요 모델 2 & CAM/Grad-CAM <br>
### Key Points of Using Pretrained CNN Models
- 1. 데이터 구축<br>
    - Pretrained model은 사전에 설정한 input size에 적합한 구조를 가지고 있으므로 새로운 데이터를 pretrained model의 input size로 변환해야 함 <br>
- 2. 모델 구축<br>
    - Pretrained model은 ImageNet에 학습되어 1000개의 output node를 가지고 있으므로 해당 모델을 새로운 데이터에 사용하기 위해 pretrained model의 마지막 layer를 새로운 데이터의 class 개수에 맞는 layer로 대체해야 함 <br>
- 3. 모델 학습<br>
    - Pretrained model의 weight를 그대로 사용할 layer와 새로운 데이터에 맞게 업데이트할 layer를 구분하여 optimizer가 업데이트할 weight를 설정해야 함

# <br>0. Hyperparameter Setting
- data_dir: Top level data directory. Here we assume the format of the directory conforms to the ImageFolder structure
- model_name: Models to choose from [inception, resnet, densenet]
- batch_size: Batch size for training (change depending on how much memory you have)
- num_classes: Number of classes in the dataset
- num_epochs: Number of epochs to train for
- feature_extract: Flag for feature extracting. When False, we finetune the whole model, when True we only update the reshaped layer params

In [1]:
# 모듈 불러오기
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms

import matplotlib.pyplot as plt
import time
import os
import copy

In [2]:
# Hyperparameter settings
data_dir = "../input/hymenoptera-data/hymenoptera_data"  # top-level folder in ImageFolder layout
model_name = "resnet"  # one of: inception, resnet, densenet
batch_size = 32
num_classes = 2
num_epochs = 15
feature_extract = True  # True: update only the reshaped layer; False: fine-tune the whole model

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Input image size expected by each supported architecture.
input_size_dict = {"inception": 299, "resnet": 224, "densenet": 224}
# Look the size up through `model_name` so that switching the architecture in the
# hyperparameter cell also switches the resolution used by the transforms.
input_size = input_size_dict[model_name]

# <br><br>__1. Data: Hymenoptera Data__
- 데이터 description <br>
    - Hymenoptera Data는 개미와 벌의 이미지로 구성된 데이터셋이다. 해당 데이터셋은 총 398개의 이미지와 해당 이미지에 매칭되는 개미/벌의 label로 구성되어 있으며, 이 중 245개는 train 데이터이고 153개는 test 데이터이다. Hymenoptera Data를 활용한 이미지 분류 task는 이미지를 input으로 받아 이를 개미/벌 중 하나의 class로 분류하는 것을 목표로 한다. (출처: https://www.kaggle.com/ajayrana/hymenoptera-data)

### Step 1-1. Data Transform 설정하기
주의) train 데이터에는 data augmentation을 적용하여 한 이미지로부터 다양한 이미지를 추출하여 학습함으로써 학습의 효과를 높이지만, validation 데이터에는 input size를 맞추기 위한 변환과 normalization만 적용함
- transforms.RandomResizedCrop(input_size): 이미지의 일부 영역을 랜덤으로 crop한 후, 이를 input_size의 크기로 resize함
- transforms.RandomHorizontalFlip(): 50%의 확률로 이미지를 좌우 반전함
- transforms.Resize(input_size): 이미지의 짧은 변이 input_size가 되도록 종횡비를 유지하며 resize함
- transforms.CenterCrop(input_size): 이미지의 중심부에서 input_size의 크기의 영역을 crop함
- transforms.ToTensor(): 0-1 사이의 값을 가진 Tensor 형태로 변환함
- transforms.Normalize(mean, std): mean을 빼고 std로 나누는 정규화를 적용함

In [4]:
# Transforms applied to the input images, one pipeline per split.
data_transforms = dict(
    # Training: random crop + random horizontal flip as augmentation, then normalize.
    train=transforms.Compose([
        transforms.RandomResizedCrop(size=input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    # Validation: only resize / center-crop to the input size, then normalize.
    val=transforms.Compose([
        transforms.Resize(size=input_size),
        transforms.CenterCrop(size=input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
)

### Step 1-2. Dataset &  DataLoader 구축하기

In [5]:
# Build one ImageFolder dataset per split, each with its matching transform.
image_datasets = {
    split: datasets.ImageFolder(
        root=os.path.join(data_dir, split),
        transform=data_transforms[split],
    )
    for split in ('train', 'val')
}

In [6]:
# Wrap each split's dataset in a DataLoader that yields shuffled mini-batches.
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        dataset=image_datasets[split],
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for split in ('train', 'val')
}

# <br><br>__2. Model: GoogLeNet, ResNet, DenseNet__
## 2-1. GoogLeNet (Inception V3)
- Fine-tuning을 위해 auxiliary output을 도출하는 **(AuxLogits.fc)** layer와 primary output을 도출하는 **(fc)** layer를 변경해야 함<br>
- Inception V3는 다른 pretrained CNN models와 다르게 auxiliary task를 수행하는 layer를 가지므로 해당 layer도 변경해야 함

In [7]:
# Load the pretrained Inception V3 and show its layer structure.
inception = models.inception_v3(pretrained=True)
print(inception)

In [8]:
# Replace the (AuxLogits.fc) and (fc) layers with heads sized for num_classes.
# The input width is read from the layer being replaced instead of being
# hard-coded, the same way initialize_model() does it.
inception.AuxLogits.fc = nn.Linear(inception.AuxLogits.fc.in_features, num_classes)
inception.fc = nn.Linear(inception.fc.in_features, num_classes)
print(inception)

## <br>2-2. ResNet
- Fine-tuning을 위해 output을 도출하는 **(fc)** layer를 변경해야 함

In [9]:
# Load the pretrained ResNet-18 and show its layer structure.
resnet = models.resnet18(pretrained=True)
print(resnet)

In [10]:
# Replace the (fc) layer with a head sized for num_classes.
# The input width is read from the layer being replaced instead of being
# hard-coded, the same way initialize_model() does it.
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
print(resnet)

## <br>2-3. DenseNet
- Fine-tuning을 위해 output을 도출하는 **(classifier)** layer를 변경해야 함

In [11]:
# Load the pretrained DenseNet-121 and show its layer structure.
densenet = models.densenet121(pretrained=True)
print(densenet)

In [12]:
# Replace the (classifier) layer with a head sized for num_classes.
# The input width is read from the layer being replaced instead of being
# hard-coded, the same way initialize_model() does it.
densenet.classifier = nn.Linear(densenet.classifier.in_features, num_classes)
print(densenet)

## <br>2-4. Fine-tuning을 위한 모델 구축 및 학습 parameter 설정하기

In [13]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    Args:
        model: Object exposing ``parameters()``; its parameters are modified in place.
        feature_extracting: When falsy the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [14]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision CNN whose output layer has ``num_classes`` units.

    Args:
        model_name: Architecture to build: "resnet" (ResNet-18),
            "densenet" (DenseNet-121) or "inception" (Inception v3).
        num_classes: Number of output units of the replacement layer(s).
        feature_extract: Forwarded to ``set_parameter_requires_grad``; when
            truthy the loaded parameters are frozen before the new layer(s)
            are attached.
        use_pretrained: Passed to the torchvision constructor as ``pretrained``.

    Returns:
        The model with its output layer(s) replaced.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == "resnet":
        # Resnet18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    elif model_name == "densenet":
        # Densenet121
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)

    elif model_name == "inception":
        # Inception v3
        # Be careful, expects (299,299) sized images and has auxiliary output
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)

    else:
        # Fail here with a clear message instead of returning None and letting
        # the caller crash later on an attribute access.
        raise ValueError(
            "Unknown model_name {!r}; expected one of: resnet, densenet, inception".format(model_name)
        )

    return model_ft

In [15]:
# Build the network selected by `model_name` and move it to the target device.
model_ft = initialize_model(
    model_name, num_classes, feature_extract, use_pretrained=True
).to(device)
print(model_ft)

# <br><br>__3. Training the Pretrained CNN Model__

### Step 3-1. Optimizer 구축하기
- Finetuning: 모델의 모든 parameter를 업데이트함
- Feature extraction: 초기화한 layer의 parameter만 업데이트하고 나머지는 고정함

In [16]:
# Select the parameters the optimizer will update.
# They are stored in a list rather than the one-shot generator returned by
# model_ft.parameters(), so the optimizer cell can be re-run without ending up
# with an empty parameter list.
print("Params to learn:")
trainable_params = []
for name, param in model_ft.named_parameters():
    if param.requires_grad:
        trainable_params.append(param)
        print("-", name)

# Feature extraction: only the parameters that still require gradients.
# Fine-tuning: every parameter of the model.
params_to_update = trainable_params if feature_extract else list(model_ft.parameters())

In [17]:
# SGD optimizer that updates only the parameters collected in params_to_update.
optimizer_ft = optim.SGD(params=params_to_update, momentum=0.9, lr=0.001)

### Step 3-2. 모델 학습하기

In [18]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights that scored best on validation.

    Every epoch runs a 'train' phase followed by a 'val' phase. Loss and
    accuracy are printed per phase, and the state dict with the highest
    validation accuracy is loaded back into the model before returning.

    Args:
        model: Network to train; modified in place.
        dataloaders: Mapping with 'train' and 'val' entries. Each entry yields
            ``(inputs, labels)`` batches and has a ``dataset`` attribute with a
            length.
        criterion: Called as ``criterion(outputs, labels)`` to compute the loss.
        optimizer: ``zero_grad()`` is called on every batch and ``step()`` on
            every training batch.
        num_epochs: Number of epochs to run.
        is_inception: When True, the model is expected to return
            ``(outputs, aux_outputs)`` during the training phase; the loss is
            then the main loss plus 0.4 times the auxiliary loss.

    Returns:
        ``(model, val_acc_history)`` where ``val_acc_history`` holds one
        validation accuracy per epoch.
    """
    since = time.time()

    # Send batches to the device the model lives on instead of relying on a
    # module-level `device` variable; a model without parameters runs on CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(target_device)
                labels = labels.to(target_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history only in train
                with torch.set_grad_enabled(is_train):
                    # Special case for inception because in training it has an
                    # auxiliary output: the training loss sums the final output
                    # loss and the weighted auxiliary loss, while evaluation
                    # only considers the final output.
                    if is_inception and is_train:
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by the batch size so the
                # epoch average below is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (four decimals) to match the per-epoch report; the previous
    # '{:4f}' only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [19]:
# Loss function: cross-entropy over the class logits.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Train the model; the inception-specific loss is enabled only for that architecture.
model_ft, hist = train_model(
    model=model_ft,
    dataloaders=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer_ft,
    num_epochs=num_epochs,
    is_inception=(model_name == "inception"),
)

# <br><br>__4. GradCAM__

In [31]:
# 모듈 불러오기
from torchvision.utils import make_grid

from gradcam.utils import visualize_cam
from gradcam import GradCAM, GradCAMpp
from PIL import Image

In [21]:
# Load the test image
img_path = "../input/hymenoptera-data/hymenoptera_data/val/bees/6a00d8341c630a53ef00e553d0beb18834-800wi.jpg"
img = Image.open(fp=img_path)
img  # bare expression: shown as the cell output when run in a notebook

In [22]:
# Apply the validation transform and prepend a batch dimension
normed_img = data_transforms['val'](img).unsqueeze(0).to(device)
normed_img.shape

In [35]:
# Un-normalized 224x224 image tensor, kept for overlaying the heatmaps.
torch_img = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])(img).to(device)
# Normalized copy with a leading batch dimension; this is the network input.
normed_torch_img = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(torch_img)[None]
# Track gradients from the input so they reach the target layer even when the
# backbone parameters were frozen for feature extraction.
normed_torch_img.requires_grad_(True)

# Explain the model that was actually fine-tuned above. The config below is
# ResNet-specific (layer4), so fall back to the stand-alone `resnet` when a
# different architecture was selected for training.
# NOTE(review): assumes gradcam's hooks work on model_ft as they do on resnet - confirm.
cam_model = model_ft if model_name == "resnet" else resnet

configs = [
    dict(model_type='resnet', arch=cam_model, layer_name='layer4')
]

for config in configs:
    config['arch'].to(device).eval()

# One (GradCAM, GradCAM++) pair per configured model.
cams = [
    [cls.from_config(**config) for cls in (GradCAM, GradCAMpp)]
    for config in configs
]

images = []
for gradcam, gradcam_pp in cams:
    mask, _ = gradcam(normed_torch_img)
    heatmap, result = visualize_cam(mask, torch_img)

    mask_pp, _ = gradcam_pp(normed_torch_img)
    heatmap_pp, result_pp = visualize_cam(mask_pp, torch_img)

    # Five images per model: original, both heatmaps, both overlays.
    images.extend([torch_img.cpu(), heatmap, heatmap_pp, result, result_pp])

# make_grid takes `nrow` (images per grid row) and has no `ncol` argument;
# five per row puts each model's images on a single row.
grid_image = make_grid(images, nrow=5)
transforms.ToPILImage()(grid_image)