### Fine Tuning (미세 조정)
- ImageNet으로 학습된 사전 훈련 모델을 다른 목적이나 용도로 활용할 때  
  Feature Extractor(CNN)의 Weight(가중치)를 제어하기 위한 기법이다.
- 특정 Layer들을 Freeze시켜 학습에서 제외시키고, Learning Rate를 점차 감소시켜 적용한다.
- ImageNet과 유사한 데이터 세트거나, 클래스 별 데이터 건 수가 적을 경우 사용하는 것을 권장한다.
- **학습시간 단축**이 주 목적으로, 위 상황이 아닌 경우에는 Sequence까지만 진행하는 것이 좋다.
- Fine Tuning이 언제나 모델의 성능을 향상시키는 것은 아니기 때문에, 적절한 상황에 사용할 수 있어야 한다.
- 먼저 Classification Layer(분류기)만 학습시킨 뒤 전체를 학습시키는 순서로 진행하며,  
  이를 위해 fit을 최소 2번 이상 실행한다.
- 층별로 Freeze 또는 Unfreeze 여부를 결정하기 위해 미세 조정을 진행할 때,  
  학습률이 높으면 이전에 학습한 것을 잃을 위험이 있기 때문에 학습률은 작게 설정한다.

<div style='display: flex;'>
    <div>
        <img src='./images/transfer_learning03.png' width='600px'>
    </div>
    <div>
        <img src='./images/transfer_learning04.png' width='500px' style='margin-left: -80px;'>
    </div>
</div>

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

root = './datasets/animals/original/'

# ImageDataGenerator 선언 - RGB 값을 0 ~ 1사이로 스케일링만
image_data_generator = ImageDataGenerator(rescale=1./255)

generator = image_data_generator.flow_from_directory(root,
                                                     target_size=(64, 64),
                                                     batch_size=64,
                                                     class_mode='categorical')

print(generator.class_indices)

Found 26179 images belonging to 10 classes.
{'butterfly': 0, 'cat': 1, 'chicken': 2, 'cow': 3, 'dog': 4, 'elephant': 5, 'horse': 6, 'sheep': 7, 'spider': 8, 'squirrel': 9}


In [2]:
# class_indices의 key: value 순서를 반대로 한 새로운 dict 생성
target_name = {v: k for k, v in generator.class_indices.items()}
target_name

{0: 'butterfly',
 1: 'cat',
 2: 'chicken',
 3: 'cow',
 4: 'dog',
 5: 'elephant',
 6: 'horse',
 7: 'sheep',
 8: 'spider',
 9: 'squirrel'}

In [3]:
target_names = []

# 반복문으로 위 dict의 문자열(value)을 target_names에 추가
for target in generator.classes:
    target_names.append(target_name[target])

In [4]:
import pandas as pd

# 경로, target_names(list) 및 target 클래스로 데이터프레임 생성
a_df = pd.DataFrame({'file_paths': generator.filepaths, 'target_names': target_names ,'targets': generator.classes})
a_df

Unnamed: 0,file_paths,target_names,targets
0,./datasets/animals/original/butterfly\butterfl...,butterfly,0
1,./datasets/animals/original/butterfly\butterfl...,butterfly,0
2,./datasets/animals/original/butterfly\butterfl...,butterfly,0
3,./datasets/animals/original/butterfly\butterfl...,butterfly,0
4,./datasets/animals/original/butterfly\butterfl...,butterfly,0
...,...,...,...
26174,./datasets/animals/original/squirrel\squirrel9...,squirrel,9
26175,./datasets/animals/original/squirrel\squirrel9...,squirrel,9
26176,./datasets/animals/original/squirrel\squirrel9...,squirrel,9
26177,./datasets/animals/original/squirrel\squirrel9...,squirrel,9


In [5]:
# file_path의 '\\'를 '/'로 대체
a_df.loc[:, 'file_paths'] = a_df.file_paths.apply(lambda x: x.replace('\\', '/'))
a_df

Unnamed: 0,file_paths,target_names,targets
0,./datasets/animals/original/butterfly/butterfl...,butterfly,0
1,./datasets/animals/original/butterfly/butterfl...,butterfly,0
2,./datasets/animals/original/butterfly/butterfl...,butterfly,0
3,./datasets/animals/original/butterfly/butterfl...,butterfly,0
4,./datasets/animals/original/butterfly/butterfl...,butterfly,0
...,...,...,...
26174,./datasets/animals/original/squirrel/squirrel9...,squirrel,9
26175,./datasets/animals/original/squirrel/squirrel9...,squirrel,9
26176,./datasets/animals/original/squirrel/squirrel9...,squirrel,9
26177,./datasets/animals/original/squirrel/squirrel9...,squirrel,9


In [6]:
from sklearn.model_selection import train_test_split

# train, test 데이터 세트 분할
train_images, test_images, train_targets, test_targets = \
            train_test_split(a_df.file_paths, a_df.targets, stratify=a_df.targets, test_size=0.2, random_state=124)

print(train_targets.value_counts())
print(test_targets.value_counts())

targets
4    3890
8    3857
2    2478
6    2098
0    1690
3    1493
9    1490
7    1456
1    1334
5    1157
Name: count, dtype: int64
targets
4    973
8    964
2    620
6    525
0    422
3    373
9    372
7    364
1    334
5    289
Name: count, dtype: int64


In [7]:
from sklearn.model_selection import train_test_split

# train, validation 데이터 세트 분할
train_images, validation_images, train_targets, validation_targets = \
train_test_split(train_images, train_targets, stratify=train_targets, test_size=0.2, random_state=124)

print(train_targets.value_counts())
print(validation_targets.value_counts())

targets
4    3112
8    3086
2    1982
6    1678
0    1352
3    1194
9    1192
7    1165
1    1067
5     926
Name: count, dtype: int64
targets
4    778
8    771
2    496
6    420
0    338
3    299
9    298
7    291
1    267
5    231
Name: count, dtype: int64


In [8]:
# train, validation, test Dataframe 생성
train_df = a_df.iloc[train_images.index].reset_index(drop=True)
validation_df = a_df.iloc[validation_images.index].reset_index(drop=True)
test_df = a_df.iloc[test_images.index].reset_index(drop=True)

print(train_df.shape)
print(validation_df.shape)
print(test_df.shape)

(16754, 3)
(4189, 3)
(5236, 3)


In [11]:
from tensorflow.keras.utils import Sequence
from sklearn.utils import shuffle
import numpy as np
import cv2

IMAGE_SIZE = 224
BATCH_SIZE = 64

class Dataset(Sequence):
    def __init__(self, file_paths, targets, batch_size=BATCH_SIZE, aug=None, preprocess=None, shuffle=False):
        self.file_paths = file_paths
        self.targets = targets
        self.batch_size = batch_size
        self.aug = aug
        self.preprocess = preprocess
        self.shuffle = shuffle

        if self.shuffle:
            # shuffle=True일 경우, 매 epoch 종료 시 객체 생성 및 데이터 섞기
            self.on_epoch_end()

    # __len__(): 전체 데이터 건 수에서 batch_size 단위로 나눈 데이터 수 (소수점 올림)
    # ex) 전체 1000 건, batch_size 30 → batch 하나 당 데이터 수는 33.333... 개
    # 이 때, 소수점 아래를 올림해서 1 batch 당 데이터 수를 34개로 설정한다
    def __len__(self, ):
        return int(np.ceil(len(self.targets) / self.batch_size))

    # __getitem__(): batch_size 단위로 이미지 배열과 target 데이터들을 가져온 뒤, 변환한 값 리턴
    # 쉽게 말해 전처리 메소드
    def __getitem__(self, index):
        file_paths_batch = self.file_paths[index * self.batch_size: (index + 1) * self.batch_size]
        targets_batch = self.targets[index * self.batch_size: (index + 1) * self.batch_size]

        # 0으로만 채운(초기화) 4차원 (batch_size, IMAGE_SIZE, IMAGE_SIZE, 3(RGB)) ndarray 할당
        results_batch = np.zeros((file_paths_batch.shape[0], IMAGE_SIZE, IMAGE_SIZE, 3))

        # 각 이미지 별로 아래의 전처리 실행
        for i in range(file_paths_batch.shape[0]):
            image = cv2.cvtColor(cv2.imread(file_paths_batch[i]), cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

            # aug가 전달되었을 경우, 해당 Augmentation 적용
            if self.aug is not None:
                image = self.aug(image=image)['image']

            # 전처리 함수가 전달되었을 경우, 해당 함수로 image 추가 전처리
            if self.preprocess is not None:
                self.preprocess(image)

            # results_batch의 i번 인덱스에 전처리된 이미지 추가
            results_batch[i] = image

        # 재정의 된 __getitem__() 메소드의 반환값
        # Augmentation 적용(또는 미적용) 된 문제(이미지, results_batch)와 정답(targets_batch) list 반환
        return results_batch, targets_batch

    # __init__() 안 shuffle 하는 함수 정의
    def on_epoch_end(self):
        if self.shuffle:
            self.file_paths, self.targets = shuffle(self.file_paths, self.targets)

In [12]:
import albumentations as A
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocess_input

train_file_paths = train_df['file_paths'].values
# train_targets = train_df['targets'].values # SparseCategoricalCrossentropy 사용 시
train_targets = pd.get_dummies(train_df['targets']).values # CategoricalCrossEntropy 사용 시

validation_file_paths = validation_df['file_paths'].values
# validation_targets = validation_df['targets'].values
validation_targets = pd.get_dummies(validation_df['targets']).values

test_file_paths = test_df['file_paths'].values
# test_targets = test_df['targets'].values
test_targets = pd.get_dummies(test_df['targets']).values

aug = A.Compose([
    A.ShiftScaleRotate(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, p=0.5)
])

train_dataset = Dataset(train_file_paths,
                        train_targets,
                        batch_size=BATCH_SIZE,
                        aug=aug,
                        preprocess=xception_preprocess_input,
                        shuffle=True)

validation_dataset = Dataset(validation_file_paths,
                             validation_targets,
                             batch_size=BATCH_SIZE,
                             preprocess=xception_preprocess_input)

test_dataset = Dataset(test_file_paths,
                       test_targets,
                       batch_size=BATCH_SIZE,
                       preprocess=xception_preprocess_input)

In [13]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense , Conv2D , Dropout , Flatten , Activation, MaxPooling2D , GlobalAveragePooling2D
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications import MobileNetV2

def create_model(model_name='vgg16', verbose=False):
    # 입력받은 model_name에 따라 다른 모델 사용
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    
    if model_name == 'vgg16':
        model = VGG16(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'resnet50': # ResNet50, 74.9% ; ResNet50V2, 76.0%
        model = ResNet50V2(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'xception': # Inception을 기초로 한 모델
        model = Xception(input_tensor=input_tensor, include_top=False, weights='imagenet')
    elif model_name == 'mobilenet':
        model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')

    x = model.output

    # Classifier
    # VGG16 이외의 모델은 층 구조가 깊기 때문에 Dropout 사용 
    x = GlobalAveragePooling2D()(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu')(x)
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    output = Dense(10, activation='softmax', name='output')(x)
    
    model = Model(inputs=input_tensor, outputs=output)
    
    if verbose:
        model.summary()
    
    return model

In [14]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy, SparseCategoricalCrossentropy

# MobileNetV2 모델 생성 후 컴파일링
model = create_model(model_name='mobilenet', verbose=True)
model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['acc'])

  model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')


In [15]:
# 모델의 Layer 구조 출력
model.layers

[<InputLayer name=input_layer, built=True>,
 <Conv2D name=Conv1, built=True>,
 <BatchNormalization name=bn_Conv1, built=True>,
 <ReLU name=Conv1_relu, built=True>,
 <DepthwiseConv2D name=expanded_conv_depthwise, built=True>,
 <BatchNormalization name=expanded_conv_depthwise_BN, built=True>,
 <ReLU name=expanded_conv_depthwise_relu, built=True>,
 <Conv2D name=expanded_conv_project, built=True>,
 <BatchNormalization name=expanded_conv_project_BN, built=True>,
 <Conv2D name=block_1_expand, built=True>,
 <BatchNormalization name=block_1_expand_BN, built=True>,
 <ReLU name=block_1_expand_relu, built=True>,
 <ZeroPadding2D name=block_1_pad, built=True>,
 <DepthwiseConv2D name=block_1_depthwise, built=True>,
 <BatchNormalization name=block_1_depthwise_BN, built=True>,
 <ReLU name=block_1_depthwise_relu, built=True>,
 <Conv2D name=block_1_project, built=True>,
 <BatchNormalization name=block_1_project_BN, built=True>,
 <Conv2D name=block_2_expand, built=True>,
 <BatchNormalization name=block_2

In [18]:
# 모델의 Layer 별 학습 가능 여부 조정 (default=True)
for layer in model.layers:
    layer.trainable = False # freezing
    print(layer.trainable)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [22]:
# Classifier 제외한 나머지를 freezing
# 슬라이싱은 층 구조 보고 조정할 것
# 유사도는 높지만 데이터 수가 적은 경우, 이렇게 Classifier를 제외한 나머지만 freezing 시킨다
for layer in model.layers[:-5]:
    layer.trainable = False

In [23]:
from tensorflow.keras import layers

# 우선 모든 레이어를 freezing 시킨 다음 Fine Tuning 하는 것이 좋다
for layer in model.layers:
    layer.trainable = False

# model.fit()

# layers.(레이어) = 해당 레이어 클래스
# 아래 예시는 모든 레이어 중 BatchNormalization(배치 정규화) 레이어만 freezing 시키는 코드이다.
for layer in model.layers:
    if not isinstance(layer, layers.BatchNormalization):
        layer.trainable = True # layer가 BatchNormalization이 아닐 경우 unfreeze

# model.compile(optimizer=Adam(0.00001))
# model.fit()

In [28]:
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy, SparseCategoricalCrossentropy

IMAGE_SIZE = 224
BATCH_SIZE = 64

# Fine Tuning Module
def fine_tune(datas, model_name, aug, preprocess):
    FIRST_EPOCHS = 10
    SECOND_EPOCHS = 10

    # 받은 데이터(Tuple)로 train, validation, test 데이터 분할 
    train_file_paths, train_targets, \
    validation_file_paths, validation_targets, \
    test_file_paths, test_targets = datas

    # train, validation 데이터 세트 객체 생성
    train_dataset = Dataset(train_file_paths,
                        train_targets,
                        batch_size=BATCH_SIZE,
                        aug=aug,
                        preprocess=preprocess,
                        shuffle=True)

    validation_dataset = Dataset(validation_file_paths,
                                 validation_targets,
                                 batch_size=BATCH_SIZE,
                                 preprocess=preprocess)

    # 모델 생성 후 컴파일링
    model = create_model(model_name=model_name, verbose=True)
    model.compile(optimizer=Adam(), loss=CategoricalCrossentropy(), metrics=['acc'])

    # feature extractor(Convolutional Base) layer들을 전부 freeze
    for layer in model.layers[:-5]:
        layer.trainable = False

    # Classifier만 fit
    model.fit(train_dataset,
              batch_size=BATCH_SIZE,
              epochs=FIRST_EPOCHS,
              validation_data=validation_dataset)

    # 배치 정규화만 freeze
    for layer in model.layers:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    # 부분 freeze
    model.compile(optimizer=Adam(1e-5), loss=CategoricalCrossentropy(), metrics=['acc'])

    # freeze 이후 모델 fit
    history = model.fit(train_dataset,
                        batch_size=BATCH_SIZE,
                        epochs=SECOND_EPOCHS,
                        validation_data=validation_dataset)

    # 모델과 history 반환
    return model, history

In [27]:
from tensorflow.keras.applications.mobilenet import preprocess_input as moblienet_preprocess_input

train_file_paths = train_df['file_paths'].values
train_targets = pd.get_dummies(train_df['targets']).values

validation_file_paths = validation_df['file_paths'].values
validation_targets = pd.get_dummies(validation_df['targets']).values

test_file_paths = test_df['file_paths'].values
test_targets = pd.get_dummies(test_df['targets']).values

aug = A.Compose([
    A.ShiftScaleRotate(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, p=0.5)
])

# Fine Tuning 함수 사용 - MobileNet 모델 생성 + Fine Tuning
fine_tune((train_file_paths, train_targets, \
           validation_file_paths, validation_targets, \
           test_file_paths, test_targets),
          'mobilenet',
          aug,
          moblienet_preprocess_input)

  model = MobileNetV2(input_tensor=input_tensor, include_top=False, weights='imagenet')


Epoch 1/10


  self._warn_if_super_not_called()


[1m260/262[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m2s[0m 1s/step - acc: 0.1755 - loss: 2.3571

KeyboardInterrupt: 

In [None]:
# freezing 된 layer 확인
for i, layer in enumerate(model.layers[:-5]):
    layer.trainable = False
    print(i + 1, '.', layer.name, 'trainable:', layer.trainable)

print('\n######### classifier layers ######### ')
for layer in model.layers[-5:]:
    print(layer.name, 'trainable:', layer.trainable)