<a href="https://colab.research.google.com/github/0ginny/TIL/blob/main/240416_Transfer_learning_and_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 전이학습을 하는 상황

유사한 모델이 이미 오픈되어 있을 때,

비슷한 가중치를 유지한 상태에서 학습을 하면 더 빨리 배울 수 있지

# Transfer learning and fine tuning for image classification

## Transfer learning

### Importing the libraries

In [1]:
import tensorflow as tf
import seaborn as sns
import zipfile
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout
tf.__version__

'2.15.0'

### Loading the images

In [2]:
# Mount Google Drive at /content/gdrive so files stored on Drive are reachable
# from this runtime. NOTE(review): `google.colab` is Colab-specific; this cell
# presumably will not run in a plain Jupyter environment.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
# Switch the working directory to the project folder on the mounted Drive;
# the relative './homer_bart_2/...' dataset paths are resolved against it.
%cd /content/gdrive/MyDrive/Colab Notebooks/Vision_AZ

/content/gdrive/MyDrive/Colab Notebooks/Vision_AZ


### Train and test set

In [4]:
# Training pipeline: multiply pixel values by 1/255 and apply light
# augmentation (small rotations, horizontal flips, zoom).
training_generator = ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 7,
    horizontal_flip = True,
    zoom_range = 0.2,
)

# Stream shuffled batches of 8 images resized to 256x256, with one-hot
# ('categorical') labels, from the training directory.
train_dataset = training_generator.flow_from_directory(
    './homer_bart_2/training_set/',
    target_size = (256, 256),
    batch_size = 8,
    class_mode = 'categorical',
    shuffle = True,
)

Found 215 images belonging to 2 classes.


In [5]:
# Test pipeline: only rescale the pixels, no augmentation, so evaluation sees
# the images unmodified. This is its own generator object: it must not be
# bound to `training_generator`, otherwise the augmenting training generator
# would be silently replaced by this plain one.
test_generator = ImageDataGenerator(rescale = 1./255)

# One image per batch and no shuffling, so predictions keep the same order as
# the files on disk.
test_dataset = test_generator.flow_from_directory('./homer_bart_2/test_set/',
                                                  target_size = (256, 256),
                                                  batch_size = 1,
                                                  class_mode = 'categorical',
                                                  shuffle = False)

Found 54 images belonging to 2 classes.


### Pre-trained network

- ResNet: https://arxiv.org/pdf/1512.03385.pdf
- Documentation: https://keras.io/api/applications/
    - 여기에 keras에서 제공하는 모델들을 볼 수 있어.

In [6]:
# Load ResNet50 as the pre-trained feature extractor.
base_model = tf.keras.applications.ResNet50(
    # Start from the weights learned on ImageNet.
    weights = 'imagenet',
    # Drop the original fully connected classification head: only the
    # convolutional architecture is reused here, so the model outputs feature
    # maps instead of ImageNet class predictions.
    include_top = False,
    # Feed 256x256 RGB images, matching the generators' target_size.
    input_tensor = Input(shape = (256, 256, 3)),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Print the layer-by-layer architecture of the pre-trained base model.
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 262, 262, 3)          0         ['input_3[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 128, 128, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 128, 128, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [7]:
# Number of layers in the base model (used later to choose where fine-tuning starts).
len(base_model.layers)

175

In [9]:
# Transfer learning: freeze every pre-trained layer so that only the new
# classification head is updated during training.
for resnet_layer in base_model.layers:
    resnet_layer.trainable = False

In [10]:
# Verify the freeze. `trainable == True` means the layer's weights are NOT
# fixed; after freezing, every layer should report False.
for resnet_layer in base_model.layers:
    print(resnet_layer, resnet_layer.trainable)

<keras.src.engine.input_layer.InputLayer object at 0x7a67d9fa2fb0> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9fa08e0> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9fa0be0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f47ee0> False
<keras.src.layers.core.activation.Activation object at 0x7a67d9f46b60> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9f44970> False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7a67d9f46080> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9f454e0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f44e20> False
<keras.src.layers.core.activation.Activation object at 0x7a6872691b10> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a687265fa90> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization

### Custom dense layer

In [12]:
# Inspect the output tensor of the base model; the custom head is attached to it.
base_model.output

<KerasTensor: shape=(None, 8, 8, 2048) dtype=float32 (created by layer 'conv5_block3_out')>

In [18]:
# Size heuristic for the hidden Dense layers — presumably the mean of the
# 2048 feature channels coming out of the base model and the 2 output classes.
(2048+2)/2

1025.0

In [14]:
# Custom classification head stacked on the base model's feature maps.
# Global average pooling collapses each feature map to a single value (it is
# used here in place of a Flatten layer).
head_model = GlobalAveragePooling2D()(base_model.output)

# Two identical hidden blocks: Dense(1025, relu) followed by 20% dropout.
for hidden_block in range(2):
    head_model = Dense(units = 1025, activation = 'relu')(head_model)
    head_model = Dropout(rate = 0.2)(head_model)

# Softmax over the 2 classes.
head_model = Dense(units = 2, activation = 'softmax')(head_model)

### Building and training the neural network

In [15]:
# Inspect the input tensor of the base model; it becomes the input of the full network.
base_model.input

<KerasTensor: shape=(None, 256, 256, 3) dtype=float32 (created by layer 'input_1')>

In [16]:
# Build the full network: base model input -> base model layers -> custom head.
# The Model ties the `head_model` output tensor back to `base_model.input`.
network = Model(inputs = base_model.input, outputs = head_model)

In [17]:
# Print the architecture of the combined network (base model + custom head).
network.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 262, 262, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 128, 128, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 128, 128, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                          

In [21]:
# Compile for the transfer-learning phase: Adam optimizer, categorical
# cross-entropy (labels come from class_mode='categorical'), accuracy metric.
network.compile(optimizer = 'adam', loss  ='categorical_crossentropy', metrics = ['accuracy'])

In [22]:
# Train on the training generator for 50 epochs; `history` keeps the value
# returned by fit(). NOTE(review): no validation data is passed, so
# generalization is not monitored during training.
history = network.fit(train_dataset, epochs = 50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Evaluating the neural network

## Fine tuning

전이 학습은 학습 데이터가 유사할 때 유리해.

그런데, 만약 학습 데이터가 유사하지 않다면??

이전의 합성곱 층이나 풀링 층을 선택해서 학습을 해야 해.

그리고 그렇게 이전 단계를 바꿀 경우 학습률은 매우 작게해야해

그렇지 않으면 나머지 구조의 가중치와 많이 달라져서 학습이 잘 안 될 수도 있어.

### Implementing

In [23]:
# Fine-tuning, step 1: mark the base model as trainable again before
# re-freezing only its earlier layers.
base_model.trainable = True

In [31]:
# Show the trainable flag of every base-model layer.
# NOTE(review): the saved output was produced at execution count 31, i.e.
# after the partial-freeze cell (count 30) had already run, which is why the
# first layers are listed as False.
for resnet_layer in base_model.layers:
    print(resnet_layer, resnet_layer.trainable)

<keras.src.engine.input_layer.InputLayer object at 0x7a67d9fa2fb0> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9fa08e0> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9fa0be0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f47ee0> False
<keras.src.layers.core.activation.Activation object at 0x7a67d9f46b60> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9f44970> False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7a67d9f46080> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9f454e0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f44e20> False
<keras.src.layers.core.activation.Activation object at 0x7a6872691b10> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a687265fa90> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization

In [26]:
# Total number of base-model layers, for choosing the fine-tuning cut-off index.
len(base_model.layers)

175

In [27]:
# Index of the first layer to fine-tune; layers before it stay frozen.
# It is better to adjust the later layers than the early ones, because the
# early layers hold generic weights that transfer as they are.
# Choosing too low an index can make training take too long.
fine_tuning_at = 140

In [30]:
# Fine-tuning, step 2: freeze every layer before the cut-off index so that
# only layers from `fine_tuning_at` onward are updated.
early_layers = base_model.layers[:fine_tuning_at]
for early_layer in early_layers:
    early_layer.trainable = False

In [33]:
# Re-compile so the updated `trainable` flags take effect for training.
# Fine-tuning unfreezes pre-trained layers, so the learning rate must be very
# small; Adam's default step size could destroy the pre-trained weights.
# An explicit low learning rate is therefore used instead of the 'adam' string.
network.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5),
                loss = 'categorical_crossentropy',
                metrics = ['accuracy'])

In [None]:
# Question: does `network` need to be rebuilt after changing the trainable
# flags on `base_model`? It does not appear so: `network` holds the same layer
# objects as `base_model`, so the flags set there are visible through
# `network.layers` as well.

In [36]:
# Check the trainable flags through the full network: the changes made on
# `base_model` are already reflected here.
for network_layer in network.layers:
    print(network_layer, network_layer.trainable)

<keras.src.engine.input_layer.InputLayer object at 0x7a67d9fa2fb0> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9fa08e0> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9fa0be0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f47ee0> False
<keras.src.layers.core.activation.Activation object at 0x7a67d9f46b60> False
<keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D object at 0x7a67d9f44970> False
<keras.src.layers.pooling.max_pooling2d.MaxPooling2D object at 0x7a67d9f46080> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a67d9f454e0> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization object at 0x7a67d9f44e20> False
<keras.src.layers.core.activation.Activation object at 0x7a6872691b10> False
<keras.src.layers.convolutional.conv2d.Conv2D object at 0x7a687265fa90> False
<keras.src.layers.normalization.batch_normalization.BatchNormalization

### Evaluate

## Saving and loading the model

## Classifying one single image