Transfer Learning  
다른 곳에서 이미 학습한 내용을 가지고와서 변경해서 사용하는 것  
CIFAR-10, 처음 학습된 데이터의 사이즈 32*32, class의 개수 20개일 경우 어떻게 처리할 것인지?  
이미지넷 : 1400만개 2만개 클래스(보통 실험시에는 1000개사용)  
모델을 어디서 얻느냐? https://tfhub.dev/  
classification => complete model : 모든 데이터를 다가져옴
feature_vector => headless model : 분류를 하는 마지막단계를 없앤것(목적게 맞게 써라)

In [1]:
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras.datasets import mnist, cifar10
import numpy as np
import matplotlib.pyplot as plt
import cv2

In [2]:
(c_x_train, c_y_train), (c_x_test, c_y_test) = cifar10.load_data()

In [3]:
(m_x_train, m_y_train), (m_x_test, m_y_test) = mnist.load_data()

In [4]:
print(c_x_train.shape)
print(c_y_test.shape)
print(m_y_test.shape)

(50000, 32, 32, 3)
(10000, 1)
(10000,)


In [5]:
m_y_train = tf.one_hot(m_y_train, 10)
m_y_test = tf.one_hot(m_y_test, 10)
c_y_train = tf.one_hot(c_y_train.squeeze(), 10)
c_y_test = tf.one_hot(c_y_test.squeeze(), 10)

In [6]:
print("m_x_train:",m_x_train.shape)
print("m_y_train:",m_y_train.shape)
print("c_x_train:",c_x_train.shape)
print("c_y_train:",c_y_train.shape)

m_x_train: (60000, 28, 28)
m_y_train: (60000, 10)
c_x_train: (50000, 32, 32, 3)
c_y_train: (50000, 10)


In [7]:
m_x_train_up = []
for img in m_x_train:
    m_x_train_up.append(cv2.resize(img, dsize=(32,32), interpolation=cv2.INTER_LINEAR))
    
m_x_test_up = []
for img in m_x_test:
    m_x_test_up.append(cv2.resize(img, dsize=(32,32), interpolation=cv2.INTER_LINEAR))

In [8]:
m_x_train = np.array(m_x_train_up).astype("float32") / 255.
m_x_train = tf.stack([m_x_train, m_x_train, m_x_train], axis = -1)

m_x_test = np.array(m_x_test_up).astype("float32") / 255.
m_x_test = tf.stack([m_x_test, m_x_test, m_x_test], axis = -1)

c_x_train = c_x_train.astype("float32") / 255.
c_x_test = c_x_test.astype("float32") / 255.

In [9]:
print(m_x_train.shape)
print(m_y_train.shape)
print(c_x_train.shape)
print(c_y_train.shape)

(60000, 32, 32, 3)
(60000, 10)
(50000, 32, 32, 3)
(50000, 10)


In [11]:
class TransferModel(Model):
    def __init__(self):
        super(TransferModel, self).__init__()
        
        self.conv1 = layers.Conv2D(64, 3, strides=(2,2), padding='same', use_bias=False, input_shape=(32,32,3))
        self.bn1 = layers.BatchNormalization()

        self.conv2 = layers.Conv2D(128, 3, strides=(2,2), padding='same', use_bias=False)
        self.bn2 = layers.BatchNormalization()
        
        self.conv3 = layers.Conv2D(256, 3, strides=(2,2), padding='same', use_bias=False)
        self.bn3 = layers.BatchNormalization()
        
        self.dense1 = layers.Dense(50)
        self.dense2 = layers.Dense(10)
        
    def call(self, x_batch, training=None):
        x = self.conv1(x_batch)
        x = self.bn1(x, training = training)
        x = layers.ReLU()(x)
        
        x = self.conv2(x)
        x = self.bn2(x, training = training)
        x = layers.ReLU()(x)
        
        x = self.conv3(x)
        x = self.bn3(x, training = training)
        x = layers.ReLU()(x)
        
        x = layers.Flatten()(x)
        x = self.dense1(x)
        x = self.dense2(x)
        
        return x
    
## 이쪽 부분이 핵심입니다.
    def freeze_except_final(self):
        self.conv1.trainable = False # backpropagation을 통해 업데이트 되지 않는다.
        self.bn1.trainable = False
        self.conv2.trainable = False
        self.bn2.trainable = False
        self.conv3.trainable = False
        self.bn3.trainable = False
        self.dense1.trainable = False # 마지막 레이어인 dense2만 제외하고 모두 얼림

In [12]:
model_m = TransferModel()
model_m_small = TransferModel()
model_c = TransferModel()

In [15]:
optimizer = tf.keras.optimizers.Adam(1e-3)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True) # one_hot이기 때문에 Sparse 사용안함, activation 없어서 from_logit=True
model_m.compile(optimizer, loss, metrics = ['accuracy'])
model_m_small.compile(optimizer, loss, metrics = ['accuracy'])
model_c.compile(optimizer, loss, metrics = ['accuracy'])

In [17]:
model_m.fit(m_x_train, m_y_train, batch_size=30, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7f460aa81d90>

In [18]:
model_m_small.fit(m_x_train[:50], m_y_train[:50], batch_size=30, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7f4693fd9ca0>

In [19]:
model_m_small.evaluate(m_x_test, m_y_test) # 학습을 적게한 모델 accuracy 0.3461



[2.2761852741241455, 0.34610000252723694]

In [20]:
model_c.fit(c_x_train, c_y_train, batch_size=30, epochs=10, validation_split=0.2) # validation_split : validation을 랜덤하게 split해야하는데 뒤에있는 20% 값을 가져오기때문에 위험하다.

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f4693eaf220>

In [21]:
model_c.evaluate(c_x_test, c_y_test)



[1.7263308763504028, 0.6335999965667725]

In [22]:
model_c.summary()

Model: "transfer_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            multiple                  1728      
_________________________________________________________________
batch_normalization_6 (Batch multiple                  256       
_________________________________________________________________
conv2d_7 (Conv2D)            multiple                  73728     
_________________________________________________________________
batch_normalization_7 (Batch multiple                  512       
_________________________________________________________________
conv2d_8 (Conv2D)            multiple                  294912    
_________________________________________________________________
batch_normalization_8 (Batch multiple                  1024      
_________________________________________________________________
dense_4 (Dense)              multiple             

In [23]:
model_c.evaluate(m_x_test, m_y_test) # cifar의 데이터셋과 mnist의 관계가 없기 때문에 0.0598



[12.729435920715332, 0.05979999899864197]

In [24]:
model_c.freeze_except_final()

In [25]:
model_c.summary()

Model: "transfer_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            multiple                  1728      
_________________________________________________________________
batch_normalization_6 (Batch multiple                  256       
_________________________________________________________________
conv2d_7 (Conv2D)            multiple                  73728     
_________________________________________________________________
batch_normalization_7 (Batch multiple                  512       
_________________________________________________________________
conv2d_8 (Conv2D)            multiple                  294912    
_________________________________________________________________
batch_normalization_8 (Batch multiple                  1024      
_________________________________________________________________
dense_4 (Dense)              multiple             

In [26]:
model_c.fit(m_x_train, m_y_train, batch_size=30, epochs=8)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x7f460aa9bbb0>

In [27]:
model_c.evaluate(m_x_test, m_y_test) # cifar의 데이터셋의 마지막 layer만 mnist로 학습했더니 accuracy 0.988



[0.041906874626874924, 0.9879999756813049]