##   
# Setting
> * 구글드라이브-코랩 연동
* 사용할 기본 모듈들 import
* 경로설정
* 시드고정함수
##   

In [None]:
# Connect Google Drive to this Colab session; it is mounted at /content/drive
from google.colab import drive
drive.mount('/content/drive')

# modules
import tensorflow as tf
from tensorflow import  keras
!pip install tensorflow_addons
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
import pandas as pd 
import random
import os

# Data paths: image folders on the mounted Drive, with the project folder
# as the working directory so relative file names resolve against it
train_dir = "/content/drive/MyDrive/project_dataset/dacon_v2/dirty_mnist_2nd"
test_dir = "/content/drive/MyDrive/project_dataset/dacon_v2/test_route"
os.chdir('/content/drive/MyDrive/project_dataset/dacon_v2')

# GPU check: stop right away unless TensorFlow reports the first GPU device
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

# Seed-fixing helper
def seed_everything(seed = 42):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy and TensorFlow with the same
    value and exports that value as ``PYTHONHASHSEED``.

    Args:
        seed: Seed value passed to every generator (default 42).
    """
    # NOTE(review): PYTHONHASHSEED is presumably only read at interpreter
    # start-up, so this likely affects child processes only — confirm.
    os.environ["PYTHONHASHSEED"] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


SEED = 1234  # global seed
seed_everything(SEED)

##  
# Data Preparing
> * 256*256 train image 5만장 45000 : 4000 : 1000 분할하기
* keras의 ImageDataGenerator를 이용한 augmentation
* pretrained 모델을 사용하기 위해 grayscaled 데이터를 rgb로 변환
##  

In [None]:
# Split sizes and the batch size used by the training generators
full_train_size = 49000
train_size = 45000
batch_size = 32


def _to_png_name(image_id):
    """Turn a numeric image id into its zero-padded file name, e.g. 00042.png."""
    return "{0:05d}".format(image_id) + '.png'


# Label table in the layout flow_from_dataframe expects:
# first column = image file name, remaining columns = label columns
meta_df = pd.read_csv('dirty_mnist_2nd_answer.csv')
meta_df['index'] = meta_df['index'].apply(_to_png_name)
columns = meta_df.columns[1:].tolist()


# train + valid : test = 49000 : 1000
train_full_df = meta_df.iloc[:full_train_size]
holdout_df = meta_df.iloc[full_train_size:]


# Augmentation strategy: identical settings for the train and hold-out generators
_augmentation = dict(
    rescale=1./255.,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)

# The train generator additionally reserves a fraction of its rows for validation
_validation_fraction = 1 - train_size / full_train_size
train_datagen = ImageDataGenerator(validation_split=_validation_fraction,
                                   **_augmentation)

holdout_datagen = ImageDataGenerator(**_augmentation)

# Iterators over the image files listed in the label tables.
# Every iterator reads from the same folder with the same settings; only the
# source frame and the subset differ.
_flow_options = dict(
    directory=train_dir,
    x_col='index',
    y_col=columns,
    batch_size=batch_size,
    seed=SEED,
    color_mode="rgb",
    class_mode='raw',
    target_size=(256, 256),
)

# 'training' and 'validation' are the two parts of train_datagen's validation_split
train_gen = train_datagen.flow_from_dataframe(dataframe=train_full_df,
                                              subset='training',
                                              **_flow_options)

valid_gen = train_datagen.flow_from_dataframe(dataframe=train_full_df,
                                              subset='validation',
                                              **_flow_options)

# Hold-out rows use their own generator and are not split further
holdout_gen = holdout_datagen.flow_from_dataframe(dataframe=holdout_df,
                                                  **_flow_options)

##  
# Model
> * seed 고정은 Cell마다 해줘야된다..
* keras.applications의 pretrained 모델을 하나의 layer처럼 가져와 사용
* top layers는 따로 선언해준다. output 층은 레이블 수 만큼의 뉴런을 갖도록 한다. 
* RAdam 옵티마이저를 사용하며 warm restart를 하는 cosine decay 방식의 lr schedule을 적용한다.
* Lookahead 기법을 적용하여 수렴 품질을 개선한다.
##  

In [None]:
seed_everything(SEED)
model_save_path = "./model/effi_b5_0227.h5"

# Backbone: EfficientNetB5 without its classification top; weights=None means
# no pretrained weights are loaded here
base_model = tf.keras.applications.EfficientNetB5(
    include_top=False,
    weights=None,
    input_shape=(256, 256, 3)
)

# Architecture: backbone -> global pooling -> two dense stages -> 26 sigmoid outputs
effi_b5 = keras.Sequential()
effi_b5.add(base_model)
effi_b5.add(tf.keras.layers.GlobalAveragePooling2D())
effi_b5.add(tf.keras.layers.Dropout(0.4))

# Each dense stage is Dense -> BatchNorm -> LeakyReLU -> Dropout
for _units, _drop_rate in ((1024, 0.3), (512, 0.2)):
    effi_b5.add(tf.keras.layers.Dense(_units, kernel_initializer='he_normal'))
    effi_b5.add(tf.keras.layers.BatchNormalization())
    effi_b5.add(tf.keras.layers.LeakyReLU(0.1))
    effi_b5.add(tf.keras.layers.Dropout(_drop_rate))

# Output layer: one sigmoid unit per label
effi_b5.add(tf.keras.layers.Dense(26, kernel_initializer='glorot_normal',
                                  activation='sigmoid'))

# Metric: per-label binary accuracy
BAcc = keras.metrics.BinaryAccuracy(name='binary_accuracy')

# LR schedule - cosine annealing with warm restarts.
# The first cosine period spans one fifth of the planned optimizer steps.
# Steps per epoch come from the configured train_size rather than a repeated
# literal, so the schedule follows the data split if it is changed.
n_epochs = 100
steps_per_epoch = train_size // batch_size
first_decay_steps = (steps_per_epoch * n_epochs) // 5
initial_learning_rate = 0.003

lr_decayed_fn = (
  tf.keras.experimental.CosineDecayRestarts(
      initial_learning_rate,
      first_decay_steps,
      t_mul=2.0,
      m_mul=0.95))

# Optimizer: Rectified Adam driven by the schedule above.
# NOTE(review): with warmup_proportion = 0 and no total_steps, min_lr
# presumably has no effect — confirm against the tensorflow_addons docs.
radam = tfa.optimizers.RectifiedAdam(learning_rate = lr_decayed_fn,
                                     weight_decay = 0.0001,
                                     warmup_proportion = 0,
                                     min_lr = 1e-6)
# Lookahead wrapper around RAdam
ranger = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)

# Compile & callbacks
effi_b5.compile(optimizer = ranger, loss = "binary_crossentropy", metrics = [BAcc])

# Make sure the checkpoint folder exists before training starts, so the first
# save cannot fail on a missing directory after a full epoch of work.
_checkpoint_dir = os.path.dirname(model_save_path)
if _checkpoint_dir:
    os.makedirs(_checkpoint_dir, exist_ok=True)

# Keep only the best model seen so far on disk
checkpoint = keras.callbacks.ModelCheckpoint(model_save_path,
                                             save_best_only=True, verbose=1)
# Stop after 20 epochs without improvement and roll back to the best weights
early_stop_cb = keras.callbacks.EarlyStopping(patience = 20, restore_best_weights = True)


# Fitting
history = effi_b5.fit(train_gen, epochs = n_epochs,
                      validation_data = valid_gen,
                      callbacks = [checkpoint, early_stop_cb])

In [None]:
# Check the learning curves recorded by fit()
import matplotlib.pyplot as plt

# The "seaborn-dark" style sheet is named "seaborn-v0_8-dark" in newer
# Matplotlib releases; use whichever one this installation provides and fall
# back to the default style if neither exists.
for _style_name in ("seaborn-dark", "seaborn-v0_8-dark"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

pd.DataFrame(history.history).plot(figsize = (16, 9))
plt.grid(True)
plt.gca().set_ylim(0, 1)  # the y-axis is clipped to the 0..1 range
plt.show()

##  
# (pseudo) Transfer Learning
> * 위와 같이 학습한 모델에서 clf head는 버리고 몸통부분만 가져온다.
* 새로운 clf head를 정의하고 그대로 복사한 몸통부분 가중치는 freezing한다.
* clf head만 우선적으로 몇 에폭 학습시킨 후, 몸통부분의 동결을 해제하고 전체 모델에 대해 update 진행
##  

### STEP 1 - train only head

In [None]:
seed_everything(SEED)

# Load the model trained in the previous section
pretrain = tf.keras.models.load_model(model_save_path)

# Freeze the body (first layer = EfficientNet backbone) so only the new head trains
pretrain.layers[0].trainable = False

# Architecture: reused frozen body + a freshly initialised classifier head.
# The body is taken from `pretrain`, the model loaded just above.
effi_transfer = keras.Sequential([
                                  pretrain.layers[0],
                                  tf.keras.layers.GlobalAveragePooling2D(),
                                  tf.keras.layers.Dropout(0.3),

                                  tf.keras.layers.Dense(1024, kernel_initializer='he_normal'),
                                  tf.keras.layers.BatchNormalization(),
                                  tf.keras.layers.LeakyReLU(0.2),
                                  tf.keras.layers.Dropout(0.25),

                                  tf.keras.layers.Dense(26, kernel_initializer='glorot_normal', activation='sigmoid'),
])


# Compile
effi_transfer.compile(optimizer = 'adam', loss = "binary_crossentropy", metrics = [BAcc])

# Make sure the checkpoint folder exists before the first save
os.makedirs('./model', exist_ok=True)
checkpoint = keras.callbacks.ModelCheckpoint('./model/effi_head_tuning.h5',
                                             save_best_only=True, verbose=1)

# Fitting: a few epochs for the head only
effi_transfer.fit(train_gen, epochs = 5,
                   validation_data = valid_gen,
                   callbacks = [checkpoint])

### STEP 2 - fine tuning

In [None]:
seed_everything(SEED)

# Unfreeze the body so the whole network is updated
effi_transfer.layers[0].trainable = True

# Build a fresh optimizer for this stage instead of reusing `ranger`: that
# instance has already been stepped while training effi_b5, so its step
# counter would make the cosine schedule resume part-way through.
# The hyper-parameters mirror the original RAdam + Lookahead setup.
fine_tune_radam = tfa.optimizers.RectifiedAdam(learning_rate = lr_decayed_fn,
                                               weight_decay = 0.0001,
                                               warmup_proportion = 0,
                                               min_lr = 1e-6)
fine_tune_ranger = tfa.optimizers.Lookahead(fine_tune_radam,
                                            sync_period=6, slow_step_size=0.5)

# Compile - the model must be re-compiled after changing which weights are frozen
effi_transfer.compile(optimizer = fine_tune_ranger, loss = "binary_crossentropy", metrics = [BAcc])

# Callbacks
# Make sure the checkpoint folder exists before the first save
os.makedirs('./model', exist_ok=True)
checkpoint = keras.callbacks.ModelCheckpoint('./model/effi_head_change.h5',
                                             save_best_only=True, verbose=1,
                                             mode = "min",monitor = "val_loss")
early_stop_cb = keras.callbacks.EarlyStopping(patience = 20, restore_best_weights = True)

# Fitting
effi_transfer.fit(train_gen, epochs = n_epochs,
                   validation_data = valid_gen,
                   callbacks = [checkpoint, early_stop_cb])

##   
# Prediction
> * Test Time Augmentation을 통해 데이터의 변동에 대한 robustness 향상시키기
* Monte Carlo Dropout을 통해 드랍아웃 모델의 앙상블 실현
* for loop 두 개로 위의 두 가지 기법을 동시에 적용한다. (TTA 루프 안에서 MC dropout 루핑)
##   

In [None]:
# Data preparation: the submission template doubles as the list of test images
test_submit = pd.read_csv('sample_submission.csv')
test_df = test_submit.copy()
test_df['index'] = test_submit['index'].apply(lambda x: "{:0>5d}".format(x) + '.png')

batch_size = 250

# For TTA, apply the same augmentation as on the train set.
_tta_datagen = ImageDataGenerator(rescale=1./255.,
                                  rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode="nearest")

# The row order must be preserved when the prediction frame for submission
# is built, so shuffle has to be False.
_test_flow_options = dict(
    directory='./test_route/test_dirty_mnist_2nd',
    x_col='index',
    batch_size=batch_size,
    shuffle=False,
    color_mode="rgb",
    class_mode=None,
    target_size=(256, 256),
)
test_gen = _tta_datagen.flow_from_dataframe(dataframe=test_df, **_test_flow_options)

In [None]:
seed_everything(SEED)

# Load the trained model from disk; it is the source of the layers and weights
# for the Monte Carlo dropout model built below
model = keras.models.load_model("./model/effi_0301.h5")

# Monte Carlo dropout layer
class MCDropout(keras.layers.Dropout):
    """Dropout layer whose forward pass always runs in training mode."""

    def call(self, inputs):
        # Forcing training mode keeps dropout active during prediction as well
        stochastic_output = super().call(inputs, training=True)
        return stochastic_output

# Replace normal dropout layers with MC dropout layers
def _to_mc_layer(layer):
    """Return an MCDropout with the same rate for Dropout layers, else the layer itself."""
    if isinstance(layer, keras.layers.Dropout):
        return MCDropout(layer.rate)
    return layer


mc_model = keras.models.Sequential(list(map(_to_mc_layer, model.layers)))


# Compile, then copy the weights over from the loaded model
mc_model.compile(optimizer = ranger, loss = "binary_crossentropy", metrics = [BAcc])
mc_model.set_weights(model.get_weights())


# Settings for TTA and MC dropout
tta_steps = 50
mc_steps = 50
predictions = []
test_size = len(test_df)
columns = list(test_df.columns[1:])
filename = "efficientB5.csv"

# Number of batches needed to cover every test row. Taking it from the
# iterator instead of test_size // batch_size keeps the trailing rows when
# test_size is not a multiple of batch_size; the two are equal when it is.
predict_steps = len(test_gen)


# Double loop: outer = augmentation rounds, inner = MC dropout samples
for i in range(tta_steps):
    print("*********** augment iter {} ***********".format(i))
    # Each prediction comes from a different randomly dropped-out network.
    # Model.predict accepts the iterator directly and replaces the
    # deprecated predict_generator.
    mc_pred = [mc_model.predict(test_gen,
                                steps = predict_steps,
                                verbose = 1) for _ in range(mc_steps)]
    # Dropout ensemble: average over the MC samples
    mc_pred_means = np.mean(mc_pred, axis = 0)
    # Store the averaged prediction of this augmentation round
    predictions.append(mc_pred_means)


# Final prediction: average over the augmentation rounds
pred = np.mean(predictions, axis=0)

# Get labels by rounding the averaged probabilities
pred_sub = np.round(pred)

# Create the submission frame: first column of the template followed by the
# integer labels, written without the pandas index
res = pd.DataFrame(pred_sub, columns=columns)
int_rest = res.astype(int)
_id_column = test_submit.iloc[:, 0]
submit = pd.concat([_id_column, int_rest], axis=1)
submit.to_csv(filename, index=False)

# ETC.

In [None]:
# Check the model's learning rate.
# The backend is taken from tf.keras so it matches the Keras used elsewhere
# in this notebook.
from tensorflow.keras import backend as K

current_lr = model.optimizer.lr
# When the optimizer was built with a schedule, `lr` is the schedule object
# rather than a number; evaluate it at the optimizer's current step.
if isinstance(current_lr, tf.keras.optimizers.schedules.LearningRateSchedule):
    current_lr = current_lr(model.optimizer.iterations)

print(K.eval(current_lr))

In [None]:
# Load training_labels.csv and create the objects shared by all folds
train_data = pd.read_csv('training_labels.csv')
Y = train_data[['label']]

# 5-fold splitter
kf = KFold(n_splits=5)

# One augmenting ImageDataGenerator instance reused for every fold
_idg_options = {
    'rescale': 1./255,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.3,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}
idg = ImageDataGenerator(**_idg_options)

# Helper that builds the checkpoint file name for fold k
def get_model_name(k):
    """Return the model file name for fold ``k``, e.g. ``model_3.h5``."""
    name_parts = ('model_', str(k), '.h5')
    return ''.join(name_parts)


# Per-fold validation results
VALIDATION_ACCURACY = []
VALIDATION_LOSS = []
# The list was originally created under a misspelled name while the loop
# appended to VALIDATION_LOSS; the old name is kept as an alias of the same list.
VALIDAITON_LOSS = VALIDATION_LOSS

save_dir = '/saved_models/'
fold_var = 1

# Make sure the checkpoint folder exists before the first save
os.makedirs(save_dir, exist_ok=True)

# NOTE(review): KFold, image_dir, create_new_model, opt and num_epochs are not
# defined in the visible part of this notebook — they must be provided before
# this cell runs.
# The splitter only needs the number of samples, so it is taken from
# train_data directly.
for train_index, val_index in kf.split(np.zeros(len(train_data)), Y):
    training_data = train_data.iloc[train_index]
    validation_data = train_data.iloc[val_index]

    train_data_generator = idg.flow_from_dataframe(training_data, directory = image_dir,
                               x_col = "filename", y_col = "label",
                               class_mode = "categorical", shuffle = True)
    valid_data_generator  = idg.flow_from_dataframe(validation_data, directory = image_dir,
                            x_col = "filename", y_col = "label",
                            class_mode = "categorical", shuffle = True)

    # Create a new model for this fold
    model = create_new_model()
    # Compile the new model
    model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

    # Create callbacks: keep only the weights with the best validation accuracy.
    # The same path is used for saving and for re-loading below.
    best_model_path = save_dir + get_model_name(fold_var)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(best_model_path,
                            monitor='val_accuracy', verbose=1,
                            save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

    history = model.fit(train_data_generator,
                epochs=num_epochs,
                callbacks=callbacks_list,
                validation_data=valid_data_generator)

    # Load the best checkpoint of this fold to evaluate its performance
    model.load_weights(best_model_path)

    results = model.evaluate(valid_data_generator)
    results = dict(zip(model.metrics_names,results))

    VALIDATION_ACCURACY.append(results['accuracy'])
    VALIDATION_LOSS.append(results['loss'])

    # Release the Keras state of this fold before the next one starts
    tf.keras.backend.clear_session()

    fold_var += 1