# 3등 endgame님 <Resnet 변형 모델 + Inception v3 기반 모델>

## # import libraries

In [None]:
'''
import gc
gc.collect()
중간중간에 불필요한 메모리를 정리해 준다. 
(https://blog.naver.com/pica4star/221443758311)

import pickle
pickle은 프로그램상에서 사용하고 있는 데이터를 파일형태로 저장한다.
(https://blog.naver.com/wjdwngkdsla/221978274816)
(https://blog.naver.com/mania9899/221624931960)

import seaborn as sns
seaborn은 시각화 라이브러리이다. 
내가 파이참에서 model 구현시 이걸 써줘야 step이 아니라 epoch으로 모니터링되면서 진행됐다.
(https://blog.naver.com/tkdzma8080/221793003678)
'''

# 파일관리 및 파일선택
import os
import pickle
import random
import gc

# 시각화
import seaborn as sns
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import SeparableConv2D, Input, Conv2D, Add, BatchNormalization, concatenate, AveragePooling2D, add, MaxPooling2D, Conv2DTranspose, Activation, Dropout, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger


# Seed the NumPy, Python and TensorFlow random generators with one shared value.
SEED = 30
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

## # evaluation metric 정의

In [None]:
def mae(y_true, y_pred):
    """Return the mean absolute error over pixels whose true value is >= 0.1.

    Both inputs are converted to NumPy arrays and flattened, so any matching
    shapes are accepted. Pixels with a true value below 0.1 are ignored.
    When no pixel reaches 0.1 the mean of an empty selection is returned
    (NaN).
    """
    truth = np.array(y_true).ravel()
    guess = np.array(y_pred).ravel()

    # Only score the positions where the ground truth is at least 0.1.
    rainy = truth >= 0.1
    abs_error = np.abs(truth[rainy] - guess[rainy])

    return np.mean(abs_error)

def fscore(y_true, y_pred):
    """Return the F1 score of the "value >= 0.1" classification.

    Both inputs are flattened. Positions whose true value is negative are
    dropped, then truth and prediction are binarised at 0.1 (1 when >= 0.1,
    else 0) and passed to ``f1_score``.
    """
    truth = np.array(y_true).ravel()
    guess = np.array(y_pred).ravel()

    # Keep only positions with a non-negative ground truth.
    valid = truth >= 0

    truth_label = np.where(truth[valid] >= 0.1, 1, 0)
    guess_label = np.where(guess[valid] >= 0.1, 1, 0)

    return f1_score(truth_label, guess_label)

def maeOverFscore(y_true, y_pred):
    """Return ``mae / (fscore + 1e-07)`` for the given truth and prediction.

    The small constant keeps the division defined when the F1 score is 0.
    """
    error = mae(y_true, y_pred)
    f1 = fscore(y_true, y_pred)
    return error / (f1 + 1e-07)

## # 데이터 전처리

In [None]:
# Directories that hold the per-sample .npy files read by make_dataset.
dir_train = 'data/train/'
dir_test = 'data/test/'
# Intended value for make_dataset's UPPER argument: the minimum number of
# pixels with target >= 0.1 a training sample needs in order to be kept.
UPPER = 50

def make_dataset(dir_train, dir_test, UPPER):
    """Read the per-sample ``.npy`` files and cache them as pickled arrays.

    Args:
        dir_train: Directory of training arrays. The last channel of each
            array is used as the target, the remaining channels as inputs.
        dir_test: Directory of test arrays, stored unchanged.
        UPPER: Minimum number of target pixels >= 0.1 a training sample
            must contain to be kept.

    Side effects:
        Writes ``data/train{UPPER}.pickle``, ``data/train_y{UPPER}.pickle``
        and ``data/test.pickle`` (pickle protocol 4) relative to the current
        working directory.

    Returns:
        None.
    """
    # train dataset
    train = []
    train_y = []

    # os.listdir() returns names in arbitrary order. Sorting makes the cached
    # arrays reproducible across machines and runs.
    for name in sorted(os.listdir(dir_train)):
        npy = np.load(os.path.join(dir_train, name))
        target = npy[:, :, -1]

        # Drop samples whose target sums below zero (treated as containing
        # missing values).
        if target.sum() < 0:
            continue

        # Keep only samples with at least UPPER pixels of target >= 0.1.
        if (target >= 0.1).sum() >= UPPER:
            train.append(npy[:, :, :-1])
            train_y.append(target)

    train = np.array(train)
    train_y = np.array(train_y)

    # The arrays are written to disk and released right away so they do not
    # stay in RAM. The protocol is pinned to 4 (the original author's note
    # says this was chosen for Python 3.6).
    with open(f'data/train{UPPER}.pickle', 'wb') as f:
        pickle.dump(train, f, protocol=4)

    with open(f'data/train_y{UPPER}.pickle', 'wb') as f:
        pickle.dump(train_y, f, protocol=4)

    del train
    del train_y

    # test dataset
    # Sorted for the same reason as above. Predictions are later written into
    # the submission row by row, so the order of the test samples matters.
    # NOTE(review): presumably sample_submission.csv lists ids in sorted
    # file-name order -- confirm against the competition files.
    test = np.array([
        np.load(os.path.join(dir_test, name))
        for name in sorted(os.listdir(dir_test))
    ])

    with open('data/test.pickle', 'wb') as f:
        pickle.dump(test, f, protocol=4)
    
# Build the pickled datasets with the threshold constant defined above rather
# than a repeated literal. The loading cell reads 'data/train50.pickle', so
# UPPER has to stay 50 unless those file names are changed as well.
make_dataset(dir_train, dir_test, UPPER)

## # Data 불러오기

In [None]:
# Training inputs: load the cached array and keep only channels 0-9.
with open('data/train50.pickle', 'rb') as train_file:
    train = pickle.load(train_file)
train = train[:, :, :, :10]

# Training targets: add an explicit single-channel axis -> (N, 40, 40, 1).
with open('data/train_y50.pickle', 'rb') as target_file:
    train_y = pickle.load(target_file)
train_y = train_y.reshape(len(train_y), 40, 40, 1)

# Test inputs: same channel selection as the training inputs.
with open('data/test.pickle', 'rb') as test_file:
    TEST = pickle.load(test_file)
TEST = TEST[:, :, :, :10]

## # 탐색적 자료분석 (Exploratory Data Analysis)

In [None]:
# Build the summed v-channel and h-channel images (element-wise channel sums).
def show_img(img):
    """Return ``img`` with two extra channels: the v-sum and the h-sum.

    Args:
        img: Array indexed as (height, width, channel) with at least 9
            channels. Channels 0, 2, 4, 5, 7 are summed into the "v" plane
            and channels 1, 3, 6, 8 into the "h" plane.

    Returns:
        A new array of shape (height, width, channels + 2); the v-sum is the
        second-to-last channel and the h-sum the last one. ``img`` itself is
        not modified.
    """
    # Built-in sum() starts from 0 and adds left to right, which is the same
    # order as accumulating into a variable initialised to 0.
    v_sum = sum(img[:, :, i] for i in (0, 2, 4, 5, 7))
    h_sum = sum(img[:, :, i] for i in (1, 3, 6, 8))

    # A trailing axis replaces the former hard-coded reshape(40, 40, 1), so
    # any height/width works.
    return np.concatenate(
        [img, v_sum[:, :, np.newaxis], h_sum[:, :, np.newaxis]], -1)


# Pick one file of the training directory at random
# (image_dir[random.randrange(len(image_dir))]) and add the v/h sum channels.
image_dir = os.listdir('data/train/')
image_sample = np.load(f'data/train/{image_dir[random.randrange(len(image_dir))]}')
image_sample = show_img(image_sample)

# Reversed RdBu colormap. plt.cm.get_cmap is deprecated since Matplotlib 3.7
# and removed in later releases; pyplot.get_cmap is the supported lookup.
color_map = plt.get_cmap('RdBu').reversed()
plt.style.use('fivethirtyeight')
plt.figure(figsize=(10, 10))

# Panels 1-9 of the 2x6 grid: channels 0-8.
for i in range(9):
    plt.subplot(2, 6, i + 1)
    plt.imshow(image_sample[:, :, i], cmap=color_map)
    plt.title(f'ch_{i}', fontdict={'fontsize': 16})

# Panels 10-12: the last three channels. show_img appended v_sum and h_sum,
# so index -3 is the last channel of the file as loaded (titled 'rain').
for panel, (channel, label) in enumerate(
        [(-3, 'rain'), (-2, 'v_sum'), (-1, 'h_sum')], start=10):
    plt.subplot(2, 6, panel)
    plt.imshow(image_sample[:, :, channel], cmap=color_map)
    plt.title(label, fontdict={'fontsize': 16})

plt.subplots_adjust(top=0.5)
plt.show()


In [None]:
'''
0:'v1',1:'h1',2:'v2',3:'h2',4:'v3',5:'v4',6:'h4',7:'v5',8:'h5',9:'surface',10:'target'
correlation을 분석하기 위해서 데이터를 reshape해서 2차원 Dataframe을 만든다. (pandas의 corr() 함수를 이용)
각 columns는 'v1'~'target'까지 11개의 columns를 갖는다. value가 너무 크다고 예상되면 log를 취한다.

https://blog.naver.com/kiddwannabe/221763497317
https://blog.naver.com/wtracer/221738979637

'''

# Flatten to one row per pixel and keep every 1600th row starting at row 400.
# With the 40x40 maps used here that is exactly one pixel per image.
# Subsampling the arrays BEFORE building the DataFrame gives the same rows
# as building a frame of every pixel and slicing it afterwards, without
# materialising the full per-pixel table.
pixel_features = train.reshape(-1, train.shape[3])[400::1600]
pixel_target = train_y.reshape(-1, train_y.shape[3])[400::1600]
pixel_target = np.log(pixel_target + 1)  # same log(y + 1) transform as the model target

df_corr = pd.DataFrame(np.concatenate([pixel_features, pixel_target], -1))
del pixel_features, pixel_target

# Name the columns: the input channels followed by the log-transformed target.
df_corr = df_corr.rename(columns={0:'v1',1:'h1',2:'v2',3:'h2',4:'v3',5:'v4',
                                  6:'h4',7:'v5',8:'h5',9:'surface',10:'target'})

In [None]:
'''
v, h 채널의 합 또는 차이가 도움을 줄 지 확인을 해봤습니다. 아주 조금 상관관계가 상승하는 것을 확인할 수 있었고,
두 피쳐간의 합과 차이는 45도 회전변환 시의 상관관계와 같다는 아이디어에서 착안, 각각 30도, 45, 60도 회전변환해보았습니다.
그 결과 v1-h1, v2-h2, v4-h4는 45도 회전변환시 아주 조금 상관관계가 증가하였고,
v5-h5는 30도 변환 시 상관관계가 매우 크게 증가하는 것을 확인하여 회전변환한 피쳐를 사용하였습니다.
'''

'''
df_corr['ch1_rot1'] = df_corr['v1'] * np.cos(np.pi / 4) + df_corr['h1'] * np.sin(np.pi / 4)
df_corr['ch1_rot2'] = df_corr['v1'] * np.cos(np.pi / 4) - df_corr['h1'] * np.sin(np.pi / 4)

df_corr['ch2_rot1'] = df_corr['v2'] * np.cos(np.pi / 4) + df_corr['h2'] * np.sin(np.pi / 4)
df_corr['ch2_rot2'] = df_corr['v2'] * np.cos(np.pi / 4) - df_corr['h2'] * np.sin(np.pi / 4)

df_corr['ch4_rot1'] = df_corr['v4'] * np.cos(np.pi / 4) + df_corr['h4'] * np.sin(np.pi / 4)
df_corr['ch4_rot2'] = df_corr['v4'] * np.cos(np.pi / 4) - df_corr['h4'] * np.sin(np.pi / 4)

df_corr['ch5_rot1'] = df_corr['v5'] * np.cos(np.pi / 6) + df_corr['h5'] * np.sin(np.pi / 6)
df_corr['ch5_rot2'] = df_corr['v5'] * np.cos(np.pi / 6) - df_corr['h5'] * np.sin(np.pi / 6)

df_corr.corr()['target']

del df_corr
gc.collect()

# 사실 좀 이해 안가는 내용
'''

## # feature engineering

In [None]:
# Log-transform the target: y -> log(y + 1).
train_y = np.log(train_y+1)


# Add the summed v-channel and summed h-channel as two extra features.
def channel_sum(data):
    """Return ``data`` with the v-sum and h-sum appended as new channels.

    Args:
        data: 4-D array indexed as (sample, height, width, channel) with at
            least 9 channels.

    Returns:
        A new array with two more channels than ``data``: first the sum of
        channels 0, 2, 4, 5, 7 ("v"), then the sum of channels 1, 3, 6, 8
        ("h"). ``data`` itself is left untouched.
    """
    # The additions already allocate fresh arrays, so no explicit copies of
    # the individual channel slices are needed.
    data_v = (data[:, :, :, 0] + data[:, :, :, 2] + data[:, :, :, 4]
              + data[:, :, :, 5] + data[:, :, :, 7])
    data_h = (data[:, :, :, 1] + data[:, :, :, 3]
              + data[:, :, :, 6] + data[:, :, :, 8])

    # One concatenate call copies the large input once instead of twice.
    return np.concatenate(
        [data, data_v[..., np.newaxis], data_h[..., np.newaxis]], -1)

# Append the v-sum / h-sum channels to the training inputs, then to the test inputs.
train = channel_sum(train)
TEST = channel_sum(TEST)


# Scale only channel 9 (surface type) by dividing it by 322.
train[..., 9] = train[..., 9] / 322
TEST[..., 9] = TEST[..., 9] / 322

In [None]:
# Replace the (v1,h1), (v2,h2), (v4,h4), (v5,h5) channel pairs with their
# cos/sin rotated combinations, which are then used as model inputs.
def rotation(data):
    """Rotate four (v, h) channel pairs of ``data`` in place and return it.

    For every pair the two combinations ``v*cos(a) - h*sin(a)`` and
    ``v*cos(a) + h*sin(a)`` are computed from the unmodified channels and
    written back over them:

    * channels (0, 1) and (2, 3): angle pi/4, difference stored first;
    * channels (5, 6): angle pi/4, sum stored first;
    * channels (7, 8): angle pi/6, sum stored first.

    All other channels are left unchanged. The input array is modified and
    the same object is returned.
    """
    # (v channel, h channel, angle, store the difference in the v slot?)
    pairs = (
        (0, 1, np.pi / 4, True),
        (2, 3, np.pi / 4, True),
        (5, 6, np.pi / 4, False),
        (7, 8, np.pi / 6, False),
    )

    for v_idx, h_idx, angle, difference_first in pairs:
        # Both products are new arrays, so they are unaffected by the
        # in-place writes below.
        v_part = data[:, :, :, v_idx] * np.cos(angle)
        h_part = data[:, :, :, h_idx] * np.sin(angle)
        difference = v_part - h_part
        total = v_part + h_part

        if difference_first:
            data[:, :, :, v_idx] = difference
            data[:, :, :, h_idx] = total
        else:
            data[:, :, :, v_idx] = total
            data[:, :, :, h_idx] = difference

    return data

# Apply the rotated-channel features to both splits (rotation() also modifies
# the array it is given in place).
train = rotation(train)
TEST = rotation(TEST)

## # model training & evaluation

### resnet

In [None]:
# resnet
def resnet_model(shape):
    """Build the fully convolutional model used for the first submission.

    The network is a stack of BatchNormalization -> Conv2D(relu, he_normal)
    stages. From the second stage on, each stage's output is concatenated
    onto its input along the channel axis, so later stages see all earlier
    feature maps. Every convolution uses stride 1 and 'same' padding, so the
    spatial size of the input is preserved.

    Args:
        shape: Shape of one input sample, passed to ``Input``.

    Returns:
        An uncompiled ``Model`` whose output has a single relu channel.
    """
    def relu_conv(filters, kernel):
        # Every convolution in this model shares these settings.
        return Conv2D(filters, kernel_size=kernel, strides=1, padding='same',
                      activation='relu', kernel_initializer='he_normal')

    def grow(features, filters, kernel):
        # Normalise, convolve, then append the new maps to the running stack.
        normed = BatchNormalization()(features)
        fresh = relu_conv(filters, kernel)(normed)
        return concatenate([features, fresh], axis=3)

    inputs = Input(shape)

    # Stem: 1x1 convolution to 256 channels (nothing to concatenate with yet).
    normed = BatchNormalization()(inputs)
    stack = relu_conv(256, 1)(normed)

    stack = grow(stack, 128, 2)
    stack = grow(stack, 64, 3)

    # Increased from 5 to 9 repetitions.
    for _ in range(9):
        stack = grow(stack, 32, 3)

    # Head: 1x1 convolution down to the single output channel.
    normed = BatchNormalization()(stack)
    outputs = relu_conv(1, 1)(normed)

    return Model(inputs=inputs, outputs=outputs)

In [None]:
# augmentation

# Stack the target as the last channel so that every geometric transform is
# applied to the inputs and the target together.
train = np.concatenate([train, train_y], -1)

# Arrays are (sample, height, width, channel). np.fliplr / np.flipud always
# act on axes 1 and 0, so on a batch np.flipud only reverses the ORDER of the
# samples (adding exact duplicates) and np.fliplr flips the rows. np.flip
# with explicit axes gives the intended per-image mirror images.
# A single concatenate keeps the same block order (original, three rotations,
# left-right flip, up-down flip) and avoids the repeatedly growing copies.
train = np.concatenate([
    train,
    np.rot90(train, 1, (1, 2)),
    np.rot90(train, 2, (1, 2)),
    np.rot90(train, 3, (1, 2)),
    np.flip(train, axis=2),  # left-right mirror (width axis)
    np.flip(train, axis=1),  # up-down mirror (height axis)
], axis=0)

# Split the augmented stack back into inputs and a single-channel target.
train_y = train[:, :, :, -1:].copy()
train = train[:,:,:,:-1]

In [None]:
# training and evaluation

from sklearn.model_selection import train_test_split
# Set aside 2.5% of the samples.
# NOTE(review): `test` / `test_y` are not used again in the code shown here.
train, test, train_y, test_y = train_test_split(train, train_y, test_size=0.025, random_state=SEED)

model_number = 0
history = []
scores = []

# FOLD is set to 100 so that almost all of the data is used for training.
# Only the first fold is trained (the original loop stopped after fold 1),
# so the first split is taken directly instead of looping and breaking.
FOLD = 100
k_fold = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)

train_idx, val_idx = next(k_fold.split(train, train_y))
x_train, y_train = train[train_idx], train_y[train_idx]
x_val, y_val = train[val_idx], train_y[val_idx]

model = resnet_model(train.shape[1:])
model.summary()
model.compile(loss="mae", optimizer="adam", metrics=["mae"])

es = EarlyStopping(patience=9, verbose=1)
mc = ModelCheckpoint(f'model1_best_{model_number}.h5', save_best_only=True, verbose=1)
rlp = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.8, min_lr=0.0001)
csv_logger = CSVLogger(f'training_{model_number}.csv')

# First run: up to 53 epochs (the original run was interrupted by mistake here).
model.fit(x_train, y_train, epochs = 53, validation_data=(x_val, y_val), verbose=1, batch_size = 64, callbacks = [es, mc, rlp, csv_logger])

# Continuation: up to 14 more epochs (the original run was stopped because the
# MAE no longer improved). A CSVLogger created with the default append=False
# truncates its file when training starts, so the continuation gets a logger
# in append mode to keep the rows of the first run.
csv_logger_resume = CSVLogger(f'training_{model_number}.csv', append=True)
model.fit(x_train, y_train, epochs = 14, validation_data=(x_val, y_val), verbose=1, batch_size = 64, callbacks = [es, mc, rlp, csv_logger_resume])

# Predict with the best checkpoint and undo the log(y + 1) target transform.
model.load_weights(f'model1_best_{model_number}.h5')
res = model.predict(TEST)
result = (np.exp(res)-1)
submission = pd.read_csv('data/sample_submission.csv')
submission.iloc[:,1:] = result.reshape(-1, 1600)
submission.to_csv('model1_resnet.csv', index = False)

### inception

In [None]:
# inception
# https://norman3.github.io/papers/docs/google_inception.html
# 인셉션 v3 모델 앞 부분에서 착안하여 모델을 만들어 보았습니다.

In [None]:
def inception(shape_, LOOP):
    """Build the Inception-style fully convolutional model.

    A three-stage stem (BatchNormalization -> Conv2D with channel
    concatenation) is followed by ``LOOP`` four-branch modules. Each module
    normalises its input, runs a 1x1 branch, a 1x1 -> 3x3 branch, a
    1x1 -> 3x3 -> 3x3 branch and a 3x3 average-pool -> 1x1 branch (32 filters
    each), and concatenates the four results, which replace the running
    feature map. All layers use stride 1 and 'same' padding.

    Args:
        shape_: Shape of one input sample, passed to ``Input``.
        LOOP: Number of four-branch modules to stack.

    Returns:
        An uncompiled ``Model`` whose output has a single relu channel.
    """
    act = 'relu'

    def stem_conv(filters, kernel):
        # Stem and head convolutions: explicit stride and he_normal init.
        return Conv2D(filters, kernel_size=kernel, strides=1, padding='same',
                      activation=act, kernel_initializer='he_normal')

    def branch_conv(kernel):
        # Branch convolutions: 32 filters, default initializer.
        return Conv2D(32, kernel, padding='same', activation=act)

    input_ = Input(shape=shape_)

    # Stem.
    normed = BatchNormalization()(input_)
    stem = stem_conv(256, 1)(normed)
    normed = BatchNormalization()(stem)
    fresh = stem_conv(128, 2)(normed)
    features = concatenate([stem, fresh], axis=3)

    normed = BatchNormalization()(features)
    fresh = stem_conv(64, 3)(normed)
    features = concatenate([features, fresh], axis=3)

    # Four-branch modules.
    for _ in range(LOOP):
        normed = BatchNormalization()(features)

        branch_1x1 = branch_conv(1)(normed)

        branch_3x3 = branch_conv(1)(normed)
        branch_3x3 = branch_conv(3)(branch_3x3)

        branch_double_3x3 = branch_conv(1)(normed)
        branch_double_3x3 = branch_conv(3)(branch_double_3x3)
        branch_double_3x3 = branch_conv(3)(branch_double_3x3)

        branch_pool = AveragePooling2D(
            pool_size=(3, 3), strides=1, padding='same')(normed)
        branch_pool = branch_conv(1)(branch_pool)

        features = concatenate(
            [branch_1x1, branch_3x3, branch_double_3x3, branch_pool])

    # Head: 1x1 convolution down to the single output channel.
    normed = BatchNormalization()(features)
    outputs = stem_conv(1, 1)(normed)

    return Model(inputs=input_, outputs=outputs)

In [None]:
model_number = 0
history = []
scores = []

# As above, FOLD is 100 so that almost all of the data is used for training.
# Training was stopped during fold 1, so only the first split is used.
FOLD = 100
k_fold = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)

train_idx, val_idx = next(k_fold.split(train, train_y))
x_train, y_train = train[train_idx], train_y[train_idx]
x_val, y_val = train[val_idx], train_y[val_idx]

model = inception(train.shape[1:] , 5)
model.summary()
model.compile(loss="mae", optimizer="adam", metrics=["mae"])

# Stop after 9 epochs without improvement, keep the best weights on disk and
# shrink the learning rate by 0.8 after 4 stagnant epochs.
es = EarlyStopping(patience=9, verbose=1)
mc = ModelCheckpoint(f'model2_best_{model_number}.h5', save_best_only=True, verbose=1)
rlp = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.8, min_lr=0.0001)

model.fit(x_train, y_train, epochs = 85, validation_data=(x_val, y_val), verbose=1, batch_size = 64, callbacks = [es, mc, rlp])

# Predict with the best checkpoint, then undo the log(y + 1) target transform.
model.load_weights(f'model2_best_{model_number}.h5')
res = model.predict(TEST)

result = (np.exp(res)-1)
submission = pd.read_csv('data/sample_submission.csv')
submission.iloc[:,1:] = result.reshape(-1, 1600)
submission.to_csv('model2_inception.csv', index = False)