## [Dacon] AI프렌즈 시즌2 강수량 산출 경진대회
## 팀: endgame
## 2020년 6월 1일 (제출날짜)

[requirement.txt 다운로드 하기](https://drive.google.com/uc?export=download&id=1_aCOQJsMJk2PzLxCUMOgDRnyX7YL98wY)

## <div style="color:red">README</div>
- 외부데이터 및 pretrained 모델을 사용하지 않았습니다.

- 하드웨어 리소스가 많이 소요되는 코드입니다. 제 컴퓨터의 램이 128GB라서 모든 데이터를 램에 올려놓고 작업을 했습니다.
- 저 같은 경우, EDA, 모델학습을 각각 다른 ipynb 파일에서 작업을 진행했는데, 제출용 파일이다보니 모든 코드를 한 곳에 모아 실행하기에 메모리가 부족할 가능성이 커질 것 같습니다.
- train.zip, test.zip 파일은 각각 data/train, data/test 폴더에 압축을 해제해주세요.
- sample_submission.csv는 data 폴더에 위치시켜 주세요.
- GPU: RTX 2070 super 기준 모델 1: 10시간, 모델2: 10시간, 모델3: 14시간 정도 소요 되었습니다.
- 제가 학습시킨 weight를 로드하고 싶으시다면, model1.h5, model2.h5, model3.h5를 ipynb 파일이 있는 곳에 위치시켜 주세요.

<div style="color:red">혹시 위의 글을 안 읽으셨다면 꼭 읽어주세요!</div>

## 1. 라이브러리 및 데이터 (Library & Data)

In [None]:
# 파일관리 및 파일선택

import os
import pickle
import random
import gc

# 시각화
import seaborn as sns
import matplotlib.pyplot as plt


import numpy as np
import pandas as pd

from sklearn.metrics import f1_score
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import SeparableConv2D, Input, Conv2D, Add, BatchNormalization, concatenate, AveragePooling2D, add, MaxPooling2D, Conv2DTranspose, Activation, Dropout, ZeroPadding2D, LeakyReLU
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger


SEED = 30
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
def mae(y_true, y_pred):
    """Mean absolute error restricted to pixels where it actually rained.

    Both inputs are flattened; only positions whose true value is >= 0.1
    contribute to the mean.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predictions with the same number of elements.

    Returns:
        The mean of |y_true - y_pred| over the positions with y_true >= 0.1.
    """
    truth = np.asarray(y_true).ravel()
    pred = np.asarray(y_pred).ravel()
    rainy = truth >= 0.1

    return np.abs(truth[rainy] - pred[rainy]).mean()

def fscore(y_true, y_pred):
    """F1 score of the binary rain / no-rain classification.

    Inputs are flattened, positions with a negative true value are dropped,
    and both arrays are binarised at the 0.1 threshold before being handed
    to ``f1_score``.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predictions with the same number of elements.

    Returns:
        The value returned by ``f1_score`` for the binarised arrays.
    """
    truth = np.asarray(y_true).ravel()
    pred = np.asarray(y_pred).ravel()

    # Negative true values are treated as missing and excluded from scoring.
    valid = truth >= 0
    truth_is_rain = np.where(truth[valid] >= 0.1, 1, 0)
    pred_is_rain = np.where(pred[valid] >= 0.1, 1, 0)

    return f1_score(truth_is_rain, pred_is_rain)

def maeOverFscore(y_true, y_pred):
    """Return ``mae`` divided by ``fscore`` (lower is better).

    A small constant (1e-07) is added to the denominator so an F1 score of
    zero does not cause a division by zero.
    """
    error = mae(y_true, y_pred)
    f1 = fscore(y_true, y_pred)

    return error / (f1 + 1e-07)

## 2. 데이터 전처리 (Data Cleansing & Pre-Processing)

### 2.1 데이터셋을 만들고 pickle로 저장
- -9999와 같은 missing value가 들어있으면 제거
- 강수량이 0.1 이상인 픽셀의 개수가 UPPER 값 이상인 사진만 데이터셋에 저장

In [None]:
# Directories holding the extracted train / test .npy files.
dir_train = 'data/train/'
dir_test = 'data/test/'
# Minimum number of pixels with rainfall >= 0.1 an image needs to be kept for training.
UPPER = 50

def make_dataset(dir_train, dir_test, UPPER):
    """Build the train/test arrays from raw ``.npy`` files and pickle them.

    Training images are kept only when their target plane (last channel)
    contains no negative (missing, e.g. -9999) value and has at least
    ``UPPER`` pixels with rainfall >= 0.1.  Test images are all kept.

    Files are read in sorted name order.  ``os.listdir`` returns entries in
    arbitrary order, and the test predictions are later written row by row
    into the sample submission, so a stable order is required for the rows
    to line up (this assumes the submission rows follow sorted file order —
    TODO confirm against sample_submission.csv).

    Args:
        dir_train: Directory containing the training ``.npy`` files.
        dir_test: Directory containing the test ``.npy`` files.
        UPPER: Minimum count of rainy (>= 0.1) pixels for a training image.

    Writes (relative to the current working directory):
        data/train{UPPER}.pickle    feature planes of the kept images
        data/train_y{UPPER}.pickle  target planes of the kept images
        data/test.pickle            all test images
    """
    def npy_names(directory):
        # Skip stray entries such as notebook checkpoint folders.
        return sorted(name for name in os.listdir(directory)
                      if name.endswith('.npy'))

    # train dataset
    train = []
    train_y = []

    for name in npy_names(dir_train):
        npy = np.load(os.path.join(dir_train, name))
        target = npy[:, :, -1]

        # Drop images with any missing value.  Checking each pixel (rather
        # than the sign of the sum) also catches a missing pixel that is
        # outweighed by heavy rain elsewhere in the image.
        if (target < 0).any():
            continue

        # Keep only images with at least UPPER pixels of rainfall >= 0.1.
        if (target >= 0.1).sum() >= UPPER:
            train.append(npy[:, :, :-1])
            train_y.append(target)

    train = np.array(train)
    train_y = np.array(train_y)

    # Protocol 4 supports objects larger than 4 GiB.
    with open(f'data/train{UPPER}.pickle', 'wb') as f:
        pickle.dump(train, f, protocol=4)

    with open(f'data/train_y{UPPER}.pickle', 'wb') as f:
        pickle.dump(train_y, f, protocol=4)

    # Release the training arrays before the test set is loaded.
    del train
    del train_y

    # test dataset
    test = np.array([np.load(os.path.join(dir_test, name))
                     for name in npy_names(dir_test)])

    with open('data/test.pickle', 'wb') as f:
        pickle.dump(test, f, protocol=4)
    del test
    
# Build and pickle the datasets with a threshold of 50 rainy pixels; the
# loading code reads the resulting 'train50' / 'train_y50' pickles.
make_dataset(dir_train, dir_test, 50)

### 2.2 pickle 파일 로드

In [None]:
# Load the pickled arrays produced by make_dataset.
with open('data/train50.pickle', 'rb') as f:
    train = pickle.load(f)
    
# Use only channels 0-9
train = train[:, :, :, :10]

with open('data/train_y50.pickle', 'rb') as f:
    train_y = pickle.load(f)
# Add a trailing channel axis so targets are (N, 40, 40, 1).
train_y = train_y.reshape(train_y.shape[0], 40, 40, 1)


with open('data/test.pickle', 'rb') as f:
    TEST = pickle.load(f)
# Same channel selection as for the training inputs.
TEST = TEST[:, :, :, :10] 

## 3. 탐색적 자료분석 (Exploratory Data Analysis)

### 3.1 시각화를 이용한 EDA
- v별, h별 합계 피쳐를 만들고 강수량과의 관계를 시각적으로 파악해봤습니다.

In [None]:
def show_img(img):
    """Append two summed planes to a 40x40 multi-channel image.

    The first added plane is the sum of channels 0, 2, 4, 5 and 7 (the
    'v' channels) and the second is the sum of channels 1, 3, 6 and 8 (the
    'h' channels).

    Args:
        img: Array indexed as (row, col, channel) with 40x40 pixels and at
            least 9 channels.

    Returns:
        A new array with the two summed planes appended as the last two
        channels; ``img`` itself is not modified.
    """
    v_channels = (0, 2, 4, 5, 7)
    h_channels = (1, 3, 6, 8)

    v_plane = sum(img[:, :, c] for c in v_channels).reshape(40, 40, 1)
    h_plane = sum(img[:, :, c] for c in h_channels).reshape(40, 40, 1)

    return np.concatenate([img, v_plane, h_plane], -1)


# Pick one random raw training image and append the v-sum / h-sum planes.
image_dir = os.listdir('data/train/')
image_sample = np.load(f'data/train/{image_dir[random.randrange(len(image_dir))]}')
image_sample = show_img(image_sample)

# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# pyplot.get_cmap works on both old and new releases.
color_map = plt.get_cmap('RdBu').reversed()
plt.style.use('fivethirtyeight')
plt.figure(figsize=(10, 10))

# Panels 1-9: the first nine input channels.
for i in range(9):
    plt.subplot(2, 6, i + 1)
    plt.imshow(image_sample[:, :, i], cmap=color_map)
    plt.title(f'ch_{i}', fontdict= {'fontsize': 16})

# Panels 10-12: after show_img the last three planes are the original final
# channel (plotted as 'rain') followed by the two appended sums.
trailing_panels = [(10, -3, 'rain'), (11, -2, 'v_sum'), (12, -1, 'h_sum')]
for position, channel, title in trailing_panels:
    plt.subplot(2, 6, position)
    plt.imshow(image_sample[:, :, channel], cmap = color_map)
    plt.title(title, fontdict= {'fontsize': 16})

plt.subplots_adjust(top=0.5)
plt.show()

### 3.2 상관관계를 이용한 EDA

In [None]:
# Flatten images to one row per pixel: (N * H * W, channels).
train2 = train.reshape(train.shape[0] * train.shape[1] * train.shape[2], train.shape[3])
train_y2 = train_y.reshape(train_y.shape[0] * train_y.shape[1] * train_y.shape[2], train_y.shape[3])
# Correlate against log(1 + rainfall), the same transform used for training.
train_y2 = np.log(train_y2+1)
# Column 10 becomes the target.
train2 = np.concatenate([train2, train_y2], -1)

df_corr = pd.DataFrame(train2).reset_index(drop=True)
del train2, train_y2
# Subsample: keep every 1600th row starting at row 400, i.e. one pixel
# (flat index 400) from each 40x40 image.
df_corr = df_corr.iloc[400::1600, :]
df_corr = df_corr.reset_index(drop=True)
# Name the ten input channels and the target.
df_corr = df_corr.rename(columns={0:'v1',1:'h1',2:'v2',3:'h2',4:'v3',5:'v4',
                                  6:'h4',7:'v5',8:'h5',9:'surface',10:'target'})

v, h 채널의 합 또는 차이가 도움을 줄 지 확인을 해봤습니다. 아주 조금 상관관계가 상승하는 것을 확인할 수 있었고,  
두 피쳐 간의 합과 차이는 (스케일을 제외하면) 두 피쳐를 45도 회전변환한 결과와 같다는 점에 착안하여, 각각 30도, 45도, 60도로 회전변환해 보았습니다.  
그 결과 v1-h1, v2-h2, v4-h4는 45도 회전변환시 아주 조금 상관관계가 증가하였고,  
v5-h5는 30도 변환 시 상관관계가 매우 크게 증가하는 것을 확인하여 회전변환한 피쳐를 사용하였습니다.

In [None]:
# Display the sampled per-pixel table (input channels plus log-transformed target).
df_corr

In [None]:
# Add rotated combinations of each (v, h) channel pair to the EDA table:
#   chN_rot1 = vN * cos(angle) + hN * sin(angle)
#   chN_rot2 = vN * cos(angle) - hN * sin(angle)
# Angles of 30, 45 and 60 degrees were tried for every pair; only the angle
# that was kept for each pair is listed here (45 degrees for pairs 1, 2 and
# 4, 30 degrees for pair 5).
selected_angles = (
    ('1', np.pi / 4),
    ('2', np.pi / 4),
    ('4', np.pi / 4),
    ('5', np.pi / 6),
)

for pair, angle in selected_angles:
    v_col = df_corr[f'v{pair}']
    h_col = df_corr[f'h{pair}']
    df_corr[f'ch{pair}_rot1'] = v_col * np.cos(angle) + h_col * np.sin(angle)
    df_corr[f'ch{pair}_rot2'] = v_col * np.cos(angle) - h_col * np.sin(angle)

In [None]:
# Correlation of every column (raw and rotated channels) with the log-transformed target.
df_corr.corr()['target']

In [None]:
# Drop the EDA table and run a collection pass before modelling.
del df_corr
gc.collect()

## 4. 변수 선택 및 모델 구축 (Feature Engineering & Initial Modeling)

### 4.1 target 값 로그 변환

In [None]:
# Train on log(1 + rainfall); predictions are mapped back with exp(pred) - 1
# when the submission files are written.
train_y = np.log(train_y+1)    

### 4.2 vertical, horizontal 별로 Sum한 피쳐 추가

In [None]:
def channel_sum(data):
    """Append the summed 'v' and 'h' channels to a batch of images.

    Args:
        data: Array indexed as (sample, row, col, channel) with at least
            9 channels.

    Returns:
        A new array with two extra trailing channels: first the sum of
        channels 0, 2, 4, 5 and 7, then the sum of channels 1, 3, 6 and 8.
        ``data`` itself is not modified.
    """
    # The additions already allocate fresh arrays, so no explicit copies of
    # the individual channel slices are needed.
    data_v = (data[..., 0] + data[..., 2] + data[..., 4]
              + data[..., 5] + data[..., 7])
    data_h = data[..., 1] + data[..., 3] + data[..., 6] + data[..., 8]

    # Concatenate once instead of twice to avoid an intermediate full-size copy.
    return np.concatenate(
        [data, data_v[..., np.newaxis], data_h[..., np.newaxis]], -1)

In [None]:
# Append the v-sum and h-sum channels to both the training and test inputs.
train = channel_sum(train)
TEST = channel_sum(TEST)

### 4.3 9번 채널 min-max scaling

In [None]:
# Scale channel 9 by a fixed divisor of 322 — presumably that channel's
# maximum value (the section title calls this min-max scaling); TODO confirm.
train[:, :, :, 9] = train[:, :, :, 9] / 322
TEST[:, :, :, 9] = TEST[:, :, :, 9] / 322

### 4.4 회전변환

In [None]:
def rotation(data):
    """Replace selected (v, h) channel pairs with rotated combinations, in place.

    For a pair (v, h) and an angle a, the two combinations are
    ``v*cos(a) + h*sin(a)`` ("plus") and ``v*cos(a) - h*sin(a)`` ("minus").

    Channel pair   Angle   Stored in first / second channel
    (0, 1)         pi/4    minus / plus
    (2, 3)         pi/4    minus / plus
    (5, 6)         pi/4    plus  / minus
    (7, 8)         pi/6    plus  / minus

    All other channels are left untouched.

    Args:
        data: Array indexed as (sample, row, col, channel) with at least
            9 channels.  It is modified in place.

    Returns:
        The same ``data`` object that was passed in.
    """
    def rotate_pair(first, second, angle, plus_first):
        # Compute both combinations before writing either channel back, so
        # the second result still uses the original values.
        plus = data[:, :, :, first] * np.cos(angle) + data[:, :, :, second] * np.sin(angle)
        minus = data[:, :, :, first] * np.cos(angle) - data[:, :, :, second] * np.sin(angle)
        if plus_first:
            data[:, :, :, first] = plus
            data[:, :, :, second] = minus
        else:
            data[:, :, :, first] = minus
            data[:, :, :, second] = plus

    rotate_pair(0, 1, np.pi / 4, plus_first=False)
    rotate_pair(2, 3, np.pi / 4, plus_first=False)
    rotate_pair(5, 6, np.pi / 4, plus_first=True)
    rotate_pair(7, 8, np.pi / 6, plus_first=True)

    return data

In [None]:
# Apply the same channel rotation to the training and test inputs.
train = rotation(train)
TEST = rotation(TEST)

## 5. 모델 학습 및 검증 (Model Tuning & Evaluation)

### 5.1 모델 1
- https://dacon.io/competitions/official/235591/codeshare/1110
- GoldBar님 코드에서 루프문을 증가시켰습니다.
- 레이어를 깊게 쌓아도 오버피팅 문제가 발생하지 않아 시간을 고려하여 9까지 증가시켰습니다.

### 5.1.1 모델 정의

In [None]:
def resnet_model(shape, n_blocks=9):
    """Build the densely connected CNN used as model 1.

    Every stage is BatchNormalization followed by a ReLU Conv2D with
    he_normal initialisation, stride 1 and 'same' padding.  After the first
    1x1 convolution, each stage's output is concatenated onto its own input
    along the channel axis, so later stages see all earlier feature maps.

    Stages: 256 filters (1x1), 128 filters (2x2), 64 filters (3x3), then
    ``n_blocks`` repetitions of 32 filters (3x3), and finally a 1-filter
    1x1 ReLU convolution as the output.

    Args:
        shape: Input shape without the batch axis, passed to ``Input``.
        n_blocks: Number of repeated 32-filter stages.  The default of 9
            (raised from 5) keeps the architecture unchanged for existing
            callers.

    Returns:
        An uncompiled ``Model`` mapping the input to a single-channel map.
    """
    def bn_conv(tensor, filters, kernel_size):
        normed = BatchNormalization()(tensor)
        return Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same',
                      activation='relu', kernel_initializer='he_normal')(normed)

    inputs = Input(shape)
    features = bn_conv(inputs, 256, 1)

    # (filters, kernel_size) for every stage whose output is concatenated
    # back onto its input.
    dense_stages = [(128, 2), (64, 3)] + [(32, 3)] * n_blocks
    for filters, kernel_size in dense_stages:
        conv = bn_conv(features, filters, kernel_size)
        features = concatenate([features, conv], axis=3)

    outputs = bn_conv(features, 1, 1)

    return Model(inputs=inputs, outputs=outputs)

### 5.1.2 augmentation

In [None]:
# Stack the target as the last channel so inputs and targets receive exactly
# the same geometric transform.
train = np.concatenate([train, train_y], -1)

# Arrays are (sample, row, col, channel).  np.flipud / np.fliplr act on axes
# 0 and 1, so on a batch np.flipud only reverses the sample order (adding
# exact duplicates) and np.fliplr flips image rows.  Flip the image axes
# explicitly instead.
augmented = [
    train,
    np.rot90(train, 1, (1, 2)),
    np.rot90(train, 2, (1, 2)),
    np.rot90(train, 3, (1, 2)),
    np.flip(train, axis=2),  # left-right flip of each image
    np.flip(train, axis=1),  # up-down flip of each image
]

# rot90 / flip return views, so a single stack is the only full-size copy.
train = np.vstack(augmented)
del augmented

# Split the target channel back out (kept 4-D) and drop it from the inputs.
train_y = train[:, :, :, -1:].copy()
train = train[:,:,:,:-1]

### 5.1.3 K-fold

In [None]:
# augmentation 이후, 제가 직접 학습시킨 모델을 로드하면 빠르게 submission을 확인할 수 있습니다.

# model = resnet_model(train.shape[1:])
# model.compile(loss="mae", optimizer="adam", metrics=["mae"])
# model.load_weights('model1.h5')
# res = model.predict(TEST)
# result = (np.exp(res)-1) / 1
# submission = pd.read_csv('data/sample_submission.csv')
# submission.iloc[:,1:] = result.reshape(-1, 1600)
# submission.to_csv('model1_resnet.csv', index = False)

In [None]:
# Hold out 2.5% of the augmented samples as (test, test_y); this is separate
# from the competition TEST array.
from sklearn.model_selection import train_test_split
train, test, train_y, test_y = train_test_split(train, train_y, test_size=0.025, random_state=SEED)

In [None]:
model_number = 0
history = []
scores = []

# FOLD is set to 100 so that as much data as possible is used for training.
# Training was stopped partway through fold 1, hence the break below.
FOLD = 100
k_fold = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)

for train_idx, val_idx in k_fold.split(train, train_y):
    x_train, y_train = train[train_idx], train_y[train_idx]
    x_val, y_val = train[val_idx], train_y[val_idx]

    model = resnet_model(train.shape[1:])
    model.summary()
    model.compile(loss="mae", optimizer="adam", metrics=["mae"])


    es = EarlyStopping(patience=9, verbose=1)
    mc = ModelCheckpoint(f'model1_best_{model_number}.h5', save_best_only=True, verbose=1)
    rlp = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.8, min_lr=0.0001)
    csv_logger = CSVLogger(f'training_{model_number}.csv')

    model.fit(x_train, y_train, epochs = 53, validation_data=(x_val, y_val), verbose=1, batch_size = 64, callbacks = [es, mc, rlp, csv_logger])
    
    # The run was accidentally interrupted at epoch 53, so only the first
    # fold is ever trained.
    break
    
# Continue the same model for 14 more epochs (stopped partway through because
# the MAE was no longer improving).  CSVLogger truncates its file at the
# start of every fit() unless append=True, so use an appending logger here
# to keep the log of the first 53 epochs.
resume_logger = CSVLogger(f'training_{model_number}.csv', append=True)
model.fit(x_train, y_train, epochs = 14, validation_data=(x_val, y_val), verbose=1, batch_size = 64, callbacks = [es, mc, rlp, resume_logger])

# Predict with the best checkpoint and undo the log(1 + y) target transform.
model.load_weights(f'model1_best_{model_number}.h5')
res = model.predict(TEST)
result = (np.exp(res)-1)
submission = pd.read_csv('data/sample_submission.csv')
# One row per test image, 1600 values per row.
submission.iloc[:,1:] = result.reshape(-1, 1600)
submission.to_csv('model1_resnet.csv', index = False)

### 5.2 모델 2 - inception

- https://norman3.github.io/papers/docs/google_inception.html  
인셉션 v3 모델 앞 부분에서 착안하여 모델을 만들어 보았습니다.

![](imgs/inception.jpg)

### 5.2.1 모델 정의

In [None]:
def inception(shape_, LOOP):
    """Build the inception-style CNN used as models 2 and 3.

    The stem is BatchNormalization + ReLU Conv2D stages (256 filters 1x1,
    128 filters 2x2, 64 filters 3x3) where the 2x2 and 3x3 outputs are
    concatenated onto their inputs.  It is followed by ``LOOP`` inception
    blocks.  Each block normalises its input and concatenates four
    32-filter branches: 1x1; 1x1 -> 3x3; 1x1 -> 3x3 -> 3x3; and
    3x3 average pooling (stride 1) -> 1x1.  Unlike the stem, a block's
    output replaces (rather than extends) the running feature map.

    Args:
        shape_: Input shape without the batch axis.
        LOOP: Number of inception blocks.

    Returns:
        An uncompiled ``Model`` ending in BatchNormalization and a 1-filter
        1x1 ReLU convolution.
    """
    activation_ = 'relu'

    def stem_conv(tensor, filters, kernel_size):
        # BN followed by a he_normal-initialised convolution.
        normed = BatchNormalization()(tensor)
        return Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same',
                      activation=activation_, kernel_initializer='he_normal')(normed)

    def branch_conv(tensor, kernel_size):
        # 32-filter convolution with the layer's default initialiser.
        return Conv2D(32, kernel_size, padding='same', activation=activation_)(tensor)

    input_ = Input(shape=shape_)

    stem = stem_conv(input_, 256, 1)
    features = concatenate([stem, stem_conv(stem, 128, 2)], axis=3)
    features = concatenate([features, stem_conv(features, 64, 3)], axis=3)

    for _ in range(LOOP):
        normed = BatchNormalization()(features)

        branch_a = branch_conv(normed, 1)
        branch_b = branch_conv(branch_conv(normed, 1), 3)
        branch_c = branch_conv(branch_conv(branch_conv(normed, 1), 3), 3)

        pooled = AveragePooling2D(
            pool_size=(3, 3), strides=1, padding='same')(normed)
        branch_d = branch_conv(pooled, 1)

        features = concatenate([branch_a, branch_b, branch_c, branch_d])

    outputs = stem_conv(features, 1, 1)

    return Model(inputs=input_, outputs=outputs)

### 5.2.2 augmentation (위와 동일하기에 생략)

In [None]:
# augmentation 이후, 제가 직접 학습시킨 모델을 로드하면 빠르게 submission을 확인할 수 있습니다.
# model = inception(train.shape[1:] , 5) 
# model.compile(loss="mae", optimizer="adam", metrics=["mae"])
# model.load_weights('model2.h5')
# res = model.predict(TEST)
# result = (np.exp(res)-1) / 1
# submission = pd.read_csv('data/sample_submission.csv')
# submission.iloc[:,1:] = result.reshape(-1, 1600)
# submission.to_csv('model2_inception.csv', index = False)

### 5.2.3 K-fold

In [None]:
model_number = 0
history = []
scores = []

# As explained for model 1, FOLD is 100 so that as much data as possible is
# used for training.  Only the first fold is ever trained, so take just the
# first split from the generator.
FOLD = 100
k_fold = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)
train_idx, val_idx = next(k_fold.split(train, train_y))

x_train, y_train = train[train_idx], train_y[train_idx]
x_val, y_val = train[val_idx], train_y[val_idx]

# Inception network with 5 blocks.
model = inception(train.shape[1:] , 5)
model.summary()
model.compile(loss="mae", optimizer="adam", metrics=["mae"])

es = EarlyStopping(patience=9, verbose=1)
mc = ModelCheckpoint(f'model2_best_{model_number}.h5', save_best_only=True, verbose=1)
rlp = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.8, min_lr=0.0001)

model.fit(
    x_train, y_train,
    epochs = 85,
    validation_data=(x_val, y_val),
    verbose=1,
    batch_size = 64,
    callbacks = [es, mc, rlp],
)

# Predict with the best checkpoint and undo the log(1 + y) target transform.
model.load_weights(f'model2_best_{model_number}.h5')
res = model.predict(TEST)
result = (np.exp(res)-1)

# One row per test image, 1600 values per row.
submission = pd.read_csv('data/sample_submission.csv')
submission.iloc[:,1:] = result.reshape(-1, 1600)
submission.to_csv('model2_inception.csv', index = False)

### 5.3 모델 3 - deep inception

### 5.3.1 augmentation

대회 마지막 날인만큼 데이터 augmentation을 증가시켰고, layer 깊이도 증가시켰습니다.  
앙상블을 제외한 단일 모델로서 가장 결과가 좋았습니다.  
augmentation을 증가시키기에 데이터를 처음부터 불러와서 작업을 진행합니다.

In [None]:
# Reload the untouched arrays so the larger augmentation starts from the raw data.
with open('data/train50.pickle', 'rb') as f:
    train = pickle.load(f)
    
# Use only channels 0-9
train = train[:, :, :, :10]

with open('data/train_y50.pickle', 'rb') as f:
    train_y = pickle.load(f)
# Add a trailing channel axis so targets are (N, 40, 40, 1).
train_y = train_y.reshape(train_y.shape[0], 40, 40, 1)

  

with open('data/test.pickle', 'rb') as f:
    TEST = pickle.load(f)
# Same channel selection as for the training inputs.
TEST = TEST[:, :, :, :10] 



In [None]:
# Run a collection pass after rebinding train / train_y / TEST, presumably to
# release the arrays used by the earlier models.
gc.collect()

In [None]:
# Train on log(1 + rainfall); undone with exp(pred) - 1 at prediction time.
train_y = np.log(train_y+1)

In [None]:
# Append the v-sum and h-sum channels to both the training and test inputs.
train = channel_sum(train)
TEST = channel_sum(TEST)

In [None]:
# Scale channel 9 by the fixed divisor 322 — presumably that channel's
# maximum value; TODO confirm.
train[:, :, :, 9] = train[:, :, :, 9] / 322
TEST[:, :, :, 9] = TEST[:, :, :, 9] / 322

In [None]:
# Apply the same channel rotation to the training and test inputs.
train = rotation(train)
TEST = rotation(TEST)

In [None]:
# Stack the target as the last channel so inputs and targets receive exactly
# the same geometric transform.
train = np.concatenate([train, train_y], -1)

# np.fliplr reverses axis 1 of the (sample, row, col, channel) batch.
flipped = np.fliplr(train)

# Sample order: original, its three rotations, the three rotations of the
# flipped batch, then the flipped batch itself.
variants = [train]
variants += [np.rot90(train, turns, (1, 2)) for turns in (1, 2, 3)]
variants += [np.rot90(flipped, turns, (1, 2)) for turns in (1, 2, 3)]
variants.append(flipped)

train = np.vstack(variants)
del variants, flipped

In [None]:
# Split the stacked array back into the target (last channel, kept 4-D)
# and the input features (all remaining channels).
train_y = train[:, :, :, -1].copy()
train_y = train_y.reshape(train_y.shape[0], train_y.shape[1], train_y.shape[2], 1)
train = train[:,:,:,:-1]

In [None]:
# Hold out 2.5% of the augmented samples as (test, test_y); this is separate
# from the competition TEST array.
from sklearn.model_selection import train_test_split
train, test, train_y, test_y = train_test_split(train, train_y, test_size=0.025, random_state=SEED)

In [None]:
# augmentation 및 피쳐 생성 이후, 제가 직접 학습시킨 모델을 로드하면 빠르게 submission을 확인할 수 있습니다.
# model = inception(train.shape[1:] , 7) # loop문을 7번으로 증가.
# model.compile(loss="mae", optimizer="adam", metrics=["mae"])
# model.load_weights('model3.h5')
# res = model.predict(TEST)
# result = (np.exp(res)-1)
# submission = pd.read_csv('data/sample_submission.csv')
# submission.iloc[:,1:] = result.reshape(-1, 1600)
# submission.to_csv('model3_deep_inception.csv', index = False)

In [None]:
from sklearn.model_selection import KFold

# As explained for model 1, FOLD is 100 so that as much data as possible is
# used for training.  Only the first fold is ever trained, so take just the
# first split from the generator.
FOLD = 100
k_fold = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)
model_number = 0

train_idx, val_idx = next(k_fold.split(train, train_y))
x_train, y_train = train[train_idx], train_y[train_idx]
x_val, y_val = train[val_idx], train_y[val_idx]
print(x_train.shape)

# Number of inception blocks increased to 7.
model = inception(train.shape[1:] , 7) 
model.compile(loss="mae", optimizer="adam", metrics=["mae"])

es = EarlyStopping(patience=9, verbose=1)
mc = ModelCheckpoint(f'model3_best_{model_number}.h5', save_best_only=True, verbose=1)
rlp = ReduceLROnPlateau(monitor='val_loss', patience=4, factor=0.8, min_lr=0.0001)

model.fit(
    x_train, y_train,
    epochs = 90,
    validation_data=(x_val, y_val),
    verbose=1,
    batch_size = 64,
    callbacks = [es, mc, rlp],
)

# Predict with the best checkpoint and undo the log(1 + y) target transform.
model.load_weights(f'model3_best_{model_number}.h5')
res = model.predict(TEST)
result = (np.exp(res)-1)

# One row per test image, 1600 values per row.
submission = pd.read_csv('data/sample_submission.csv')
submission.iloc[:,1:] = result.reshape(-1, 1600)
submission.to_csv('model3_deep_inception.csv', index = False)

### 5.4 앙상블

Validation 데이터 셋을 이용하여 점수 개선이 있는지 확인하였고, 점수가 개선되어 각 모델간의 결과를 앙상블하여 제출하였습니다.

In [None]:
# Load the three single-model submissions.
result_1 = pd.read_csv('model1_resnet.csv')
result_2 = pd.read_csv('model2_inception.csv')
result_3 = pd.read_csv('model3_deep_inception.csv')

# Weighted average of the prediction columns (everything after the first
# column): 0.25 / 0.25 / 0.5 for models 1 / 2 / 3.
pred_1, pred_2, pred_3 = (
    frame.iloc[:, 1:] for frame in (result_1, result_2, result_3)
)
result_1.iloc[:, 1:] = (pred_1 * 0.25) + (pred_2 * 0.25) + (pred_3 * 0.5)

# result_1 now holds the blended predictions and is written as the final submission.
result_1.to_csv('endgame_submission.csv', index = False)

## 6. 결과 및 결언 (Conclusion & Discussion)

- 모델이 오버피팅 되는 경우는 적었습니다. 이에 모델의 layer를 깊게 쌓아 점수가 향상되었습니다.
- 레스넷 모델의 경우, augmentation을 적게 했는데, 더 많은 데이터를 바탕으로 모델을 돌리면 점수가 향상되고, 이를 앙상블하면 더 좋은 점수를 기대할 수 있을 것 같습니다.
- tree 계열 모델을 만들어 보았으나, 결과가 좋지 않았습니다.
- xception, resnet 등의 레이어를 참조하여 모델을 만들어 보았으나 결과가 좋지 않았습니다.
- 대회를 준비하신 모든 분들, 대회에 참여하신 모든 분들 고생많았습니다.
- 마지막으로 GoldBar 님께 감사하다는 말을 전하고 싶습니다.