# Keras + TF LB 0.18 by wvadim - Third Try
* Data analysis - [Exploration & Transforming Images in Python](https://www.kaggle.com/muonneutrino/exploration-transforming-images-in-python)
* Image conversion, Network architecture - [Keras Model for Beginners (0.210 on LB)+EDA+R&D](https://www.kaggle.com/tivigovidiu/keras-model-for-beginners-0-210-on-lb-eda-r-d)
* Some ideas - [A Keras Prototype (0.21174 on PL)](https://www.kaggle.com/knowledgegrappler/a-keras-prototype-0-21174-on-pl)

## 1. Imports

In [None]:
# Hide TensorFlow's native log output. The variable is exported before
# TensorFlow is imported so that it is already in place when the library loads.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Set the NumPy and TensorFlow random seeds
import numpy as np
np.random.seed(98643)
import tensorflow as tf
tf.set_random_seed(663)

In [None]:
# Image manipulation
from skimage.restoration import (denoise_tv_chambolle, denoise_bilateral,
                                denoise_wavelet, estimate_sigma,
                                denoise_tv_bregman, denoise_nl_means)
from skimage.filters import gaussian
from skimage.color import rgb2gray

In [None]:
# Data loading and visualization
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Model training
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Input, Flatten, GlobalAveragePooling2D, Lambda
from keras.layers import GlobalMaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.merge import Concatenate
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator

## 2. Preprocessing

### 2.1. Define Functions

In [None]:
# Build RGB composite images
def _rescale_band(band):
    """Shift *band* by ``abs(band.min())`` and divide by the shifted maximum."""
    # NOTE: the shift adds abs(min). A band whose minimum is positive is
    # therefore moved further away from zero instead of down to zero.
    shifted = band + abs(band.min())
    return shifted / np.max(shifted)


def color_composite(data):
    """Return one 75x75x3 composite per row of *data*.

    Args:
        data: DataFrame with ``band_1`` and ``band_2`` columns; each cell
            holds 5625 values that are reshaped to 75x75.

    Returns:
        ``np.ndarray`` whose entries stack, along the last axis, the rescaled
        ``band_1``, the rescaled ``band_2`` and the rescaled element-wise
        ratio ``band_1 / band_2``.
    """
    rgb_arrays = []
    # Walk the two columns directly; the row index is not needed.
    for flat_1, flat_2 in zip(data['band_1'], data['band_2']):
        band_1 = np.array(flat_1).reshape(75, 75)
        band_2 = np.array(flat_2).reshape(75, 75)
        band_3 = band_1 / band_2  # no guard against zeros in band_2

        channels = [_rescale_band(band) for band in (band_1, band_2, band_3)]
        rgb_arrays.append(np.dstack(channels))

    return np.array(rgb_arrays)

In [None]:
# Noise removal
def denoise(X, weight, multichannel):
    """Apply ``denoise_tv_chambolle`` to every item of *X*.

    Args:
        X: iterable of images.
        weight: forwarded as the ``weight`` argument.
        multichannel: forwarded as the ``multichannel`` argument.

    Returns:
        ``np.ndarray`` built from the denoised items, in input order.
    """
    cleaned = [
        denoise_tv_chambolle(image, weight=weight, multichannel=multichannel)
        for image in X
    ]
    return np.asarray(cleaned)

In [None]:
# Gaussian filtering
def smooth(X, sigma):
    """Apply the ``gaussian`` filter with the given *sigma* to every item of *X*.

    Returns:
        ``np.ndarray`` built from the filtered items, in input order.
    """
    blurred = [gaussian(image, sigma=sigma) for image in X]
    return np.asarray(blurred)

In [None]:
# Conversion to grayscale images
def grayscale(X):
    """Run ``rgb2gray`` on every item of *X* and return the results as an array."""
    gray_images = [rgb2gray(image) for image in X]
    return np.asarray(gray_images)

### 2.2. Fill in missing values

In [None]:
# Load the training data
train = pd.read_json('../data/train.json')
# Last expression of the cell: the notebook renders the first rows
train.head()

In [None]:
# Number of missing values: count the entries equal to the string 'na'
# in each of the three columns
print('band_1 :', len(train.loc[train['band_1'] == 'na', 'band_1']))
print('band_2 :', len(train.loc[train['band_2'] == 'na', 'band_2']))
print('inc_angle :', len(train.loc[train['inc_angle'] == 'na', 'inc_angle']))

In [None]:
# Fill the missing inc_angle values: swap the 'na' marker for 0, cast the
# column to float, then set any remaining NaN to 0.0
train.inc_angle = (
    train.inc_angle
    .replace('na', 0)
    .astype(float)
    .fillna(0.0)
)

### 2.3. Final Dataset

In [None]:
# Flag definitions
# Training / prediction switches. Each one is written as `True or train_all`;
# with the literal True on the left they are always True, whatever train_all is.
train_all = True
train_b = True or train_all
train_img = True or train_all
train_total = True or train_all
predict_submission = True or train_all

# Cleaning (denoise + smooth) switches. `False or clean_all` evaluates to
# clean_all, so setting clean_all = True enables both.
clean_all = False
clean_b = False or clean_all
clean_img = False or clean_all

# Loading switches, following the same pattern (`False or load_all`).
load_all = False
load_b = False or load_all
load_img = False or load_all

In [None]:
# Preprocess a frame and build the model inputs
def create_dataset(frame, labeled, smooth_rgb=0.2, smooth_gray=0.5, weight_rgb=0.05, weight_gray=0.05):
    """Build labels, stacked band images and RGB composites from *frame*.

    The notebook-level flags ``train_b``/``clean_b`` and
    ``train_img``/``clean_img`` decide whether denoising followed by Gaussian
    smoothing is applied to the band images and to the composites.

    Args:
        frame: DataFrame with ``band_1`` and ``band_2`` columns (and an
            ``is_iceberg`` column when *labeled* is true).
        labeled: whether to extract ``is_iceberg`` as the label array.
        smooth_rgb: Gaussian sigma used for the RGB composites.
        smooth_gray: Gaussian sigma used for the single-band images.
        weight_rgb: denoising weight used for the RGB composites.
        weight_gray: denoising weight used for the single-band images.

    Returns:
        Tuple ``(y, band, images)``: the labels (``None`` when *labeled* is
        false), the band array with ``band_1``, ``band_2`` and their mean
        stacked on the last axis, and the output of ``color_composite``.
    """
    images = color_composite(frame)

    def to_images(column):
        # Convert a column of flat pixel lists into an array of 75x75 images.
        flat = np.asarray([np.asarray(item) for item in column])
        return flat.reshape(-1, 75, 75)

    band_1 = to_images(frame['band_1'].values)
    band_2 = to_images(frame['band_2'].values)
    band_3 = (band_1 + band_2) / 2

    # Denoising and Gaussian filtering
    print('Denoising and reshaping...')

    # 1. Band data
    if train_b and clean_b:
        band_1 = smooth(denoise(band_1, weight_gray, False), smooth_gray)
        print('Gray 1 done')

        band_2 = smooth(denoise(band_2, weight_gray, False), smooth_gray)
        print('Gray 2 done')

        band_3 = smooth(denoise(band_3, weight_gray, False), smooth_gray)
        print('Gray 3 done')

    # 2. Image data
    if train_img and clean_img:
        # Use the RGB sigma here; smooth_gray belongs to the band branch above.
        images = smooth(denoise(images, weight_rgb, True), smooth_rgb)
        print('RGB done')

    # Channels-last layout expected by the models: one channel per band
    band = np.stack([band_1, band_2, band_3], axis=-1)

    # Label extraction
    y = np.array(frame['is_iceberg']) if labeled else None

    return y, band, images

In [None]:
# Build the training dataset (labels, band stack, RGB composites)
y_train, X_b, X_images = create_dataset(train, True)

# Compare X_b and X_images by printing the first pixel of the first sample
print('X_b[0][0][0] :', X_b[0][0][0])
print('X_images[0][0][0] :', X_images[0][0][0])

In [None]:
# Visualize 9 randomly drawn composites in a 3x3 grid
fig = plt.figure(200, figsize=(15, 15))
# Sample 9 distinct indices; random_idx is reused by the comparison cells
random_idx = np.random.choice(range(len(X_images)), 9, False)
subset = X_images[random_idx]
for position, image in enumerate(subset[:9], start=1):
    fig.add_subplot(3, 3, position).imshow(image)
plt.show()

### 2.4. Compare Original and preprocessed data

In [None]:
# band_1 data before denoising and filtering
fig = plt.figure(202, figsize=(15, 15))
band_1_x = train['band_1'].values
# Pick the sampled rows, then turn each flat list into a 75x75 image
sampled_rows = np.asarray(band_1_x)[random_idx]
subset = np.asarray([np.asarray(flat).reshape(75, 75) for flat in sampled_rows])
for position, image in enumerate(subset[:9], start=1):
    fig.add_subplot(3, 3, position).imshow(image)
plt.show()

In [None]:
# band_1 data after denoising
fig = plt.figure(202, figsize=(15, 15))
band_1_x = train['band_1'].values
# Pick the sampled rows, reshape them to 75x75 and denoise with weight 0.05
sampled_rows = np.asarray(band_1_x)[random_idx]
raw_images = np.asarray([np.asarray(flat).reshape(75, 75) for flat in sampled_rows])
subset = denoise(raw_images, 0.05, False)
for position, image in enumerate(subset[:9], start=1):
    fig.add_subplot(3, 3, position).imshow(image)
plt.show()

In [None]:
# band_1 data after denoising and Gaussian filtering
fig = plt.figure(202, figsize=(15, 15))
# band_1_x comes from the preceding comparison cell
sampled_rows = np.asarray(band_1_x)[random_idx]
raw_images = np.asarray([np.asarray(flat).reshape(75, 75) for flat in sampled_rows])
# Denoise with weight 0.05, then smooth with sigma 0.5
subset = smooth(denoise(raw_images, 0.05, False), 0.5)
for position, image in enumerate(subset[:9], start=1):
    fig.add_subplot(3, 3, position).imshow(image)
plt.show()

## 3. Modeling

In [None]:
# Base model, trained on either the band stack or the image composites
def get_model_notebook(lr, decay, channels, relu_type='relu'):
    """Build the base convolutional classifier and its feature extractor.

    Args:
        lr: learning rate passed to Adam.
        decay: decay passed to Adam.
        channels: channel count of the 75x75 input.
        relu_type: activation for every Conv2D and hidden Dense layer.

    Returns:
        ``(model, partial_model)``: the compiled binary classifier, and a
        Model over the same input that ends at the flattened convolutional
        features (reused by the combined model).
    """
    inputs = Input(shape=(75, 75, channels))

    # Conv block 1: batch-normalized input, 32 filters, 3x3 pooling
    x = BatchNormalization()(inputs)
    x = Conv2D(32, kernel_size=(3, 3), activation=relu_type)(x)
    x = MaxPooling2D((3, 3))(x)
    x = Dropout(0.2)(x)

    # Conv blocks 2-4: 64, 128 and 128 filters, each with 2x2 pooling
    for filters in (64, 128, 128):
        x = Conv2D(filters, kernel_size=(3, 3), activation=relu_type)(x)
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)
        x = Dropout(0.2)(x)
    x = BatchNormalization()(x)

    # Flatten for the fully connected part
    features = Flatten()(x)

    # Keep the model up to the flattened features for the combined model
    partial_model = Model(inputs, features)

    # Fully connected head: 256 -> 128 -> 64, dropout after each layer
    hidden = Dropout(0.2)(features)
    for units in (256, 128, 64):
        hidden = Dense(units, activation=relu_type)(hidden)
        hidden = Dropout(0.2)(hidden)

    # Sigmoid output
    output = Dense(1, activation='sigmoid')(hidden)

    # Assemble and compile the base model
    model = Model(inputs, output)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=lr, decay=decay),
                  metrics=['accuracy'])

    return model, partial_model

In [None]:
# Combined model that learns from the two base models
def combined_model(m_b, m_img, lr, decay):
    """Join two feature extractors under a shared dense classifier.

    Args:
        m_b: model applied to the band input (75x75x3).
        m_img: model applied to the image input (75x75x3).
        lr: learning rate passed to Adam.
        decay: decay passed to Adam.

    Returns:
        Compiled two-input Keras model with a single sigmoid output.
    """
    input_b = Input(shape=(75, 75, 3))
    input_img = Input(shape=(75, 75, 3))

    # Feed each input through its base model and join the two outputs
    features_b = m_b(input_b)
    features_img = m_img(input_img)
    merged = Concatenate()([features_b, features_img])

    # Batch normalization followed by dropout
    merged = BatchNormalization()(merged)
    merged = Dropout(0.3)(merged)

    # Dense layers 1 and 2, each followed by dropout
    for units in (1024, 512):
        merged = Dense(units, activation='relu')(merged)
        merged = Dropout(0.3)(merged)

    # Sigmoid output
    output = Dense(1, activation='sigmoid')(merged)

    model = Model([input_b, input_img], output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=decay),
        metrics=['accuracy'])

    return model

In [None]:
# Generate batch_size-sized batches with data augmentation
def gen_flow_multi_inputs(I1, I2, y, batch_size):
    """Endlessly yield ``([I1_batch, I2_batch], y_batch)`` for the two-input model.

    Two identically configured iterators both flow over *I1* with the same
    seed and ``shuffle=False``. The first carries *y* in the label slot, the
    second carries *I2* there, which keeps the three arrays aligned.

    Args:
        I1: first input array, passed through the ImageDataGenerator.
        I2: second input array, passed in the label position of the second flow.
        y: labels.
        batch_size: number of samples per yielded batch.
    """
    augmentation = dict(horizontal_flip=True, vertical_flip=True,
                        width_shift_range=0., height_shift_range=0.,
                        channel_shift_range=0, zoom_range=0.2,
                        rotation_range=10)
    flow_options = dict(batch_size=batch_size, seed=57, shuffle=False)

    # Both flows iterate I1 so that y and I2 come back in matching order
    label_flow = ImageDataGenerator(**augmentation).flow(I1, y, **flow_options)
    paired_flow = ImageDataGenerator(**augmentation).flow(I1, I2, **flow_options)

    while True:
        batch_i1, batch_y = label_flow.next()
        batch_i1_check, batch_i2 = paired_flow.next()

        # Sanity check: both flows must have produced the same I1 batch
        np.testing.assert_array_equal(batch_i1_check, batch_i1)
        yield [batch_i1, batch_i2], batch_y

In [None]:
# Model training
def train_model(model, batch_size, epochs, checkpoint_name, X_train, y_train, val_data, verbose=2):
    """Fit *model* on augmented data and reload its best checkpoint.

    Args:
        model: compiled Keras model.
        batch_size: batch size of the augmentation flow.
        epochs: number of epochs to train.
        checkpoint_name: path where ModelCheckpoint stores the weights with
            the best ``val_loss``; they are loaded back before returning.
        X_train: training inputs.
        y_train: training labels.
        val_data: ``(X, y)`` pair used as validation data during fitting.
        verbose: values above 0 enable this function's own status messages.
            Keras progress output is fixed at ``verbose=1``.

    Returns:
        *model* with the checkpointed weights loaded.
    """
    # Callback keeping only the best weights by validation loss
    callbacks = [ModelCheckpoint(checkpoint_name, save_best_only=True, monitor='val_loss')]
    # Augmentation: flips, zoom and small rotations
    datagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True,
                                width_shift_range=0., height_shift_range=0.,
                                channel_shift_range=0, zoom_range=0.2,
                                rotation_range=10)

    # Validation dataset
    X_test, y_test = val_data

    # Keras documents steps_per_epoch as an integer; use the number of
    # batches needed to cover X_train once instead of a float quotient.
    steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

    try:
        model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                            epochs=epochs, steps_per_epoch=steps_per_epoch,
                            validation_data=(X_test, y_test),
                            verbose=1, callbacks=callbacks)
    except KeyboardInterrupt:
        # Deliberate: a manual interrupt ends training early and execution
        # continues with the checkpoint written so far.
        if verbose > 0:
            print('Interrupted')
    if verbose > 0:
        print('Loading model...')

    # Restore the weights saved at the best validation loss
    model.load_weights(checkpoint_name)
    return model

In [None]:
# Base model training
def gen_model_weights(lr, decay, channels, relu, batch_size, epochs, path_name, data, verbose=2):
    """Build, train and optionally report on one base model.

    Args:
        lr: learning rate for the model's optimizer.
        decay: decay for the model's optimizer.
        channels: number of input channels.
        relu: activation name forwarded to ``get_model_notebook``.
        batch_size: batch size for training and evaluation.
        epochs: number of training epochs.
        path_name: checkpoint path forwarded to ``train_model``.
        data: tuple ``(X_train, y_train, X_test, y_test, X_val, y_val)``;
            the *test* pair is the validation data used during fitting, the
            *val* pair is only evaluated afterwards.
        verbose: values above 0 print validation/training loss and accuracy.

    Returns:
        ``(model, partial_model)`` as produced by ``get_model_notebook``,
        with *model* trained.
    """
    X_train, y_train, X_test, y_test, X_val, y_val = data
    # Create the model objects
    model, partial_model = get_model_notebook(lr, decay, channels, relu)
    # Train the model
    model = train_model(model, batch_size, epochs, path_name,
                        X_train, y_train, (X_test, y_test), verbose=verbose)

    if verbose > 0:
        # Performance on the held-out validation data
        loss_val, acc_val = model.evaluate(X_val, y_val,
                                           verbose=0, batch_size=batch_size)
        # The "train" figures are measured on the training split itself,
        # not on the split used for checkpoint selection.
        loss_train, acc_train = model.evaluate(X_train, y_train,
                                               verbose=0, batch_size=batch_size)
        print('val / train loss :', str(loss_val) + ' / ' + str(loss_train), \
             '- val / train acc :', str(acc_val) + ' / ' + str(acc_train))

    return model, partial_model

In [None]:
# Train both base models and the combined model
def train_models(dataset, lr, batch_size, max_epoch, verbose=2, return_model=False):
    """Train the band network, the image network and the combined network.

    Which stages run is controlled by the notebook-level flags ``train_b``,
    ``train_img`` and ``train_total``.

    Args:
        dataset: tuple ``(y_train, X_b, X_images)``.
        lr: learning rate of the base networks; the combined network uses
            ``lr / 2``.
        batch_size: batch size for training and evaluation.
        max_epoch: epochs for the base networks. The combined network always
            trains for 30 epochs.
        verbose: values above 0 enable status messages.
        return_model: return the combined model instead of the metrics.

    Returns:
        The combined model when *return_model* is true, otherwise
        ``((loss_train, acc_train), (loss_val, acc_val))``. When
        ``train_total`` is off, the model is ``None`` and both metric
        entries are ``None``.

    Raises:
        RuntimeError: if ``train_total`` is on while ``train_b`` or
            ``train_img`` is off, since the combined model needs both
            feature extractors.
    """
    # Training dataset
    y_train, X_b, X_images = dataset

    # Hold out the validation split (10%)
    y_train_full, y_val, X_b_full, X_b_val, X_images_full, X_images_val = train_test_split(
        y_train, X_b, X_images, random_state=687, train_size=0.9)

    # Split the remainder into training and test data (85% / 15%)
    y_train, y_test, X_b_train, X_b_test, X_images_train, X_images_test = train_test_split(
        y_train_full, X_b_full, X_images_full, random_state=576, train_size=0.85)

    # Defined up front so a disabled stage cannot leave them unbound
    model_b_cut = None
    model_images_cut = None
    common_model = None
    train_metrics = None
    val_metrics = None

    # 1. Train on the band data
    if train_b:
        if verbose > 0:
            print('Training bandwidth network...')
        data_b1 = (X_b_train, y_train, X_b_test, y_test, X_b_val, y_val)
        _, model_b_cut = gen_model_weights(lr, 1e-6, 3, 'relu', batch_size, max_epoch, 'model_b',
                                           data=data_b1, verbose=verbose)

    # 2. Train on the image data
    if train_img:
        if verbose > 0:
            print('Training image network...')
        data_images = (X_images_train, y_train, X_images_test, y_test, X_images_val, y_val)
        _, model_images_cut = gen_model_weights(lr, 1e-6, 3, 'relu', batch_size, max_epoch, 'model_img',
                                                data_images, verbose=verbose)

    # 3. Train the combined model
    if train_total:
        if model_b_cut is None or model_images_cut is None:
            raise RuntimeError('train_total requires both train_b and train_img to be enabled')

        common_model = combined_model(model_b_cut, model_images_cut, lr/2, 1e-7)
        common_x_train = [X_b_full, X_images_full]
        common_y_train = y_train_full
        common_x_val = [X_b_val, X_images_val]
        common_y_val = y_val

        if verbose > 0:
            print('Training common network...')
        callbacks = [ModelCheckpoint('common', save_best_only=True, monitor='val_loss')]
        # Keras documents steps_per_epoch as an integer
        steps_per_epoch = int(np.ceil(len(X_b_full) / batch_size))
        try:
            common_model.fit_generator(gen_flow_multi_inputs(X_b_full, X_images_full, y_train_full, batch_size),
                                       epochs=30, steps_per_epoch=steps_per_epoch,
                                       validation_data=(common_x_val, common_y_val), verbose=1,
                                       callbacks=callbacks)
        except KeyboardInterrupt:
            # Deliberate: a manual interrupt ends training early and the
            # best checkpoint so far is loaded below.
            pass
        common_model.load_weights('common')

        # Loss and accuracy on the validation and training data
        loss_val, acc_val = common_model.evaluate(common_x_val, common_y_val,
                                                  verbose=0, batch_size=batch_size)
        loss_train, acc_train = common_model.evaluate(common_x_train, common_y_train,
                                                      verbose=0, batch_size=batch_size)
        train_metrics = (loss_train, acc_train)
        val_metrics = (loss_val, acc_val)

        if verbose > 0:
            print('loss :', loss_val, 'acc : ', acc_val)

    # Return the model or the metrics
    if return_model:
        return common_model
    return train_metrics, val_metrics

## 4. Train the data

In [None]:
# Train the models
# Settings used by this call: max_epoch 50 / learning rate 7e-04 / batch size 32
# NOTE(review): this comment previously listed epoch 250 / learning rate 8e-5,
# which does not match the arguments below - confirm which values are intended.
common_model = train_models((y_train, X_b, X_images),
                           lr=7e-04, batch_size=32, max_epoch=50, 
                            verbose=1, return_model=True)

## 5. Submit

In [None]:
# Create the final prediction submission file
if predict_submission:
    print('Reading test dataset...')
    test = pd.read_json('../data/test.json')
    # The test frame is unlabeled, so y_fin is None
    y_fin, X_fin_b, X_fin_img = create_dataset(test, False)
    
    print('Predicting...')
    prediction = common_model.predict([X_fin_b, X_fin_img], verbose=1, batch_size=32)
    
    print('Submitting...')
    # Flatten the predictions to one value per row
    submission = pd.DataFrame({'id': test['id'], 'is_iceberg': prediction.reshape((prediction.shape[0]))})
    
    submission.to_csv('../data/submission_3.csv', index=False)
    print('Done')