# CV 혹은 test 결과 산출하는 코드

In [1]:
# Show the GPU, driver and CUDA version visible to this kernel.
!nvidia-smi

Mon May 11 13:50:19 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 432.00       Driver Version: 432.00       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN X (Pascal)   WDDM  | 00000000:01:00.0  On |                  N/A |
| 31%   49C    P8    20W / 250W |   1150MiB / 12288MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [2]:
import glob
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf
import os
from os.path import join
import random
import lightgbm as lgb
import time
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Conv2DTranspose, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Activation, concatenate, Input, GlobalAveragePooling2D
from tensorflow.keras import Model
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import warnings
 
# Silence every Python warning for the rest of the session (deprecation notices included).
warnings.filterwarnings("ignore")

  import pandas.util.testing as tm


In [3]:
def conv_block_3(img_layer, start_neurons):
    """Apply three 3x3 same-padded ReLU convolutions, each followed by batch norm.

    Args:
        img_layer: Tensor fed to the first convolution.
        start_neurons: Number of filters used by all three convolutions.

    Returns:
        The batch-normalised output of the third convolution.
    """
    features = img_layer
    # Layers are instantiated in conv -> norm order, three times in a row.
    for _ in range(3):
        features = Conv2D(start_neurons, (3, 3), activation="relu", padding="same")(features)
        features = BatchNormalization()(features)

    return features

def conv_residual(img_layer, start_neurons):
    """Residual-style block: stem conv, a 3-conv branch added back to the stem, output conv.

    Args:
        img_layer: Tensor fed to the stem convolution.
        start_neurons: Number of filters used by every convolution in the block.

    Returns:
        The batch-normalised output of the final convolution.
    """
    # Stem: its output is both the branch input and the skip path.
    stem = Conv2D(start_neurons, (3, 3), activation="relu", padding="same")(img_layer)
    stem = BatchNormalization()(stem)

    # Branch: conv_block_3 already ends with batch norm; a second one is applied on top.
    branch = conv_block_3(stem, start_neurons)
    branch = BatchNormalization()(branch)

    # Element-wise sum of skip path and branch feeds the output convolution.
    fused = Conv2D(start_neurons, (3, 3), activation="relu", padding="same")(stem + branch)
    fused = BatchNormalization()(fused)

    return fused

def model_v1(input_layer, start_neurons):
    """Build an encoder-decoder graph with skip connections and return its output tensor.

    The first 9 channels of `input_layer` are treated as image features and are the
    only ones fed to the network; the remaining channels are sliced off and only
    have their shape printed.

    Args:
        input_layer: 4-D input tensor, sliced on its last axis.
        start_neurons: Base filter count; deeper levels use 2x and 4x this value.

    Returns:
        Single-channel tensor produced by a final 1x1 ReLU convolution.
    """
    # Split the raw channels into image features and external features.
    img_layer = input_layer[:,:,:,:9]
    ext_layer = input_layer[:,:,:,9:]
    print('input_layer shape:', input_layer.shape)
    print('img_layer shape:', img_layer.shape)
    print('ext_layer shape:', ext_layer.shape)

    # Encoder level 1 (40 x 40 -> 20 x 20 for a 40 x 40 input)
    print('img_layer shape:', img_layer.shape)
    enc1 = conv_residual(img_layer, start_neurons * 1)
    print('conv1 shape:', enc1.shape)
    down1 = MaxPooling2D((2, 2))(enc1)
    print('maxpool 1 shape:', down1.shape)
    down1 = Dropout(0.25)(down1)

    # Encoder level 2 (20 x 20 -> 10 x 10)
    enc2 = conv_residual(down1, start_neurons * 2)
    print('conv2 shape:', enc2.shape)
    down2 = MaxPooling2D((2, 2))(enc2)
    print('maxpool 2 shape:', down2.shape)
    down2 = Dropout(0.25)(down2)

    # Bottleneck (10 x 10)
    bottleneck = conv_residual(down2, start_neurons * 4)
    print('convm shape:', bottleneck.shape)

    # Decoder level 2 (10 x 10 -> 20 x 20), concatenated with the matching encoder output
    up2 = Conv2DTranspose(start_neurons * 2, (3, 3), strides=(2, 2), padding="same")(bottleneck)
    dec2 = concatenate([up2, enc2])
    dec2 = conv_residual(dec2, start_neurons * 2)
    print('upconv2 shape:', dec2.shape)
    dec2 = Dropout(0.25)(dec2)

    # Decoder level 1 (20 x 20 -> 40 x 40), concatenated with the matching encoder output
    up1 = Conv2DTranspose(start_neurons * 1, (3, 3), strides=(2, 2), padding="same")(dec2)
    dec1 = concatenate([up1, enc1])
    dec1 = conv_residual(dec1, start_neurons * 1)
    print('upconv1 shape:', dec1.shape)
    dec1 = Dropout(0.25)(dec1)

    # 1x1 convolution down to one channel; ReLU keeps the output non-negative.
    output_layer = Conv2D(1, (1,1), padding="same", activation='relu')(dec1)
    print('output shape:', output_layer.shape)

    return output_layer

# Instantiate the network: 40x40 inputs with 14 channels, 32 base filters.
input_layer = Input(shape=(40, 40, 14))
output_layer = model_v1(input_layer, 32)
model = Model(inputs=input_layer, outputs=output_layer)

input_layer shape: (None, 40, 40, 14)
img_layer shape: (None, 40, 40, 9)
ext_layer shape: (None, 40, 40, 5)
img_layer shape: (None, 40, 40, 9)
conv1 shape: (None, 40, 40, 32)
maxpool 1 shape: (None, 20, 20, 32)
conv2 shape: (None, 20, 20, 64)
maxpool 2 shape: (None, 10, 10, 64)
convm shape: (None, 10, 10, 128)
upconv2 shape: (None, 20, 20, 64)
upconv1 shape: (None, 40, 40, 32)
output shape: (None, 40, 40, 1)


In [4]:
# Custom loss function (maeOverFscore) and the helpers it is built from
def recall_m(y_true, y_pred):
    """Return true positives divided by actual positives, as a float32 scalar.

    Both inputs are clipped to [0, 1] and rounded before counting, so they are
    expected to be (near-)binary indicators. K.epsilon() keeps the division
    finite when there are no actual positives.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    true_positives = tf.dtypes.cast(K.sum(hit_mask), dtype=tf.float32)
    possible_positives = tf.dtypes.cast(K.sum(actual_mask), dtype=tf.float32)
    return true_positives / (possible_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Return true positives divided by predicted positives, as a float32 scalar.

    Both inputs are clipped to [0, 1] and rounded before counting, so they are
    expected to be (near-)binary indicators. K.epsilon() keeps the division
    finite when nothing is predicted positive.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    true_positives = tf.dtypes.cast(K.sum(hit_mask), dtype=tf.float32)
    predicted_positives = tf.dtypes.cast(K.sum(predicted_mask), dtype=tf.float32)
    return true_positives / (predicted_positives + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score of the event "value >= 0.1", computed over all elements.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values, same shape as `y_true`.

    Returns:
        Scalar float32 tensor: harmonic mean of precision_m and recall_m on the
        thresholded inputs (K.epsilon() guards the zero-denominator case).
    """
    # Cast the boolean threshold masks straight to 0/1 integers. Comparing the
    # mask with the Python literal True is redundant, and it yields all zeros
    # if tensor equality is disabled (`==` then tests object identity).
    y_true = tf.cast(tf.greater_equal(y_true, 0.1), tf.int32)
    y_pred = tf.cast(tf.greater_equal(y_pred, 0.1), tf.int32)
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

def mae(y_true, y_pred):
    """Mean absolute error between `y_pred` and `y_true` along the last axis.

    Every element contributes to the mean.

    NOTE(review): a `y_true >= 0.1` mask used to be computed here but was never
    applied to the result, so the dead computation was removed. Confirm whether
    the error was meant to be restricted to those elements.
    """
    return K.mean(math_ops.abs(y_pred - y_true), axis=-1)

def maeOverFscore(y_true, y_pred):
    """Loss: mean absolute error divided by the F1 score of the flattened tensors.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values, same shape as `y_true`.

    Returns:
        Scalar tensor `mae / (f1_m + epsilon)`.
    """
    y_true = tf.reshape(y_true, [-1])
    y_pred = tf.reshape(y_pred, [-1])
    # f1_m is exactly 0 when the batch has no true positive at the 0.1 threshold.
    # A bare division would then return inf (or nan when the MAE is 0 too) and
    # poison the averaged loss; epsilon keeps the value large but finite.
    return mae(y_true, y_pred) / (f1_m(y_true, y_pred) + K.epsilon())

## Cross Validation

In [0]:
# Input generator matching the layout of the cv.ftr file
def input_generator_ftr_cv(cv_file='./ftr_data/cv.ftr'):
    """Yield one (feature, label) pair per image stored in a CV feather file.

    The file is read as a DataFrame with `orbit`, `subset` and `pixel` columns
    plus 15 value columns; the last value column is the target. Rows sharing an
    (orbit, subset) pair form one image and are ordered by `pixel` before being
    reshaped to 40 x 40.

    Args:
        cv_file: Path of the feather file to read.

    Yields:
        Tuple `(feature, cutoff_labels)`: a (40, 40, 14) array holding the first
        14 value columns, and a (40, 40, 1) array holding the target with
        negative values replaced by 0.
    """
    cv = pd.read_feather(cv_file)

    # A single groupby pass replaces filtering the whole frame once per image.
    # sort=False keeps the images in order of first appearance, and
    # observed=True avoids empty groups if the key columns are categorical.
    for _, one_img in cv.groupby(['orbit', 'subset'], sort=False, observed=True):
        one_img = one_img.sort_values('pixel')
        one_img = np.array(one_img.drop(['orbit', 'subset', 'pixel'], axis=1)).reshape([40,40,15])
        target = one_img[:,:,-1].reshape(40,40,1)
        cutoff_labels = np.where(target < 0,0, target)
        feature = one_img[:,:,:-1]
        yield(feature, cutoff_labels)

# Expose the CV generator as a tf.data pipeline of (feature, label) pairs, 64 images per batch.
cv_dataset = tf.data.Dataset.from_generator(
    input_generator_ftr_cv,
    output_types=(tf.float32, tf.float32),
    output_shapes=(tf.TensorShape([40,40,14]), tf.TensorShape([40,40,1])),
).batch(64)

In [11]:
# Restore the most recent checkpoint and compile the model for evaluation.
WEIGHT_DIR = './checkpoint/'
latest = tf.train.latest_checkpoint(WEIGHT_DIR)
# latest_checkpoint returns None when the directory holds no checkpoint;
# fail here with a clear message instead of inside load_weights.
if latest is None:
    raise FileNotFoundError('no checkpoint found in {!r}'.format(WEIGHT_DIR))
print(latest)
model.load_weights(latest)
adam = tf.keras.optimizers.Adam(learning_rate=1.0e-05)
model.compile(loss=maeOverFscore, optimizer=adam, metrics=[mae, f1_m])

./checkpoint/v01_ep01-loss2.46503.ckpt


In [12]:
# Evaluate on the CV pipeline and display each metric next to its name.
result = model.evaluate(cv_dataset)
{name: value for name, value in zip(model.metrics_names, result)}



{'f1_m': 0.4889864921569824,
 'loss': 0.4758166968822479,
 'mae': 0.23813003301620483}

## Test

In [5]:
# Input generator matching the layout of the test.ftr file
def input_generator_ftr_test(test_file='./test.ftr'):
    """Yield one feature array per image stored in a test feather file.

    The file is read as a DataFrame with `orbit`, `subset` and `pixel` columns
    plus 14 value columns (no target). Rows sharing an (orbit, subset) pair form
    one image and are ordered by `pixel` before being reshaped to 40 x 40.

    Args:
        test_file: Path of the feather file to read.

    Yields:
        A (40, 40, 14) array of feature values for one image.
    """
    test = pd.read_feather(test_file)

    # A single groupby pass replaces filtering the whole frame once per image.
    # sort=False keeps the images in order of first appearance, and
    # observed=True avoids empty groups if the key columns are categorical.
    for _, one_img in test.groupby(['orbit', 'subset'], sort=False, observed=True):
        one_img = one_img.sort_values('pixel')
        one_img = np.array(one_img.drop(['orbit', 'subset', 'pixel'], axis=1)).reshape([40,40,14])
        feature = one_img[:,:,:]
        yield(feature)

# Expose the test generator as a tf.data pipeline: 1024 images per batch, one batch prefetched.
test_dataset = (
    tf.data.Dataset.from_generator(
        input_generator_ftr_test,
        output_types=tf.float32,
        output_shapes=tf.TensorShape([40,40,14]),
    )
    .batch(1024)
    .prefetch(1)
)

In [6]:
# Restore the most recent checkpoint and compile the model before predicting.
WEIGHT_DIR = './checkpoint/'
latest = tf.train.latest_checkpoint(WEIGHT_DIR)
# latest_checkpoint returns None when the directory holds no checkpoint;
# fail here with a clear message instead of inside load_weights.
if latest is None:
    raise FileNotFoundError('no checkpoint found in {!r}'.format(WEIGHT_DIR))
print(latest)
model.load_weights(latest)
adam = tf.keras.optimizers.Adam(learning_rate=1.0e-05)
model.compile(loss=maeOverFscore, optimizer=adam, metrics=[mae, f1_m])

./checkpoint/v01_ep04-loss0.86484.ckpt


In [7]:
# Run inference over the whole test pipeline; `result` holds the stacked per-image predictions.
result = model.predict(test_dataset)
result.shape

(2416, 40, 40, 1)

In [8]:
# Build the submission file: one row per test image, one column per pixel.
tmp = pd.read_csv('sample_submission.csv')
# Flatten every prediction map into a row; the row count is taken from the
# predictions themselves rather than a hard-coded image count.
df = result.reshape([len(result), -1])
df = pd.DataFrame(df)
print(df.shape)
# pd.concat(axis=1) aligns on the index and would silently pad a length
# mismatch with NaN, so check the row counts first.
if len(df) != len(tmp):
    raise ValueError('got {} predictions but sample_submission.csv has {} rows'.format(len(df), len(tmp)))
# NOTE(review): rows are matched to ids by position only; this assumes the test
# generator yields images in the same order as sample_submission.csv. Confirm.
submit = pd.concat([tmp['id'], df], axis=1)
submit.columns = tmp.columns
# to_csv does not create missing directories.
os.makedirs('./result', exist_ok=True)
submit.to_csv('./result/result_batch1024_2.csv', index=False)
submit

(2416, 1600)


Unnamed: 0,id,px_1,px_2,px_3,px_4,px_5,px_6,px_7,px_8,px_9,...,px_1591,px_1592,px_1593,px_1594,px_1595,px_1596,px_1597,px_1598,px_1599,px_1600
0,029858_01,0.261905,0.000000,0.184648,0.000000,0.000000,0.002708,0.159794,0.000000,0.117761,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.037848,0.0,0.0,0.0,0.0
1,029858_02,0.081972,0.000000,0.148341,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
2,029858_03,0.092854,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.028662,0.0,0.0,0.0,0.0
3,029858_05,0.071267,0.000000,0.122222,0.032004,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
4,029858_07,0.076566,0.000000,0.153572,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.399176,0.346281,0.345578,0.287870,0.127772,0.300113,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2411,031287_08,0.148113,0.000000,0.217676,0.129530,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.049249,0.0,0.0,0.0,0.0
2412,031288_01,0.000000,0.000000,0.000000,0.000000,0.126975,0.034592,0.000000,0.000000,0.054937,...,0.000000,0.000000,0.000000,0.214048,0.179185,0.005617,0.0,0.0,0.0,0.0
2413,031288_02,0.195744,0.092026,0.000000,0.000000,0.000000,0.034799,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.040110,0.0,0.0,0.0,0.0
2414,031288_08,0.000000,0.000000,0.099550,0.064366,0.000000,0.000000,0.000000,0.049474,0.000000,...,0.000000,0.000000,0.000000,0.041534,0.000000,0.000000,0.0,0.0,0.0,0.0
