In [1]:
# Reference - https://github.com/DSC-SCH/Determination-of-the-Status-of-Nuclear-Power-Plants/issues/6
import os
import pandas as pd 
import numpy as np
import multiprocessing # 여러 개의 일꾼 (cpu)들에게 작업을 분산시키는 역할
from multiprocessing import Pool 
from functools import partial # 함수가 받는 인자들 중 몇개를 고정 시켜서 새롭게 파생된 함수를 형성하는 역할
from data_loader_2 import data_loader_v2 # 자체적으로 만든 data loader version 2.0 ([데이콘 15회 대회] 데이터 설명 및 데이터 불러오기 영상 참조)

# keras
from keras.models import Sequential
from keras import optimizers, Sequential
from keras.models import Model
from keras.utils import plot_model
from keras.layers import Dense, LSTM, RepeatVector, TimeDistributed, Dropout
from keras.callbacks import ModelCheckpoint, TensorBoard
# sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_curve
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
from sklearn.metrics import precision_recall_fscore_support, f1_score

import joblib # 모델을 저장하고 불러오는 역할

train_folder = 'data/train/'
test_folder = 'data/test/'
train_label_path = 'data/train_label.csv'

train_list = os.listdir(train_folder)
test_list = os.listdir(test_folder)
train_label = pd.read_csv(train_label_path, index_col=0)


def data_loader_all_v2(func, files, folder='', train_label=None, event_time=10, nrows=60):   
    func_fixed = partial(func, folder=folder, train_label=train_label, event_time=event_time, nrows=nrows)     
    if __name__ == '__main__':
        pool = Pool(processes=multiprocessing.cpu_count()) 
        df_list = list(pool.imap(func_fixed, files)) 
        pool.close()
        pool.join()        
    combined_df = pd.concat(df_list)    
    return combined_df

train = data_loader_all_v2(data_loader_v2, train_list, folder=train_folder, 
                        train_label=train_label, event_time=10, nrows=60)




Using TensorFlow backend.


In [2]:
train.head()

Unnamed: 0,V0000,V0001,V0002,V0003,V0004,V0005,V0006,V0007,V0008,V0009,...,V5112,V5113,V5114,V5115,V5116,V5117,V5118,V5119,V5120,label
545,30.472197,8.695875,8.703739,8.709477,8.711114,200.381358,159.282182,-5.018618999999999e-19,0.0,0.001439,...,1.0,1.0,1.0,60.0,0.0,0.0,-2e-06,85.4,0.0,20
545,30.451815,8.635427,8.705436,8.699428,8.71152,181.601175,156.666622,6.360337999999999e-19,0.0,-0.000572,...,1.0,1.0,1.0,60.0,0.0,0.0,3e-06,85.4,0.0,20
545,30.464503,8.743024,8.725929,8.671431,8.743335,198.761704,191.961581,1.067647e-20,0.0,0.000346,...,1.0,1.0,1.0,60.0,0.0,0.0,-2.8e-05,85.4,0.0,20
545,30.483019,8.752263,8.711617,8.776546,8.703115,169.724555,177.933862,-5.364948999999999e-19,0.0,-0.001408,...,1.0,1.0,1.0,60.0,0.0,0.0,-9e-06,85.4,0.0,20
545,30.473983,8.740589,8.69243,8.740275,8.717582,186.863251,170.349628,-1.9225279999999997e-19,0.0,0.001069,...,1.0,1.0,1.0,60.0,0.0,0.0,1e-05,85.4,0.0,20


In [3]:
train.shape

(41400, 5122)

In [43]:
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras import backend as keras

def unet(input_size, output_size):
    inputs = Input(input_size)
    
    conv1 = Conv2D(16, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(inputs)
    conv1 = Conv2D(16, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv1)
    batch_norm = BatchNormalization(axis = -1)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(batch_norm)
    
    conv2 = Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(pool1)
    conv2 = Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv2)
    batch_norm = BatchNormalization(axis = -1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(batch_norm)
    
    conv3 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(pool2)
    conv3 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv3)
    batch_norm = BatchNormalization(axis = -1)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(batch_norm)
    
    conv4 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(pool3)
    conv4 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv4)
    batch_norm = BatchNormalization(axis = -1)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(batch_norm)
    
    conv5 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(pool4)
    conv5 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv5)
    batch_norm = BatchNormalization(axis = -1)(conv5)

    up6 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(UpSampling2D(size=(2, 2), data_format="channels_last")(batch_norm))
    merge6 = concatenate([conv4, up6], axis=3)
    conv6 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(merge6)
    conv6 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv6)
    
    up7 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(UpSampling2D(size=(2, 2), data_format="channels_last")(conv6))
    merge7 = concatenate([conv3, up7], axis=3)
    conv7 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(merge7)
    conv7 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv7)
    
    up8 = Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(UpSampling2D(size=(2, 2), data_format="channels_last")(conv7))
    merge8 = concatenate([conv2, up8], axis=3)
    conv8 = Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(merge8)
    conv8 = Conv2D(32, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv8)
    
    up9 = Conv2D(16, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(UpSampling2D(size=(2,2), data_format="channels_last")(conv8))
    merge9 = concatenate([conv1, up9], axis=3)
    conv9 = Conv2D(16, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(merge9)
    conv9 = Conv2D(16, 3, activation="relu", padding="same", kernel_initializer="he_normal", data_format="channels_last")(conv9)
    
    conv10 = Conv2D(output_size, activation="sigmoid", data_format="channels_last")(conv9)
    
    model = Model(input=inputs, output=conv10)

    model.compile(optimizer = Adam(lr = 1e-4), loss = "categorical_crossentropy", metrics = ['accuracy'])
    
    
    return model

In [38]:
train.columns

Index(['V0000', 'V0001', 'V0002', 'V0003', 'V0004', 'V0005', 'V0006', 'V0007',
       'V0008', 'V0009',
       ...
       'V5112', 'V5113', 'V5114', 'V5115', 'V5116', 'V5117', 'V5118', 'V5119',
       'V5120', 'label'],
      dtype='object', length=5122)

In [35]:
X_train = train.drop(["label"], axis=1)
y_train = train["label"]

In [41]:
# input size 확인.
rows, cols = X_train.shape
input_size = (rows ,cols, 1)
output_size = len(set(y_train)) + 1 

input_size

(41400, 5121, 1)

In [44]:
# input size 확인.
rows, cols = X_train.shape
input_size = (rows ,cols, 1)
output_size = len(set(y_train)) + 1 

model = unet(input_size, output_size)

model.fit(X_trian, y_train)

ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 5175, 640, 128), (None, 5174, 640, 128)]