# DNN

#### 패키지 설치(keras 이용)

In [None]:
import pandas as pd
import numpy as np
from numpy.random import seed
import tensorflow as tf
from tensorflow import set_random_seed
import keras
from keras import models, layers
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.metrics import roc_auc_score
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.layers.normalization import BatchNormalization

#### 신경망 내에서 auc_roc로 측정하기 위해 만든 함수

In [1]:
def auc_roc(y_true, y_pred):
    """Keras-compatible metric that reports ROC AUC via TensorFlow.

    Wraps ``tf.contrib.metrics.streaming_auc`` so it can be listed in
    ``metrics=[...]`` when compiling a Keras model.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        A tensor holding the AUC value; evaluating it first runs the
        metric's update op.
    """
    # streaming_auc expects (predictions, labels), hence the swapped order.
    value, update_op = tf.contrib.metrics.streaming_auc(y_pred, y_true)

    # Find the local variables created for this metric. The match is done on
    # the second name-scope component (presumably 'metrics/auc_roc/...' as
    # produced by Keras -- TODO confirm). Variables whose name has no '/'
    # are skipped instead of raising IndexError.
    metric_vars = []
    for var in tf.local_variables():
        scopes = var.name.split('/')
        if len(scopes) > 1 and 'auc_roc' in scopes[1]:
            metric_vars.append(var)

    # Add metric variables to the GLOBAL_VARIABLES collection so that they
    # are initialized for a new session.
    for var in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, var)

    # Force the metric to be updated before its value is read.
    with tf.control_dependencies([update_op]):
        value = tf.identity(value)
        return value

#### 각 은닉층의 뉴런 개수

In [None]:
# Number of units for the hidden Dense layers; indexed by create_network().
Nh_l = [
    300,
    250,
    200,
    150,
    128,
    128,
]

#### 은닉층 : 7층(Nh_l의 마지막 값을 두 번 사용) / activation function : relu / learning rate=0.008

In [None]:
def create_network():
    """Build and compile the fully connected binary classifier.

    Reads two module-level names: ``train_x`` (its column count sets the
    input width) and ``Nh_l`` (units per hidden layer; the last entry is
    used for two consecutive layers).

    Returns:
        A compiled Keras ``Sequential`` model with a single sigmoid output,
        binary cross-entropy loss, an Adam optimizer and ``auc_roc`` as the
        reported metric.
    """
    n_features = len(list(train_x))

    # One row per hidden block after the first layer:
    # (units, insert BatchNormalization before the activation?,
    #  dropout rate applied after the activation, or None for no dropout).
    # Per the original author's notes, batch normalization is there to ease
    # vanishing gradients and dropout to reduce overfitting.
    hidden_spec = [
        (Nh_l[1], True, 0.5),
        (Nh_l[2], False, None),
        (Nh_l[3], True, 0.6),
        (Nh_l[4], True, None),
        (Nh_l[5], False, 0.6),
        (Nh_l[5], True, None),
    ]

    network = models.Sequential()
    # The first hidden layer carries the input shape and a fused activation.
    network.add(layers.Dense(Nh_l[0], activation='relu', input_shape=(n_features,)))

    for units, use_batch_norm, drop_rate in hidden_spec:
        network.add(layers.Dense(units))
        if use_batch_norm:
            network.add(BatchNormalization())
        network.add(layers.Activation('relu'))
        if drop_rate is not None:
            network.add(Dropout(drop_rate))

    # Single-unit sigmoid head for the binary target.
    network.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(lr=0.008, beta_1=0.9, beta_2=0.999)
    network.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[auc_roc])
    return network

#### 같은 kfold 데이터를 이용하기 위해 R에서 데이터를 가져와서 이용

In [None]:
# Fix the NumPy and TensorFlow random seeds before training.
seed(1)
set_random_seed(1)
tf.set_random_seed(1)

# Validation ROC AUC of each fold, in fold order.
auc_score=[]
for i in range(1,6) :
    # Fold files train_<i>.csv / valid_<i>.csv are read from disk.
    # engine='python' is presumably needed because of the non-ASCII path -- TODO confirm.
    train_d=pd.read_csv('C:/Desktop/Son/공모전/빅콘 2019/최종 데이터 및 코드/train_'+str(i)+'.csv',engine = 'python')
    valid_d = pd.read_csv("C:/Desktop/Son/공모전/빅콘 2019/최종 데이터 및 코드/valid_"+str(i)+".csv",engine='python')

    # 'dly' is the target; 'delay_time' is dropped from the features as well.
    # train_x is also read as a global by create_network() for the input width.
    train_y=train_d['dly']
    train_x=train_d.drop(['dly','delay_time'],axis=1)
    valid_y=valid_d['dly']
    valid_x=valid_d.drop(['dly','delay_time'],axis=1)

    # Save the model with the lowest validation loss to best_model.h5.
    # NOTE(review): the file is rewritten for every fold and is never loaded
    # back here, so the predictions below come from the model as it is when
    # training stops, not from the checkpoint.
    mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True)
    # Stop training when validation loss has not improved for 25 epochs.
    my_callbacks = EarlyStopping(monitor='val_loss', patience=25, verbose=2, mode='min')
    neural_network = KerasClassifier(build_fn=create_network, batch_size=800, epochs=50, verbose=2,validation_data=(valid_x, valid_y),callbacks=[my_callbacks,mc])
    nnet=neural_network.fit(train_x, train_y)

    # Column 1 of predict_proba is used as the positive-class score.
    ynew = neural_network.predict_proba(valid_x)
    real_y=np.array(valid_y)
    pred_y=np.array(pd.DataFrame(ynew).loc[:,1])
    auc_score.append(roc_auc_score(real_y, pred_y))

    # Persist this fold's predicted probabilities in the working directory.
    # i is an int, so it must be converted before string concatenation
    # (the original "valid_"+i raised TypeError).
    valid_1=pd.DataFrame(ynew)
    valid_1.to_csv("valid_"+str(i)+".csv",mode='w')
print(auc_score)

In [None]:
# Fix the NumPy and TensorFlow random seeds before training.
seed(1)
set_random_seed(1)
tf.set_random_seed(1)

# Validation ROC AUC of each fold, in fold order.
auc_score=[]
for i in range(1,6) :
    # Fold files train_a<i>.csv / valid_a<i>.csv are read from disk.
    # engine='python' is presumably needed because of the non-ASCII path -- TODO confirm.
    train_d=pd.read_csv('C:/Desktop/Son/공모전/빅콘 2019/최종 데이터 및 코드/train_a'+str(i)+'.csv',engine = 'python')
    valid_d = pd.read_csv("C:/Desktop/Son/공모전/빅콘 2019/최종 데이터 및 코드/valid_a"+str(i)+".csv",engine='python')

    # 'dly' is the target; 'delay_time' is dropped from the features as well.
    # train_x is also read as a global by create_network() for the input width.
    train_y=train_d['dly']
    train_x=train_d.drop(['dly','delay_time'],axis=1)
    valid_y=valid_d['dly']
    valid_x=valid_d.drop(['dly','delay_time'],axis=1)

    # Save the model with the lowest validation loss to best_model.h5.
    # NOTE(review): the file is rewritten for every fold and is never loaded
    # back here, so the predictions below come from the model as it is when
    # training stops, not from the checkpoint.
    mc = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True)
    # Stop training when validation loss has not improved for 25 epochs.
    my_callbacks = EarlyStopping(monitor='val_loss', patience=25, verbose=2, mode='min')
    neural_network = KerasClassifier(build_fn=create_network, batch_size=800, epochs=50, verbose=2,validation_data=(valid_x, valid_y),callbacks=[my_callbacks,mc])
    nnet=neural_network.fit(train_x, train_y)

    # Column 1 of predict_proba is used as the positive-class score.
    ynew = neural_network.predict_proba(valid_x)
    real_y=np.array(valid_y)
    pred_y=np.array(pd.DataFrame(ynew).loc[:,1])
    auc_score.append(roc_auc_score(real_y, pred_y))

    # Persist this fold's predicted probabilities in the working directory.
    # i is an int, so it must be converted before string concatenation
    # (the original "valid_"+i raised TypeError).
    # NOTE(review): these are the same "valid_<i>.csv" names written by the
    # cell that processes train_<i>/valid_<i>, so running both overwrites
    # those files -- confirm this is intended.
    valid_1=pd.DataFrame(ynew)
    valid_1.to_csv("valid_"+str(i)+".csv",mode='w')
print(auc_score)