# 6(034). DNN

In [1]:
# for load data
import os
import pandas as pd
import numpy as np
import random

# split data
from sklearn.model_selection import StratifiedKFold

# scaler
from sklearn.preprocessing import MinMaxScaler, RobustScaler, Normalizer, StandardScaler, MaxAbsScaler

# load tensorflow
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

## Read data

In [13]:
# Resolve the data directory relative to the current working directory.
path = os.path.abspath("./input")
# All three CSV files are read with the same cp949 encoding.
read_opts = {'encoding': 'cp949'}

# NOTE(review): the original comment here said that only features obtained by
# applying Word2Vec to unstructured text are used; confirm that this describes
# the contents of the 034.imp_CAT_* files.
X_train = pd.read_csv(path + '/034.imp_CAT_train.csv', **read_opts)
X_test = pd.read_csv(path + '/034.imp_CAT_test.csv', **read_opts)
# Only the `group` column of y_train.csv is kept as the target.
y_train = pd.read_csv(path + '/y_train.csv', **read_opts).group

In [14]:
# Move the `custid` column out of each feature frame: `pop` returns the column
# and removes it from the frame, so the ID is kept aside and is no longer part
# of the model inputs.
train_ID = X_train.pop('custid')
test_ID = X_test.pop('custid')

### Split train, validation data

In [15]:
# Only one of the five stratified folds is used: the split at list index 2.
splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
SKF = list(splitter.split(X_train, y_train))[2]
fit_rows, holdout_rows = SKF[0], SKF[1]

# Positional row selection for the training and validation partitions.
tr_X, tr_y = X_train.iloc[fit_rows], y_train.iloc[fit_rows]
val_X, val_y = X_train.iloc[holdout_rows], y_train.iloc[holdout_rows]

In [16]:
# One-hot encode the targets for the softmax output layer.
# Both partitions are encoded against the SAME class list, taken from the full
# y_train. Categorising tr_y and val_y independently would assign different
# integer codes (and a different one-hot width) whenever a class happened to be
# absent from one of the partitions.
group_dtype = pd.CategoricalDtype(y_train.astype('category').cat.categories)
n_classes = len(group_dtype.categories)
tr_y = keras.utils.to_categorical(tr_y.astype(group_dtype).cat.codes, num_classes=n_classes)
val_y = keras.utils.to_categorical(val_y.astype(group_dtype).cat.codes, num_classes=n_classes)

In [17]:
# Display the shape of each partition (features and one-hot targets).
tuple(part.shape for part in (tr_X, tr_y, val_X, val_y))

((17270, 120), (17270, 8), (4317, 120), (4317, 8))

In [18]:
# Scaling: fit the scaler on the training partition only, then apply that one
# fitted scaler to the training, validation and test features.
scaler = MinMaxScaler().fit(tr_X)
tr_X, val_X, X_test = (scaler.transform(part) for part in (tr_X, val_X, X_test))

## Set Seed

In [19]:
def reset_seeds(s1,s2,s3, reset_graph_with_backend=None):
    """Seed the NumPy, Python and TensorFlow random number generators.

    Args:
        s1: Seed passed to ``np.random.seed``.
        s2: Seed passed to ``random.seed``.
        s3: Seed passed to ``tf.compat.v1.set_random_seed``.
        reset_graph_with_backend: Optional object exposing ``clear_session()``
            (presumably the Keras backend module -- confirm with callers).
            When given, its session is cleared and the default TensorFlow
            graph is reset before the seeds are applied.

    Side effects:
        Sets the ``CUDA_VISIBLE_DEVICES`` environment variable to an empty
        string, and prints a message when a backend reset was requested.
    """
    backend = reset_graph_with_backend
    if backend is not None:
        backend.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(s1)
    random.seed(s2)
    tf.compat.v1.set_random_seed(s3)
    # NOTE(review): an empty CUDA_VISIBLE_DEVICES normally hides all GPUs from
    # the process; presumably intended to force CPU execution for
    # reproducibility -- confirm this is wanted.
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [20]:
# Seeds, in order: NumPy (s1), Python `random` (s2), TensorFlow (s3).
reset_seeds(s1=53, s2=164, s3=12230)

## Hyper-Model

In [21]:
def model_fn(hp):
    """Build and compile one candidate dense network for the tuner.

    Args:
        hp: Hyperparameter container supplied by Keras Tuner. The search space
            is registered on it through ``hp.Int``, ``hp.Float`` and
            ``hp.Choice`` below.

    Returns:
        A compiled ``keras.Model`` ending in an 8-unit softmax layer.

    Note:
        The input width is read from the notebook-level ``X_train`` frame, so
        that frame must exist when the tuner calls this function.
    """
    inputs = keras.Input(shape=(X_train.shape[1],))
    hidden = inputs

    # Two or three hidden blocks, each a Dense(elu) layer followed by Dropout.
    n_hidden = hp.Int('num_layers', 2, 3)
    for layer_idx in range(n_hidden):
        suffix = str(layer_idx)
        width = hp.Int('unit_' + suffix, 10, 61, step=10)
        drop_rate = hp.Float('dropout_' + suffix, 0, 0.5, step=0.1, default=0.5)
        hidden = keras.layers.Dense(width, activation='elu')(hidden)
        hidden = keras.layers.Dropout(drop_rate)(hidden)

    # Eight output neurons because there are eight kinds of prediction values.
    outputs = keras.layers.Dense(8, activation='softmax')(hidden)
    model = keras.Model(inputs, outputs)

    learning_rate = hp.Choice('learning_rate', [0.01, 0.03])
    model.compile(
        loss='categorical_crossentropy',  # loss function for multiclass classification
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        metrics=[keras.metrics.CategoricalCrossentropy()],  # evaluation metric for multiclass classification
    )
    return model

## Hyper-tuned models

In [22]:
# Minimise the validation categorical cross-entropy metric.
objective = kt.Objective('val_categorical_crossentropy', direction="min")
# Tuning results are written under ./src (resolved to an absolute path).
tuning_dir = (os.path.abspath("./src"))+'/dnn_tuning_034_4'
# Stop a trial after one epoch without improvement of EarlyStopping's
# default monitored quantity.
early_stop = tf.keras.callbacks.EarlyStopping(patience=1)

tuner = kt.Hyperband(
    model_fn,
    objective=objective,
    max_epochs=50,
    factor=10,
    hyperband_iterations=2,
    overwrite=True,  # discard any previous results in the tuning directory
    directory=tuning_dir,
)
tuner.search(
    tr_X,
    tr_y,
    validation_data=(val_X, val_y),
    callbacks=[early_stop],
)

Trial 40 Complete [00h 00m 06s]
val_categorical_crossentropy: 1.582961082458496

Best val_categorical_crossentropy So Far: 1.5359277725219727
Total elapsed time: 00h 04m 38s
INFO:tensorflow:Oracle triggered exit


In [23]:
# Take the single top-ranked model from the search and print the summary of
# the best trial.
# NOTE(review): the recorded best trial ran for only 5 epochs on the training
# partition; consider retraining with the best hyperparameters before
# predicting.
best_models = tuner.get_best_models(num_models=1)
model = best_models[0]
tuner.results_summary(num_trials=1)

Results summary
Results in C:\Users\ha\+Competition\src/dnn_tuning_034_4\untitled_project
Showing 1 best trials
<keras_tuner.engine.objective.Objective object at 0x000002468075A9A0>
Trial summary
Hyperparameters:
num_layers: 2
unit_0: 30
dropout_0: 0.1
unit_1: 20
dropout_1: 0.1
learning_rate: 0.01
unit_2: 60
dropout_2: 0.5
tuner/epochs: 5
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 1.5359277725219727


## Deploy Model & Make submission file

In [24]:
# Predict on the scaled test features; the model emits one column per class.
pred = pd.DataFrame(model.predict(X_test))
# Guard against a silent row-count mismatch between IDs and predictions.
assert len(test_ID) == len(pred), "test_ID and predictions differ in length"
# pd.concat(axis=1) aligns rows on index labels, so give the IDs the same fresh
# 0..n-1 index that `pred` has; mismatched labels would otherwise produce NaN
# rows instead of a positional join.
submissions = pd.concat([test_ID.reset_index(drop=True), pred], axis=1)
# NOTE(review): the column order assumes the model's output units follow the
# sorted class labels F20..M50 -- confirm against the target encoding.
submissions.columns = ['ID','F20','F30','F40','F50','M20','M30','M40','M50']

In [25]:
# Original note: CAT_1, 2 and 3 are generated by swapping the scaler; this
# run writes the CAT_3 file.
sub_path = (os.path.abspath("./submission"))
# Create the output directory if it is missing so that to_csv does not fail
# at the very end of the run.
os.makedirs(sub_path, exist_ok=True)

fname = '/dnn_034_CAT_3.csv'
submissions.to_csv(sub_path+fname, index=False)
print(f"'{fname}' is ready to submit.")

'/dnn_034_CAT_3.csv' is ready to submit.
