## Import

In [1]:
# for load data
import os
import pandas as pd
import numpy as np
import random

# split data
from sklearn.model_selection import StratifiedKFold

# scaler
from sklearn.preprocessing import StandardScaler

# load tensorflow
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

## Read data

In [2]:
path = (os.path.abspath("./input"))

# 비정형 데이터인 text를 Word2Vec한 Feature만을 사용한다.
X_train = pd.read_csv(path +'/feature_train_W2V.csv', encoding='cp949')
X_test = pd.read_csv(path +'/feature_test_W2V.csv', encoding='cp949')
y_train = pd.read_csv(path +'/y_train.csv', encoding='cp949').group

In [3]:
train_ID, test_ID = X_train.custid, X_test.custid
del X_train['custid'], X_test['custid']

### Split train, validation data

In [4]:
# 한 개의 회차 데이터만을 사용한다.
SKF = list(StratifiedKFold(n_splits = 5, shuffle = True, random_state = 0).split(X_train, y_train))[3]

tr_X, val_X = X_train.iloc[SKF[0]], X_train.iloc[SKF[1]]
tr_y, val_y = y_train.iloc[SKF[0]], y_train.iloc[SKF[1]]

In [5]:
tr_y = keras.utils.to_categorical(tr_y.astype('category').cat.codes)
val_y = keras.utils.to_categorical(val_y.astype('category').cat.codes)

In [6]:
tr_X.shape, tr_y.shape, val_X.shape, val_y.shape

((17270, 1530), (17270, 8), (4317, 1530), (4317, 8))

In [7]:
# scailing
scaler = StandardScaler()
tr_X = scaler.fit_transform(tr_X)
val_X = scaler.transform(val_X)
X_test = scaler.transform(X_test)

## Set Seed

In [8]:
def reset_seeds(s1,s2,s3, reset_graph_with_backend=None):
    if reset_graph_with_backend is not None:
        K = reset_graph_with_backend
        K.clear_session()
        tf.compat.v1.reset_default_graph()
        print("KERAS AND TENSORFLOW GRAPHS RESET")  # optional

    np.random.seed(s1)
    random.seed(s2)
    tf.compat.v1.set_random_seed(s3)
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # for GPU
#    print("RANDOM SEEDS RESET")  # optional

In [10]:
reset_seeds(7,77,777)

## Hyper-Model

In [11]:
def model_fn(hp):
    inputs = keras.Input(shape=(X_train.shape[1],))
    x = inputs
    for i in range(hp.Int('num_layers', 2, 3)):
        x = keras.layers.Dense(hp.Int('unit_'+str(i), 20, 61, step=10), hp.Choice('activation', ['relu', 'elu']))(x)
        x = keras.layers.Dropout(hp.Float('dropout_'+str(i), 0, 0.5, step=0.1, default=0.5))(x) 
    outputs = keras.layers.Dense(8, activation='softmax')(x) # 예측값이 8종류이므로 8개 출력 뉴런 필요
    model = keras.Model(inputs, outputs)
    model.compile(loss='categorical_crossentropy', # Multiclass Classification에서 사용하는 loss function
                  optimizer=tf.keras.optimizers.Adam(hp.Choice('learning_rate', [0.01, 0.03])), 
                  metrics=[keras.metrics.CategoricalCrossentropy()]) # Multiclass Classification에서 사용하는 평가지표
    return model

In [None]:
# def model_fn(hp):
#     inputs = keras.Input(shape=(X_train.shape[1],))
#     x = inputs
#     for i in range(hp.Int('num_layers', 2, 3)):
#         x = keras.layers.Dense(hp.Int('unit_'+str(i), 16, 64, step=16), activation='relu')(x)
#         x = keras.layers.Dropout(hp.Float('dropout_'+str(i), 0, 0.5, step=0.25, default=0.5))(x)
#     outputs = keras.layers.Dense(8, activation='softmax')(x) # 예측값이 8종류이므로 8개 출력 뉴런 필요
#     model = keras.Model(inputs, outputs)
#     model.compile(loss='categorical_crossentropy', # Multiclass Classification에서 사용하는 loss function
#                   optimizer=tf.keras.optimizers.Adam(hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])), 
#                   metrics=[keras.metrics.CategoricalCrossentropy()]) # Multiclass Classification에서 사용하는 평가지표
#     return model

## Hyper-tuned models

In [12]:
tuner = kt.Hyperband(model_fn,
                     objective=kt.Objective('val_categorical_crossentropy', direction="min"), 
                     max_epochs=30,
                     hyperband_iterations=2,
                     overwrite=True,
                     directory=(os.path.abspath("./src"))+'/dnn_tuning_3')
tuner.search(tr_X, tr_y, validation_data=(val_X, val_y), 
             callbacks=[tf.keras.callbacks.EarlyStopping(patience=1)])

Trial 180 Complete [00h 00m 24s]
val_categorical_crossentropy: 2.0298824310302734

Best val_categorical_crossentropy So Far: 1.6029775142669678
Total elapsed time: 00h 33m 40s
INFO:tensorflow:Oracle triggered exit


In [13]:
model = tuner.get_best_models(1)[0]  
tuner.results_summary(1)

Results summary
Results in C:\Users\ha\+Competition\src/dnn_tuning_3\untitled_project
Showing 1 best trials
<keras_tuner.engine.objective.Objective object at 0x000002356E41E3D0>
Trial summary
Hyperparameters:
num_layers: 3
unit_0: 20
activation: relu
dropout_0: 0.1
unit_1: 60
dropout_1: 0.1
learning_rate: 0.01
unit_2: 40
dropout_2: 0.1
tuner/epochs: 30
tuner/initial_epoch: 10
tuner/bracket: 3
tuner/round: 3
tuner/trial_id: 0136
Score: 1.6029775142669678


## Deploy Model & Make submission file

In [13]:
pred = pd.DataFrame(model.predict(X_test))
submissions = pd.concat([test_ID, pred] ,axis=1)
submissions.columns = ['ID','F20','F30','F40','F50','M20','M30','M40','M50']

In [None]:
sub_path = (os.path.abspath("./submission"))

fname = f'/MDNN_FW2V.csv'
submissions.to_csv(sub_path+fname, index=False)
print("'{}' is ready to submit." .format(fname))