In [1]:
from sklearn.metrics import mean_absolute_error
from itertools import combinations
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, MinMaxScaler,StandardScaler
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau
from sklearn.model_selection import KFold, GroupKFold, StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf
import tensorflow
from tensorflow_addons.optimizers import AdamW
from keras import backend as K
from sklearn.model_selection import train_test_split
import os
import time


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



### 데이터 불러오기

- 최종적으로 데이터는 EDA과정을 feature를 생성한 값을 사용한다

- if! normalize 과정이 주는 영향 체크 필요(모델2, 모델3은 오히려 성능 감소)

- y는 log scale을 취했을 때에 평균적인 validation accuracy가 0.1 상승하는 것을 확인했음. - log 스케일로 진행
 : 과정에서 필요한 costom metric의 경우 exp 값을 취해 정상적으로 계산되게 만들어 주는 것이 필요.


In [2]:
df = pd.read_csv('final_model1.csv')

In [3]:
df.drop(['Unnamed: 0'], axis = 1, inplace = True)

In [4]:
feature_cols = df.columns.tolist()
feature_cols.remove('Target')
target_cols = ['Target']
remove_list = ['Sex_I','Sex_F','Sex_M']
for col in remove_list:
    feature_cols.remove(col)
pipeline = Pipeline([('normalizer', Normalizer()),
                     ('scaler', StandardScaler())])

In [5]:
X = df.drop(['Target'], axis = 1)
y = df['Target']

# target 칼럼 log scale 적용
y = np.log10(y)

# train test split
X_train, X_test = train_test_split(X, test_size= 0.2, random_state = 42)
y_train, y_test = train_test_split(y, test_size= 0.2, random_state = 42)
X_train, X_val = train_test_split(X_train, test_size = 0.2, random_state = 42)
y_train, y_val = train_test_split(y_train, test_size = 0.2, random_state = 42)

# pipeline을 통해 normalize와 standard scaler 적용
X_train[feature_cols] = pipeline.fit_transform(X_train[feature_cols])
X_test[feature_cols] = pipeline.transform(X_test[feature_cols])
X_val[feature_cols] = pipeline.transform(X_val[feature_cols])

### 모델 구현

1. 인터넷 검색 모델

2. 최적의 파라미터 및 은닉층, 노드수 계산 모델

3. 최종 모델 선택 - 시간 고려(X) /// 추후 시간을 고려하는 모델과 성능을 고려하는 모델 두 가지 선택기능 구현 가능성

### 1. 인터넷 검색 모델

In [53]:
# 인터넷 구현 모델

# optimizer 구현 부분
schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay(
                [100, 150], [1e-0, 1e-1, 1e-2])
step = tf.Variable(0, trainable=False)
wd = lambda: 1e-3 * schedule(step)
optimizer = AdamW(learning_rate=0.001, weight_decay=wd)
def custom_opt(n):
    schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay([100, 150], [1e-0, 1e-1, 1e-2])
    step = tf.Variable(0, trainable=False)
    wd = lambda: 1e-3 * schedule(step)
    opt = AdamW(learning_rate = n, weight_decay = wd)
    return opt

# custom_metric 구현 - log scale 고려
def accuracy(y_true, y_pred):
    y_true = tf.math.exp(y_true)
    y_pred = tf.math.exp(y_pred)
    return 1 - tf.abs((y_true - y_pred) / y_true)
check = []
for _ in range(5):
    # 모델 구현
    model = Sequential()
    model.add(Dense(256, activation='elu', input_dim=X_train.shape[1]))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))              
    model.add(Dense(16, activation='elu'))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='elu'))

    # 모델 컴파일
    model.compile(loss='mae', optimizer=optimizer , metrics=[accuracy])

    # early stopping 구현추가
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    model.fit(X_train, y_train, epochs=1000, batch_size=256, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

    print("====== Final_model =======")
    print("train loss, train accuracy")
    train_loss, train_acc = model.evaluate(X_train, y_train, verbose =2)

    print("validation loss, validation accuracy")
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)
    check.append(val_acc)

print(np.mean(check))

train loss, train accuracy
84/84 - 0s - loss: 0.0656 - accuracy: 0.9349 - 91ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0651 - accuracy: 0.9350 - 40ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0648 - accuracy: 0.9359 - 95ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0649 - accuracy: 0.9354 - 44ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0695 - accuracy: 0.9304 - 92ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0692 - accuracy: 0.9303 - 44ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0699 - accuracy: 0.9306 - 91ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0693 - accuracy: 0.9309 - 43ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0709 - accuracy: 0.9302 - 98ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0695 - accuracy: 0.9311 - 37ms/epoch -

In [45]:
# normalize 효과 체크
# train test split
X_train_scaled, X_test_scaled = train_test_split(X, test_size= 0.2, random_state = 42)
y_train, y_test = train_test_split(y, test_size= 0.2, random_state = 42)
X_train_scaled, X_val_scaled = train_test_split(X_train_scaled, test_size = 0.2, random_state = 42)
y_train, y_val = train_test_split(y_train, test_size = 0.2, random_state = 42)

scaler = StandardScaler()
# standard scaler 적용
X_train_scaled[feature_cols] = scaler.fit_transform(X_train_scaled[feature_cols])
X_test_scaled[feature_cols] = scaler.transform(X_test_scaled[feature_cols])
X_val_scaled[feature_cols] = scaler.transform(X_val_scaled[feature_cols])

In [52]:
# to check
# 인터넷 구현 모델

# optimizer 구현 부분
schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay(
                [100, 150], [1e-0, 1e-1, 1e-2])
step = tf.Variable(0, trainable=False)
wd = lambda: 1e-3 * schedule(step)
optimizer = AdamW(learning_rate=0.001, weight_decay=wd)
def custom_opt(n):
    schedule = tensorflow.optimizers.schedules.PiecewiseConstantDecay([100, 150], [1e-0, 1e-1, 1e-2])
    step = tf.Variable(0, trainable=False)
    wd = lambda: 1e-3 * schedule(step)
    opt = AdamW(learning_rate = n, weight_decay = wd)
    return opt

# custom_metric 구현 - log scale 고려
def accuracy(y_true, y_pred):
    y_true = tf.math.exp(y_true)
    y_pred = tf.math.exp(y_pred)
    return 1 - tf.abs((y_true - y_pred) / y_true)
check = []

for _ in range(5):
    # 모델 구현
    model = Sequential()
    model.add(Dense(256, activation='elu', input_dim=X_train_scaled.shape[1]))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='elu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))              
    model.add(Dense(16, activation='elu'))
    model.add(Dense(16, activation='elu'))
    model.add(Dense(1, activation='elu'))

    # 모델 컴파일
    model.compile(loss='mae', optimizer=optimizer , metrics=[accuracy])

    # early stopping 구현추가
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    model.fit(X_train_scaled, y_train, epochs=1000, batch_size=256, validation_data=(X_val_scaled, y_val), callbacks=[early_stopping], verbose = 0)

    print("====== Final_model =======")
    print("train loss, train accuracy")
    train_loss, train_acc = model.evaluate(X_train_scaled, y_train, verbose =2)

    print("validation loss, validation accuracy")
    val_loss, val_acc = model.evaluate(X_val_scaled, y_val, verbose=2)
    check.append(val_acc)

print(np.mean(check))

train loss, train accuracy
84/84 - 0s - loss: 0.0679 - accuracy: 0.9329 - 99ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0672 - accuracy: 0.9332 - 40ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0810 - accuracy: 0.9196 - 88ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0793 - accuracy: 0.9210 - 38ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0715 - accuracy: 0.9298 - 92ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0697 - accuracy: 0.9313 - 39ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0677 - accuracy: 0.9327 - 86ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0675 - accuracy: 0.9326 - 38ms/epoch - 2ms/step
train loss, train accuracy
84/84 - 0s - loss: 0.0701 - accuracy: 0.9298 - 97ms/epoch - 1ms/step
validation loss, validation accuracy
21/21 - 0s - loss: 0.0692 - accuracy: 0.9305 - 40ms/epoch -

In [18]:
# train accuracy : 0.9340 // validation accuracy 0.9325

# normalize 한 값이 더 좋음

### 2. 최적의 파라미터 및 은닉층, 노드수 계산 모델

- optimizer의 경우 속도 면에서는 rmsprop, sgd가 좋았고, adam은 성능 부분에서 더 좋았음. → learning rate를 조절

In [7]:
def custom_opt2(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt

best_lst = []

In [22]:
# a. Dropout 미사용 (순정)

# 최고의 모델 찾기 - 검증 데이터와 표준화 진행한 데이터로 성능 구현(dropout사용)
act_func = ['relu', 'tanh', 'sigmoid', 'elu']
batch_lst = [8, 32, 64, 128, 256]
opt_lst = [0.01, 0.009, 0.006, 0.003, 0.001, 0.0005]
best_accuracy = 0.0
best_hyperparams = {}
best_time = 11111.0
time_hyper = {}

for func in act_func:
    for batch in batch_lst:
        for opti in opt_lst:
            # 모델 구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_dim=X_train.shape[1]))
            model.add(Dense(128, activation=func))
            model.add(Dense(64, activation=func))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(loss='mse', optimizer=custom_opt2(opti), metrics=[accuracy])

            # early stopping 구현 - 커스텀 정확도 기준
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
            start_time = time.time()
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
            end_time = time.time()
            cal_time = end_time - start_time
            loss, acc = model.evaluate(X_val, y_val, verbose=2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

            if cal_time < best_time:
                best_time = cal_time
                time_hyper = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
best_lst.append(best_accuracy)

print('Best time:', best_time)
print('time hyper_params:', time_hyper)



21/21 - 0s - loss: 0.0074 - accuracy: 0.9334 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0077 - accuracy: 0.9352 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.9579 - accuracy: 0.3828 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0078 - accuracy: 0.9351 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0073 - accuracy: 0.9366 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.9579 - accuracy: 0.3828 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.9579 - accuracy: 0.3828 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9306 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0092 - accuracy: 0.9200 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0075 - accuracy: 0.9330 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0091 - accuracy: 0.9240 - 43ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0089 - accuracy: 0.9323 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0108 - accuracy: 0.9123 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9350 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.9579 - accura

In [21]:
# b. Dropout 사용

# 최고의 모델 찾기 - 검증 데이터와 표준화 진행한 데이터로 성능 구현(dropout사용)
act_func = ['relu', 'tanh', 'sigmoid', 'elu']
batch_lst = [8, 32, 64, 128, 256]
opt_lst = [0.01, 0.009, 0.006, 0.003, 0.001, 0.0005]
best_accuracy = 0.0
best_hyperparams = {}
best_time = 11111.0
time_hyper = {}

for func in act_func:
    for batch in batch_lst:
        for opti in opt_lst:
            # 모델 구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_dim=X_train.shape[1]))
            model.add(Dropout(0.2)) # Dropout 추가
            model.add(Dense(128, activation=func))
            model.add(Dropout(0.2)) # Dropout 추가
            model.add(Dense(64, activation=func))
            model.add(Dropout(0.2)) # Dropout 추가
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(loss='mse', optimizer=custom_opt2(opti), metrics=[accuracy])

            # early stopping 구현 - 커스텀 정확도 기준
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
            start_time = time.time()
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
            end_time = time.time()
            cal_time = end_time - start_time
            loss, acc = model.evaluate(X_val, y_val, verbose=2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

            if cal_time < best_time:
                best_time = cal_time
                time_hyper = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
best_lst.append(best_accuracy)

print('Best time:', best_time)
print('time hyper_params:', time_hyper)



21/21 - 0s - loss: 0.0083 - accuracy: 0.9283 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0087 - accuracy: 0.9243 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0074 - accuracy: 0.9350 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0075 - accuracy: 0.9310 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0073 - accuracy: 0.9368 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9374 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0075 - accuracy: 0.9361 - 33ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0084 - accuracy: 0.9231 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0072 - accuracy: 0.9360 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.9579 - accuracy: 0.3828 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0154 - accuracy: 0.9074 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0245 - accuracy: 0.8764 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0084 - accuracy: 0.9340 - 33ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0075 - accuracy: 0.9365 - 34ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0072 - accura

In [23]:
# c. Dropout, batchNormalize 사용

# 최고의 모델 찾기 - 검증 데이터와 표준화 진행한 데이터로 성능 구현(dropout사용)
act_func = ['relu', 'tanh', 'sigmoid', 'elu']
batch_lst = [8, 32, 64, 128, 256]
opt_lst = [0.01, 0.009, 0.006, 0.003, 0.001, 0.0005]
best_accuracy = 0.0
best_hyperparams = {}
best_time = 11111.0
time_hyper = {}

for func in act_func:
    for batch in batch_lst:
        for opti in opt_lst:
            # 모델 구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_dim=X_train.shape[1]))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(64, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(loss='mse', optimizer=custom_opt2(opti), metrics=[accuracy])

            # early stopping 구현 - 커스텀 정확도 기준
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
            start_time = time.time()
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
            end_time = time.time()
            cal_time = end_time - start_time
            loss, acc = model.evaluate(X_val, y_val, verbose=2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

            if cal_time < best_time:
                best_time = cal_time
                time_hyper = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
best_lst.append(best_accuracy)

print('Best time:', best_time)
print('time hyper_params:', time_hyper)



21/21 - 0s - loss: 0.0105 - accuracy: 0.9182 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0092 - accuracy: 0.9200 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9312 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0072 - accuracy: 0.9359 - 35ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9350 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0073 - accuracy: 0.9346 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0078 - accuracy: 0.9308 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9361 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9342 - 39ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0078 - accuracy: 0.9326 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9364 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0072 - accuracy: 0.9346 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9324 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9366 - 41ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0078 - accura

In [24]:
# d. 모델 구조 변경 (다이아몬드)

# 최고의 모델 찾기 - 검증 데이터와 표준화 진행한 데이터로 성능 구현(dropout사용)
act_func = ['relu', 'tanh', 'sigmoid', 'elu']
batch_lst = [8, 32, 64, 128, 256]
opt_lst = [0.01, 0.009, 0.006, 0.003, 0.001, 0.0005]
best_accuracy = 0.0
best_hyperparams = {}
best_time = 11111.0
time_hyper = {}

for func in act_func:
    for batch in batch_lst:
        for opti in opt_lst:
            # 모델 구현
            model = Sequential()
            model.add(Dense(32, activation=func, input_dim=X_train.shape[1]))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(256, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(64, activation=func))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(loss='mse', optimizer=custom_opt2(opti), metrics=[accuracy])

            # early stopping 구현 - 커스텀 정확도 기준
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
            start_time = time.time()
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
            end_time = time.time()
            cal_time = end_time - start_time
            loss, acc = model.evaluate(X_val, y_val, verbose=2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

            if cal_time < best_time:
                best_time = cal_time
                time_hyper = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
best_lst.append(best_accuracy)

print('Best time:', best_time)
print('time hyper_params:', time_hyper)



21/21 - 0s - loss: 0.0079 - accuracy: 0.9333 - 43ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0078 - accuracy: 0.9313 - 41ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0092 - accuracy: 0.9262 - 39ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0073 - accuracy: 0.9349 - 40ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9301 - 49ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9340 - 48ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0075 - accuracy: 0.9346 - 45ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0079 - accuracy: 0.9323 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9340 - 47ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0076 - accuracy: 0.9317 - 56ms/epoch - 3ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9348 - 47ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0069 - accuracy: 0.9366 - 59ms/epoch - 3ms/step
21/21 - 0s - loss: 0.0070 - accuracy: 0.9337 - 57ms/epoch - 3ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9355 - 55ms/epoch - 3ms/step
21/21 - 0s - loss: 0.0071 - accura

### 3. 최종 모델 선택

In [25]:
# Best hyperparameters: {'activation': 'relu', 'batch_size': 128, 'learning_rate': 0.01}
# Best validation accuracy: 0.9382919073104858
# Best time: 4.589000701904297
# time hyper_params: {'activation': 'relu', 'batch_size': 128, 'learning_rate': 0.009}

# c. Dropout, batchNormalize 사용

# 최고의 모델 찾기 - 검증 데이터와 표준화 진행한 데이터로 성능 구현(dropout사용)

act_func = ['relu']
batch_lst = [128, 256]
opt_lst = [0.01, 0.009, 0.006, 0.003, 0.001]
best_accuracy = 0.0
best_hyperparams = {}
best_time = 11111.0
time_hyper = {}

for func in act_func:
    for batch in batch_lst:
        for opti in opt_lst:
            # 모델 구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_dim=X_train.shape[1]))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(64, activation=func))
            model.add(BatchNormalization())  # BatchNormalization 추가
            model.add(Dropout(0.2))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(loss='mse', optimizer=custom_opt2(opti), metrics=[accuracy])

            # early stopping 구현 - 커스텀 정확도 기준
            early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
            start_time = time.time()
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
            end_time = time.time()
            cal_time = end_time - start_time
            loss, acc = model.evaluate(X_val, y_val, verbose=2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

            if cal_time < best_time:
                best_time = cal_time
                time_hyper = {'activation': func, 'batch_size': batch, 'learning_rate': opti}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
best_lst.append(best_accuracy)

print('Best time:', best_time)
print('time hyper_params:', time_hyper)



21/21 - 0s - loss: 0.0069 - accuracy: 0.9369 - 36ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0077 - accuracy: 0.9293 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0069 - accuracy: 0.9367 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0071 - accuracy: 0.9371 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0069 - accuracy: 0.9368 - 37ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0083 - accuracy: 0.9338 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0068 - accuracy: 0.9378 - 38ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0069 - accuracy: 0.9378 - 39ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0077 - accuracy: 0.9353 - 42ms/epoch - 2ms/step
21/21 - 0s - loss: 0.0081 - accuracy: 0.9340 - 38ms/epoch - 2ms/step
Best hyperparameters: {'activation': 'relu', 'batch_size': 256, 'learning_rate': 0.009}
Best validation accuracy: 0.9377842545509338
Best time: 4.463998556137085
time hyper_params: {'activation': 'relu', 'batch_size': 256, 'learning_rate': 0.006}


In [42]:
# Best hyperparameters: {'activation': 'elu', 'batch_size': 128, 'learning_rate': 0.006}

model = Sequential()
model.add(Dense(256, activation='elu', input_dim=X_train.shape[1]))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(128, activation='elu'))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(64, activation='elu'))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(32, activation='elu'))
model.add(Dense(16, activation='elu'))
model.add(Dense(8, activation='elu'))
model.add(Dense(8, activation='elu'))
model.add(Dense(1, activation='elu'))

# 모델 컴파일
model.compile(loss='mse', optimizer=custom_opt2(0.006), metrics=[accuracy])

# early stopping 구현 - 커스텀 정확도 기준
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
start_time = time.time()
model.fit(X_train, y_train, epochs=1000, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
end_time = time.time()

print("==== Final Model ====")
print("걸린시간 :",end_time-start_time)
print("==== train los acc ====")
train_loss, train_acc = model.evaluate(X_train, y_train, verbose = 2)
print("==== val los acc ====")
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)

==== Final Model ====
걸린시간 : 7.3850014209747314
==== train los acc ====
84/84 - 0s - loss: 0.0070 - accuracy: 0.9387 - 140ms/epoch - 2ms/step
==== val los acc ====
21/21 - 0s - loss: 0.0072 - accuracy: 0.9367 - 52ms/epoch - 2ms/step


In [43]:
# Best hyperparameters: {'activation': 'relu', 'batch_size': 128, 'learning_rate': 0.009}

# 모델 구현
model = Sequential()
model.add(Dense(32, activation='relu', input_dim=X_train.shape[1]))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())  # BatchNormalization 추가
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='relu'))

# 모델 컴파일
model.compile(loss='mse', optimizer=custom_opt2(0.009), metrics=[accuracy])

# early stopping 구현 - 커스텀 정확도 기준
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10)
start_time = time.time()
model.fit(X_train, y_train, epochs=1000, batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose = 0)
end_time = time.time()

print("==== Final Model ====")
print("걸린시간 :",end_time-start_time)
print("==== train los acc ====")
train_loss, train_acc = model.evaluate(X_train, y_train, verbose = 2)
print("==== val los acc ====")
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)

==== Final Model ====
걸린시간 : 5.8289971351623535
==== train los acc ====
84/84 - 0s - loss: 0.0069 - accuracy: 0.9380 - 127ms/epoch - 2ms/step
==== val los acc ====
21/21 - 0s - loss: 0.0072 - accuracy: 0.9347 - 61ms/epoch - 3ms/step
