### 데이터 불러오기
1. SMOTE로 진행한 데이터가 비교적 높은 성능을 보였지만 f1_score, precision, 등 다소 아쉬운 모습을 보임.

2. 진행은 Basic 데이터에 class weight만 부여하고 Standard Scaler를 통해 처리후 진행
 : normalize 진행시 수치가 감소하는 모습을 확인함

In [21]:
import time
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from keras.layers import BatchNormalization
from keras.layers import Dropout
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer

In [30]:
# 새로운 데이터 (EDA)과정 진행 후 데이터
X_train = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/X_train.csv')
X_test = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/X_test.csv')
X_val = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/X_val.csv')
y_train = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/y_train.csv')
y_test = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/y_test.csv')
y_val = pd.read_csv('csv/Binary_to_share/Binary_to_share/Basic/y_val.csv')

In [31]:
# 데이터 표준화
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

In [32]:
y_train = y_train['target_class']
y_test = y_test['target_class']
y_val = y_val['target_class']

In [33]:
# 클래스 가중치 계산
class_weight = {0: 1., 1: (len(y_train) / sum(y_train))}
print(class_weight)

{0: 1.0, 1: 10.428571428571429}


### 모델 구현

- 앞서 처리된 데이터를 모델로 돌려보면서 accuracy를 기준으로 early_stopping을 하면 val_accuracy 기준보다 오래걸리지만 정확도가 더 높아지는 모습을 보임.
 : 두개 다 확인해보기

In [34]:
# 복습한 내용으로 추가적인 진행 - adam 사용
def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


# 변수 리스트 생성
act_func = ['relu', 'elu', 'tanh', 'sigmoid']
batch_lst =[32, 64, 128, 256]
best_accuracy = 0.0
best_hyperparams = {}
lr_lst = [0.009, 0.006, 0.003, 0.001, 0.0005]
best_time = 11111.0
time_hyper = {}


best_metric = 0.0
metric_hyper = {}


# dropout, 배치 정규화 추가
# 모델 구현
for func in act_func:
    for i in lr_lst:
        for batch in batch_lst:
            metrics_lst = []

            # model구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_shape=(X_train.shape[1],)))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가              
            model.add(Dense(64, activation=func))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

            # Early stopping 기능 추가
            early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
            start_time = time.time()

        
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
            tmp = class_weight
         


            end_time = time.time()
            long_time = end_time - start_time
            if long_time < best_time:
                best_time = long_time
                time_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}


            # 모델 평가
            y_pred = model.predict(X_val)
            y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
            metrics_lst.append(f1_score(y_val, y_pred_binary))
            metrics_lst.append(precision_score(y_val, y_pred_binary))
            metrics_lst.append(recall_score(y_val, y_pred_binary))
            mean_met = np.mean(metrics_lst)

            if mean_met > best_metric:
                best_metric = mean_met
                metric_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

            loss, acc = model.evaluate(X_val, y_val, verbose = 2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
print('Best time:', best_time)
print('Best time_hyperparams:', time_hyper)
print('Best metric:', best_metric)
print('Best metric_hyperparams:', metric_hyper)


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 95ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.7246 - accuracy: 0.0960 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2471 - accuracy: 0.9581 - 114ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2306 - accuracy: 0.9743 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 94ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.4778 - accuracy: 0.7397 - 118ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 101ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.2802 - accuracy: 0.9467 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 94ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.5636 - accuracy: 0.9632 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2013 - accuracy: 0.9721 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2059 - accuracy: 0.9588 - 88ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2146 - accuracy: 0.9658 - 104ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1776 - accuracy: 0.9610 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1758 - accuracy: 0.9654 - 95ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1788 - accuracy: 0.9647 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2009 - accuracy: 0.9632 - 91ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2325 - accuracy: 0.9603 - 82ms/epoch - 960us/step
85/85 - 0s - loss: 0.1744 - accuracy: 0.9695 - 96ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2130 - accuracy: 0.9629 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 94ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 1.3314 - accuracy: 0.9129 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2539 - accuracy: 0.9397 - 105ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2832 - accuracy: 0.9368 - 116ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 94ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 117ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.4661 - accuracy: 0.8033 - 102ms/epoch - 1ms/step
85/85 - 0s - loss: 0.6990 - accuracy: 0.9364 - 104ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2165 - accuracy: 0.9629 - 94ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2440 - accuracy: 0.9706 - 113ms/epoch - 1ms/step
85/85 - 0s - loss: 0.3353 - accuracy: 0.9158 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1947 - accuracy: 0.9746 - 104ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2369 - accuracy: 0.9691 - 96ms/epoch - 1ms/step
85/85 - 0s - loss: 0.3119 - accuracy: 0.8559 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2961 - accuracy: 0.9004 - 106ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2477 - accuracy: 0.9647 - 105ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2134 - accuracy: 0.9504 - 97ms/epoch - 1ms/step
85/85 - 0s - loss: 0.3570 - accuracy: 0.9026 - 100ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2171 - accuracy: 0.9607 - 102ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2581 - accuracy: 0.9390 - 104ms/epoch - 1ms/step
85/85 - 0s - loss: 0.28

In [39]:
# 출력층 함수를 sigmoid로 진행하면 높아질지 확인
# 복습한 내용으로 추가적인 진행 - adam 사용
def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


# 변수 리스트 생성
act_func = ['relu', 'elu', 'tanh', 'sigmoid']
batch_lst =[32, 64, 128, 256]
best_accuracy = 0.0
best_hyperparams = {}
lr_lst = [0.009, 0.006, 0.003, 0.001, 0.0005]
best_time = 11111.0
time_hyper = {}


best_metric = 0.0
metric_hyper = {}


# dropout, 배치 정규화 추가
# 모델 구현
for func in act_func:
    for i in lr_lst:
        for batch in batch_lst:
            metrics_lst = []

            # model구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_shape=(X_train.shape[1],)))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가              
            model.add(Dense(64, activation=func))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation='sigmoid'))

            # 모델 컴파일
            model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

            # Early stopping 기능 추가
            early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
            start_time = time.time()

        
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
            tmp = class_weight
         


            end_time = time.time()
            long_time = end_time - start_time
            if long_time < best_time:
                best_time = long_time
                time_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}


            # 모델 평가
            y_pred = model.predict(X_val)
            y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
            metrics_lst.append(f1_score(y_val, y_pred_binary))
            metrics_lst.append(precision_score(y_val, y_pred_binary))
            metrics_lst.append(recall_score(y_val, y_pred_binary))
            mean_met = np.mean(metrics_lst)

            if mean_met > best_metric:
                best_metric = mean_met
                metric_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

            loss, acc = model.evaluate(X_val, y_val, verbose = 2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
print('Best time:', best_time)
print('Best time_hyperparams:', time_hyper)
print('Best metric:', best_metric)
print('Best metric_hyperparams:', metric_hyper)


85/85 - 0s - loss: 0.1359 - accuracy: 0.9647 - 138ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2235 - accuracy: 0.9371 - 124ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1331 - accuracy: 0.9695 - 142ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1378 - accuracy: 0.9625 - 125ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1409 - accuracy: 0.9629 - 127ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1619 - accuracy: 0.9496 - 176ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1334 - accuracy: 0.9610 - 154ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1419 - accuracy: 0.9636 - 150ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1347 - accuracy: 0.9621 - 155ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1370 - accuracy: 0.9596 - 173ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1880 - accuracy: 0.9312 - 138ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1436 - accuracy: 0.9691 - 140ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1643 - accuracy: 0.9592 - 137ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1725 - accuracy: 0.9551 - 152ms/epoch - 2ms/step
85/85 - 0s - loss: 0

In [37]:
# 복습한 내용으로 추가적인 진행 - adam 사용 -accuracy
def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


# 변수 리스트 생성
act_func = ['relu', 'elu', 'tanh', 'sigmoid']
batch_lst =[32, 64, 128, 256]
best_accuracy = 0.0
best_hyperparams = {}
lr_lst = [0.009, 0.006, 0.003, 0.001, 0.0005]
best_time = 11111.0
time_hyper = {}


best_metric = 0.0
metric_hyper = {}


# dropout, 배치 정규화 추가
# 모델 구현
for func in act_func:
    for i in lr_lst:
        for batch in batch_lst:
            metrics_lst = []

            # model구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_shape=(X_train.shape[1],)))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가              
            model.add(Dense(64, activation=func))
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation=func))

            # 모델 컴파일
            model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

            # Early stopping 기능 추가 - accuracy 기준으로 바꿔서 확인
            early_stopping = EarlyStopping(patience=10, monitor='accuracy')
            start_time = time.time()

        
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
            tmp = class_weight
         


            end_time = time.time()
            long_time = end_time - start_time
            if long_time < best_time:
                best_time = long_time
                time_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}


            # 모델 평가
            y_pred = model.predict(X_val)
            y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
            metrics_lst.append(f1_score(y_val, y_pred_binary))
            metrics_lst.append(precision_score(y_val, y_pred_binary))
            metrics_lst.append(recall_score(y_val, y_pred_binary))
            mean_met = np.mean(metrics_lst)

            if mean_met > best_metric:
                best_metric = mean_met
                metric_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

            loss, acc = model.evaluate(X_val, y_val, verbose = 2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
print('Best time:', best_time)
print('Best time_hyperparams:', time_hyper)
print('Best metric:', best_metric)
print('Best metric_hyperparams:', metric_hyper)


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 104ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 115ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.3602 - accuracy: 0.9290 - 132ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2255 - accuracy: 0.9684 - 109ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 112ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 142ms/epoch - 2ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.1862 - accuracy: 0.9699 - 133ms/epoch - 2ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 127ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.2762 - accuracy: 0.9614 - 117ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 143ms/epoch - 2ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.2229 - accuracy: 0.9728 - 107ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1721 - accuracy: 0.9710 - 117ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 120ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.1185 - accuracy: 0.9732 - 115ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2378 - accuracy: 0.9596 - 125ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1850 - accuracy: 0.9632 - 126ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2103 - accuracy: 0.9533 - 105ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1809 - accuracy: 0.9651 - 120ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1795 - accuracy: 0.9654 - 129ms/epoch - 2ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 126ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 111ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 5.7355 - accuracy: 0.6239 - 131ms/epoch - 2ms/step
85/85 - 0s - loss: 0.5015 - accuracy: 0.9673 - 128ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2694 - accuracy: 0.9614 - 126ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 117ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.4788 - accuracy: 0.9688 - 124ms/epoch - 1ms/step
85/85 - 0s - loss: 1.1366 - accuracy: 0.9250 - 133ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2269 - accuracy: 0.9482 - 122ms/epoch - 1ms/step
85/85 - 0s - loss: 1.4801 - accuracy: 0.9040 - 125ms/epoch - 1ms/step


  _warn_prf(average, modifier, msg_start, len(result))


85/85 - 0s - loss: 0.4852 - accuracy: 0.9665 - 118ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2772 - accuracy: 0.9357 - 128ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2772 - accuracy: 0.9522 - 118ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2121 - accuracy: 0.9599 - 137ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1999 - accuracy: 0.9566 - 134ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2663 - accuracy: 0.8971 - 140ms/epoch - 2ms/step
85/85 - 0s - loss: 0.5102 - accuracy: 0.8224 - 132ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2087 - accuracy: 0.9349 - 119ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2394 - accuracy: 0.9577 - 131ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1953 - accuracy: 0.9482 - 114ms/epoch - 1ms/step
85/85 - 0s - loss: 0.2496 - accuracy: 0.9456 - 139ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2593 - accuracy: 0.9673 - 130ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1848 - accuracy: 0.9721 - 129ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2778 - accuracy: 0.9537 - 141ms/epoch - 2ms/step
85/85 - 0s - loss: 0

In [43]:
# 복습한 내용으로 추가적인 진행 - adam 사용

def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


# 변수 리스트 생성
act_func = ['relu', 'elu', 'tanh']
batch_lst =[32, 64, 256]
best_accuracy = 0.0
best_hyperparams = {}
lr_lst = [0.009, 0.006, 0.003, 0.001]
best_time = 11111.0
time_hyper = {}


best_metric = 0.0
metric_hyper = {}


# dropout, 배치 정규화 추가
# 모델 구현 - 다이아몬드 형
for func in act_func:
    for i in lr_lst:
        for batch in batch_lst:
            metrics_lst = []

            # model구현
            model = Sequential()
            model.add(Dense(256, activation=func, input_shape=(X_train.shape[1],)))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(512, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가              
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가 
            model.add(Dense(32, activation=func))
            model.add(Dense(16, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(8, activation=func))
            model.add(Dense(1, activation='sigmoid'))

            # 모델 컴파일
            model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

            # Early stopping 기능 추가 
            early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
            start_time = time.time()

        
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
            tmp = class_weight
         


            end_time = time.time()
            long_time = end_time - start_time
            if long_time < best_time:
                best_time = long_time
                time_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}


            # 모델 평가
            y_pred = model.predict(X_val)
            y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
            metrics_lst.append(f1_score(y_val, y_pred_binary))
            metrics_lst.append(precision_score(y_val, y_pred_binary))
            metrics_lst.append(recall_score(y_val, y_pred_binary))
            mean_met = np.mean(metrics_lst)

            if mean_met > best_metric:
                best_metric = mean_met
                metric_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

            loss, acc = model.evaluate(X_val, y_val, verbose = 2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
print('Best time:', best_time)
print('Best time_hyperparams:', time_hyper)
print('Best metric:', best_metric)
print('Best metric_hyperparams:', metric_hyper)


85/85 - 0s - loss: 0.1684 - accuracy: 0.9551 - 137ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2027 - accuracy: 0.9735 - 154ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1246 - accuracy: 0.9713 - 138ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1229 - accuracy: 0.9640 - 127ms/epoch - 1ms/step
85/85 - 0s - loss: 0.1502 - accuracy: 0.9563 - 136ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1206 - accuracy: 0.9691 - 135ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1483 - accuracy: 0.9640 - 152ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1680 - accuracy: 0.9504 - 155ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1753 - accuracy: 0.9419 - 150ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1384 - accuracy: 0.9684 - 136ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1258 - accuracy: 0.9684 - 148ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1373 - accuracy: 0.9632 - 139ms/epoch - 2ms/step
85/85 - 0s - loss: 0.2338 - accuracy: 0.9368 - 146ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1403 - accuracy: 0.9629 - 148ms/epoch - 2ms/step
85/85 - 0s - loss: 0

In [42]:
# 복습한 내용으로 추가적인 진행 - adam 사용
def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt


# 변수 리스트 생성
act_func = ['relu', 'elu', 'tanh']
batch_lst =[32, 64, 256]
best_accuracy = 0.0
best_hyperparams = {}
lr_lst = [0.009, 0.006, 0.003, 0.001]
best_time = 11111.0
time_hyper = {}


best_metric = 0.0
metric_hyper = {}


# dropout, 배치 정규화 추가
# 모델 구현 - 다이아몬드 형
for func in act_func:
    for i in lr_lst:
        for batch in batch_lst:
            metrics_lst = []

            # model구현
            model = Sequential()
            model.add(Dense(512, activation=func, input_shape=(X_train.shape[1],)))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(256, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가              
            model.add(Dense(128, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가 
            model.add(Dense(32, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가 
            model.add(Dense(16, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(8, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(8, activation=func))
            model.add(BatchNormalization()) # 배치 정규화 추가
            model.add(Dropout(0.2))  # Dropout 추가
            model.add(Dense(1, activation='sigmoid'))

            # 모델 컴파일
            model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

            # Early stopping 기능 추가 - accuracy 기준으로 바꿔서 확인
            early_stopping = EarlyStopping(patience=10, monitor='accuracy')
            start_time = time.time()

        
            model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
            tmp = class_weight
         


            end_time = time.time()
            long_time = end_time - start_time
            if long_time < best_time:
                best_time = long_time
                time_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}


            # 모델 평가
            y_pred = model.predict(X_val)
            y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
            metrics_lst.append(f1_score(y_val, y_pred_binary))
            metrics_lst.append(precision_score(y_val, y_pred_binary))
            metrics_lst.append(recall_score(y_val, y_pred_binary))
            mean_met = np.mean(metrics_lst)

            if mean_met > best_metric:
                best_metric = mean_met
                metric_hyper = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

            loss, acc = model.evaluate(X_val, y_val, verbose = 2)

            if acc > best_accuracy:
                best_accuracy = acc
                best_hyperparams = {'activation': func, 'learning_rate': i, 'X_train': tmp, 'batch': batch}

print('Best hyperparameters:', best_hyperparams)
print('Best validation accuracy:', best_accuracy)
print('Best time:', best_time)
print('Best time_hyperparams:', time_hyper)
print('Best metric:', best_metric)
print('Best metric_hyperparams:', metric_hyper)


85/85 - 0s - loss: 0.2221 - accuracy: 0.9268 - 184ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1201 - accuracy: 0.9706 - 159ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1080 - accuracy: 0.9739 - 142ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1463 - accuracy: 0.9724 - 167ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1282 - accuracy: 0.9603 - 165ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1267 - accuracy: 0.9632 - 142ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1527 - accuracy: 0.9691 - 132ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1354 - accuracy: 0.9647 - 195ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1425 - accuracy: 0.9643 - 179ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1860 - accuracy: 0.9592 - 155ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1532 - accuracy: 0.9529 - 151ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1609 - accuracy: 0.9585 - 135ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1630 - accuracy: 0.9710 - 195ms/epoch - 2ms/step
85/85 - 0s - loss: 0.1379 - accuracy: 0.9654 - 191ms/epoch - 2ms/step
85/85 - 0s - loss: 0

### 최종 모델

- Best hyperparameters: {'activation': 'tanh', 'learning_rate': 0.009, 'X_train': {0: 1.0, 1: 10.428571428571429}, 'batch': 32}
- Best validation accuracy: 0.9750000238418579
- Best time: 5.428201913833618
- Best time_hyperparams: {'activation': 'tanh', 'learning_rate': 0.003, 'X_train': {0: 1.0, 1: 10.428571428571429}, 'batch': 256}
- Best metric: 0.865815916262915
- Best metric_hyperparams: {'activation': 'tanh', 'learning_rate': 0.009, 'X_train': {0: 1.0, 1: 10.428571428571429}, 'batch': 32}

----------------------------------

최종적으로 평가지표 수치들이 감소한 것으로 확인
SMOTE 진행한 데이터가 더 신뢰도가 높음

In [45]:
func = 'tanh'
i = 0.009
batch = 32

# model구현
model = Sequential()
model.add(Dense(256, activation=func, input_shape=(X_train.shape[1],)))
model.add(BatchNormalization()) # 배치 정규화 추가
model.add(Dropout(0.2))  # Dropout 추가
model.add(Dense(128, activation=func))
model.add(BatchNormalization()) # 배치 정규화 추가
model.add(Dropout(0.2))  # Dropout 추가              
model.add(Dense(64, activation=func))
model.add(Dense(32, activation=func))
model.add(Dense(16, activation=func))
model.add(Dense(8, activation=func))
model.add(Dense(8, activation=func))
model.add(Dense(1, activation='sigmoid'))

# 모델 컴파일
model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping 기능 추가
early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
start_time = time.time()


model.fit(X_train, y_train, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight, verbose = 0)
tmp = class_weight

end_time = time.time()
long_time = end_time - start_time

# 모델 평가
y_pred = model.predict(X_train)
y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
metrics_lst.append(f1_score(y_train, y_pred_binary))
metrics_lst.append(precision_score(y_train, y_pred_binary))
metrics_lst.append(recall_score(y_train, y_pred_binary))
mean_met = np.mean(metrics_lst)

print("train_loss, train_accuracy")
train_loss, train_cc = model.evaluate(X_train, y_train, verbose = 2)

print("val_loss, val_accuracy")
val_loss, val_acc = model.evaluate(X_val, y_val, verbose = 2)

print("걸린시간 :", long_time)

print("=== train set ===")
print('f1_score:', f1_score(y_train, y_pred_binary))
print('precision_score:', precision_score(y_train, y_pred_binary))
print('recall_score', recall_score(y_train, y_pred_binary))

y_pred = model.predict(X_val)
y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
print("=== val set ===")
print('f1_score:', f1_score(y_val, y_pred_binary))
print('precision_score:', precision_score(y_val, y_pred_binary))
print('recall_score', recall_score(y_val, y_pred_binary))

train_loss, train_accuracy
340/340 - 1s - loss: 0.1790 - accuracy: 0.9496 - 512ms/epoch - 2ms/step
val_loss, val_accuracy
85/85 - 0s - loss: 0.1920 - accuracy: 0.9393 - 107ms/epoch - 1ms/step
걸린시간 : 20.106853723526
=== train set ===
f1_score: 0.7802726543704891
precision_score: 0.6705720192970366
recall_score 0.9328859060402684
=== val set ===
f1_score: 0.7393364928909952
precision_score: 0.6290322580645161
recall_score 0.896551724137931


### 최종 모델 (data set 수정)

In [47]:
# 새로운 데이터 (EDA)과정 진행 후 데이터
X_train_smote = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/X_train_MS_smote.csv')
X_train_adasyn = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/X_train_MS_adasyn.csv')
X_test = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/X_test_MS.csv')
X_val = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/X_val_MS.csv')
y_train_smote = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/y_train_MS_smote.csv')
y_train_adasyn = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/y_train_MS_adasyn.csv')
y_test = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/y_test.csv')
y_val = pd.read_csv('csv/Binary_to_share/Binary_to_share/Minmax+Standard/y_val.csv')

y_train_smote = y_train_smote['target_class']
y_test = y_test['target_class']
y_val = y_val['target_class']
# 클래스 가중치 계산
class_weight1 = {0: 1., 1: (len(y_train_smote) / sum(y_train_smote))}


In [48]:
# Best hyperparameters: {'activation': 'relu', 'learning_rate': 0.003, 'X_train': {0: 1.0, 1: 2.0}, 'batch': 64}
# Best validation accuracy: 0.9753676652908325
# Best time: 8.609997987747192
# Best time_hyperparams: {'activation': 'elu', 'learning_rate': 0.009, 'X_train': {0: 1.0, 1: 2.0}, 'batch': 256}
# Best metric: 0.8658478183402076
# Best metric_hyperparams: {'activation': 'relu', 'learning_rate': 0.003, 'X_train': {0: 1.0, 1: 2.0}, 'batch': 64}

# 복습한 내용으로 추가적인 진행 - adam 사용, smote
def custom_opt(n):
    opt = tf.keras.optimizers.Adam(learning_rate=n)
    return opt

# 변수 리스트 생성
func = 'relu'
batch = 64
i = 0.003

# model 구현
model = Sequential()
model.add(Dense(256, activation=func, input_shape=(X_train_smote.shape[1],)))
model.add(BatchNormalization()) # 배치 정규화 추가
model.add(Dropout(0.2))  # Dropout 추가
model.add(Dense(128, activation=func))
model.add(BatchNormalization()) # 배치 정규화 추가
model.add(Dropout(0.2))  # Dropout 추가              
model.add(Dense(64, activation=func))
model.add(Dense(32, activation=func))
model.add(Dense(16, activation=func))
model.add(Dense(8, activation=func))
model.add(Dense(8, activation=func))
model.add(Dense(1, activation='sigmoid')) # 출력층을 sigmoid , 또는 relu사용 가능


# 모델 컴파일
model.compile(optimizer=custom_opt(i), loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping 기능 추가
early_stopping = EarlyStopping(patience=10, monitor='val_accuracy')
start_time = time.time()

# 모델 적용
model.fit(X_train_smote, y_train_smote, epochs=1000, batch_size=batch, validation_data=(X_val, y_val), callbacks=[early_stopping], class_weight=class_weight1, verbose = 0)

end_time = time.time()
long_time = end_time - start_time



# 모델 평가
y_pred = model.predict(X_train_smote)
y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
metrics_lst.append(f1_score(y_train_smote, y_pred_binary))
metrics_lst.append(precision_score(y_train_smote, y_pred_binary))
metrics_lst.append(recall_score(y_train_smote, y_pred_binary))
mean_met = np.mean(metrics_lst)

print("train_loss, train_accuracy")
train_loss, train_cc = model.evaluate(X_train_smote, y_train_smote, verbose = 2)

print("val_loss, val_accuracy")
val_loss, val_acc = model.evaluate(X_val, y_val, verbose = 2)

print("걸린시간 :", long_time)

print("=== train set ===")
print('f1_score:', f1_score(y_train_smote, y_pred_binary))
print('precision_score:', precision_score(y_train_smote, y_pred_binary))
print('recall_score', recall_score(y_train_smote, y_pred_binary))

y_pred = model.predict(X_test)
y_pred_binary = [1 if pred > 0.5 else 0 for pred in y_pred]
print("=== test set ===")
print('f1_score:', f1_score(y_test, y_pred_binary))
print('precision_score:', precision_score(y_test, y_pred_binary))
print('recall_score', recall_score(y_test, y_pred_binary))


train_loss, train_accuracy
615/615 - 1s - loss: 0.1074 - accuracy: 0.9596 - 638ms/epoch - 1ms/step
val_loss, val_accuracy
85/85 - 0s - loss: 0.1588 - accuracy: 0.9482 - 113ms/epoch - 1ms/step
걸린시간 : 17.10420846939087
=== train set ===
f1_score: 0.9598218803764802
precision_score: 0.9552780016116036
recall_score 0.9644091925971121
=== test set ===
f1_score: 0.8058510638297873
precision_score: 0.7112676056338029
recall_score 0.9294478527607362
