# 개요 

Credit Card Fraud Detection 과제를 진행하던 중, Kaggle에서 Keras Tuner 관련 글을 보고 직접 실습해 봅니다.<br>
* 링크 : [https://www.kaggle.com/code/suvroo/advance-hyperparameter-optimization-in-ml-and-dl#Keras-Tuner-](https://www.kaggle.com/code/suvroo/advance-hyperparameter-optimization-in-ml-and-dl#Keras-Tuner-)

# 실습

## 데이터 전처리

In [None]:
import pandas as pd
import sqlite3
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the whole `creditcard` table into a DataFrame. The try/finally makes
# sure the SQLite connection is closed even when the query raises.
conn = sqlite3.connect('creditcard.db')
try:
    df = pd.read_sql_query("SELECT * FROM creditcard", conn)
finally:
    conn.close()

# Drop transactions whose amount is 0.
df_filtered1 = df[df['Amount'] != 0].copy()

# Separate the features from the label.
df_x = df_filtered1.drop(['Time', 'Class'], axis=1).copy()
df_y = df_filtered1['Class'].copy()

# Fixed seed so that the splits (and therefore the tuner results) are
# reproducible between runs.
RANDOM_STATE = 42

# train + test
# Features and labels MUST be split in a single call: two independent calls
# shuffle differently, which silently pairs each row with another row's label.
# `stratify` keeps the class ratio identical in every split.
x_train, x_test, y_train, y_test = train_test_split(
    df_x, df_y, test_size=0.3, stratify=df_y, random_state=RANDOM_STATE
)

# train + validation
x_train, x_validate, y_train, y_validate = train_test_split(
    x_train, y_train, test_size=0.3, stratify=y_train, random_state=RANDOM_STATE
)

# Fit the scalers on the training rows only, then apply the fitted transform
# to the validation/test rows, so no statistics leak from held-out data.
scaler_minmax = MinMaxScaler()
scaler_std = StandardScaler()
x_train = scaler_std.fit_transform(scaler_minmax.fit_transform(x_train))
x_validate = scaler_std.transform(scaler_minmax.transform(x_validate))
x_test = scaler_std.transform(scaler_minmax.transform(x_test))

# Scaled copy of the full feature table, kept under its original name in case
# other cells reference it.
df_x_scaled = scaler_std.transform(scaler_minmax.transform(df_x))

## Keras Tuner - Optimizer 설정 예제

In [None]:
import tensorflow as tf
import keras_tuner as kt

In [None]:
def build_model(hp):
    """Build a one-hidden-layer binary classifier whose optimizer is tuned.

    Args:
        hp: KerasTuner ``HyperParameters`` object used to sample the
            ``'optimizer'`` choice for this trial.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output.
    """
    model = tf.keras.models.Sequential([
        # NOTE(review): 29 is presumably the number of feature columns left
        # after dropping 'Time' and 'Class' -- confirm against the table.
        tf.keras.layers.Input((29, 1)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        # Binary classification, so the output layer uses a sigmoid.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    # Hyperparameter: which optimizer to compile the model with.
    optimizer = hp.Choice(
        'optimizer',
        values=['Nadam', 'adam', 'sgd', 'rmsprop', 'Ftrl', 'Adamax', 'adadelta', 'Adagrad'],
    )

    # The string shortcut 'F1Score' builds the metric with threshold=None,
    # which takes the argmax over the outputs. With a single sigmoid unit the
    # argmax is always index 0, so every sample is counted as positive and the
    # score is meaningless. An explicit 0.5 threshold fixes that; the name is
    # kept so that the 'F1Score' / 'val_F1Score' log keys do not change.
    f1_metric = tf.keras.metrics.F1Score(threshold=0.5, name='F1Score')

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=[f1_metric])

    return model

# Random search over the optimizer choice, keeping the trial with the lowest
# validation loss.
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective('val_loss', direction='min'),
    overwrite=True,
    max_trials=9,
)

# Validate on the dedicated validation split. Using the test split here would
# leak it into hyperparameter selection and bias the final test evaluation.
tuner.search(x_train, y_train, epochs=5, validation_data=(x_validate, y_validate))

Trial 2 Complete [00h 00m 56s]
val_F1Score: 0.002878308529034257

Best val_F1Score So Far: 0.002878308529034257
Total elapsed time: 00h 01m 57s


* Best Optimizer 출력

In [None]:
# Display the hyperparameter values of the best trial of the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp.values

{'optimizer': 'Nadam'}

## Keras Tuner - Neuron 수 설정 예제

In [None]:
def build_model(hp):
    """Build a one-hidden-layer binary classifier whose layer width is tuned.

    Args:
        hp: KerasTuner ``HyperParameters`` object used to sample ``'units'``
            (5 to 150 in steps of 5) for this trial.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output.
    """
    # Hyperparameter: number of neurons in the hidden layer.
    units = hp.Int('units', min_value=5, max_value=150, step=5)

    model = tf.keras.models.Sequential()
    # NOTE(review): 29 is presumably the number of feature columns left after
    # dropping 'Time' and 'Class' -- confirm against the table.
    model.add(tf.keras.layers.Input((29, 1)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.2))
    # Binary classification, so the output layer uses a sigmoid.
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # The string shortcut 'F1Score' builds the metric with threshold=None,
    # which takes the argmax over the outputs. With a single sigmoid unit the
    # argmax is always index 0, so every sample is counted as positive and the
    # score is meaningless. An explicit 0.5 threshold fixes that; the name is
    # kept so that the 'F1Score' / 'val_F1Score' log keys do not change.
    f1_metric = tf.keras.metrics.F1Score(threshold=0.5, name='F1Score')

    model.compile(optimizer='Nadam', loss='binary_crossentropy', metrics=[f1_metric])

    return model

# Random search over the hidden-layer width. The objective is the validation
# loss (accuracy is not used).
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective('val_loss', direction='min'),
    overwrite=True,
    max_trials=9,
    project_name='randomsearch_neuron',
)

# Validate on the dedicated validation split. Using the test split here would
# leak it into hyperparameter selection and bias the final test evaluation.
tuner.search(x_train, y_train, epochs=10, validation_data=(x_validate, y_validate))

Trial 9 Complete [00h 00m 44s]
val_loss: 0.012561993673443794

Best val_loss So Far: 0.012475043535232544
Total elapsed time: 00h 06m 42s


* Best Neuron의 수 출력

In [None]:
# Display the hyperparameter values of the best trial of the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp.values

{'units': 130}

## Keras Tuner - Hidden layer수 설정 예제

In [None]:
def build_model(hp):
    """Build a binary classifier whose number of extra hidden layers is tuned.

    The network always has one 130-unit ReLU layer, followed by
    ``'number_of_layers'`` (1 to 10) additional 130-unit ReLU layers.

    Args:
        hp: KerasTuner ``HyperParameters`` object used to sample
            ``'number_of_layers'`` for this trial.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output.
    """
    model = tf.keras.models.Sequential()

    # NOTE(review): 29 is presumably the number of feature columns left after
    # dropping 'Time' and 'Class' -- confirm against the table.
    model.add(tf.keras.layers.Input((29, 1)))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(130, activation='relu'))

    # Hyperparameter: how many additional hidden layers to stack.
    extra_layers = hp.Int('number_of_layers', min_value=1, max_value=10)
    for _ in range(extra_layers):
        model.add(tf.keras.layers.Dense(130, activation='relu'))

    model.add(tf.keras.layers.Dropout(0.2))
    # Binary classification, so the output layer uses a sigmoid.
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # The string shortcut 'F1Score' builds the metric with threshold=None,
    # which takes the argmax over the outputs. With a single sigmoid unit the
    # argmax is always index 0, so every sample is counted as positive and the
    # score is meaningless. An explicit 0.5 threshold fixes that; the name is
    # kept so that the 'F1Score' / 'val_F1Score' log keys do not change.
    f1_metric = tf.keras.metrics.F1Score(threshold=0.5, name='F1Score')

    model.compile(optimizer='Adamax', loss='binary_crossentropy', metrics=[f1_metric])
    return model

# Random search over the number of hidden layers. The objective is the
# validation loss (accuracy is not used).
tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective('val_loss', direction='min'),
    overwrite=True,
    max_trials=9,
    project_name='randomsearch_hidden_layer',
)

# Validate on the dedicated validation split. Using the test split here would
# leak it into hyperparameter selection and bias the final test evaluation.
tuner.search(x_train, y_train, epochs=10, validation_data=(x_validate, y_validate))

Trial 9 Complete [00h 01m 35s]
val_loss: 0.012566009536385536

Best val_loss So Far: 0.012515497393906116
Total elapsed time: 00h 10m 45s


* Best Hidden Layer의 수 출력

In [None]:
# Display the hyperparameter values of the best trial of the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp.values

{'number_of_layers': 8}

## Keras Tuner - Optimizer, Neuron, Hidden layer 등 동시설정 예제

In [None]:
def build_model(hp):
    """Build a binary classifier with depth, width, activation and dropout tuned.

    Sampled hyperparameters:
        * ``'num_layers'``: number of Dense+Dropout pairs (1 to 20).
        * ``'units'``: Dense width (5 to 150, step 5). The name is the same
          for every layer, so all hidden layers share one width.
        * ``'activation<i>'`` / ``'dropout<i>'``: per-layer activation and
          dropout rate.
        * ``'optimizer'``: currently a single choice, ``'Nadam'``.

    Args:
        hp: KerasTuner ``HyperParameters`` object for this trial.

    Returns:
        A compiled ``tf.keras`` Sequential model with a single sigmoid output.
    """
    model = tf.keras.models.Sequential()

    # Input & Flatten
    # NOTE(review): 29 is presumably the number of feature columns left after
    # dropping 'Time' and 'Class' -- confirm against the table.
    model.add(tf.keras.layers.Input((29, 1)))
    model.add(tf.keras.layers.Flatten())

    # Hidden layers: one Dense + Dropout pair per sampled layer.
    for i in range(hp.Int('num_layers', min_value=1, max_value=20)):
        units = hp.Int('units', min_value=5, max_value=150, step=5)
        activation = hp.Choice('activation' + str(i), values=['relu', 'elu'])
        model.add(tf.keras.layers.Dense(units, activation=activation))

        dropout_rate = hp.Choice(
            'dropout' + str(i),
            values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        )
        model.add(tf.keras.layers.Dropout(dropout_rate))

    # Binary classification, so the output layer uses a sigmoid.
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    optimizer = hp.Choice('optimizer', values=['Nadam'])

    # The string shortcut 'F1Score' builds the metric with threshold=None,
    # which takes the argmax over the outputs. With a single sigmoid unit the
    # argmax is always index 0, so every sample is counted as positive and the
    # score is meaningless. An explicit 0.5 threshold fixes that; the name is
    # kept so that the 'val_F1Score' objective key does not change.
    f1_metric = tf.keras.metrics.F1Score(threshold=0.5, name='F1Score')

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=[f1_metric])

    return model

# Run the joint search with ops placed on the first GPU.
with tf.device('/device:GPU:0'):
    # Random search over the full model configuration. The objective is the
    # validation F1 score, to be maximised (accuracy is not used).
    tuner = kt.RandomSearch(
        build_model,
        objective=kt.Objective('val_F1Score', direction='max'),
        overwrite=True,
        max_trials=9,
        project_name='randomsearch_model',
    )

    # Validate on the dedicated validation split. Using the test split here
    # would leak it into hyperparameter selection and bias the final test
    # evaluation.
    tuner.search(x_train, y_train, epochs=10, validation_data=(x_validate, y_validate))

Trial 9 Complete [00h 01m 10s]
val_F1Score: 0.0028702165000140667

Best val_F1Score So Far: 0.0028861388564109802
Total elapsed time: 00h 18m 28s


In [None]:
# Display the hyperparameter values of the best trial of the search.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_hp.values

{'num_layers': 12,
 'units': 90,
 'activation0': 'relu',
 'dropout0': 0.9,
 'optimizer': 'Nadam',
 'activation1': 'relu',
 'dropout1': 0.1,
 'activation2': 'relu',
 'dropout2': 0.1,
 'activation3': 'relu',
 'dropout3': 0.1,
 'activation4': 'relu',
 'dropout4': 0.1,
 'activation5': 'relu',
 'dropout5': 0.1,
 'activation6': 'relu',
 'dropout6': 0.1,
 'activation7': 'relu',
 'dropout7': 0.1,
 'activation8': 'relu',
 'dropout8': 0.1,
 'activation9': 'relu',
 'dropout9': 0.1,
 'activation10': 'relu',
 'dropout10': 0.1,
 'activation11': 'relu',
 'dropout11': 0.1}

## Keras Tuner - 모델 저장

In [None]:
# Take the top-ranked model from the tuner and print its layer summary.
best_models = tuner.get_best_models(num_models=1)
model_2 = best_models[0]
model_2.summary()

  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Train the model.
history = model_2.fit(x_train, y_train, epochs=10)

# Evaluate the model on the training split and on the test split.
print('* 모델평가')
# The training-split result gets its own names so it is not overwritten by the
# test-split result below; `loss` / `f1score` still hold the test-split values.
train_loss, train_f1score = model_2.evaluate(x_train, y_train, verbose=2)
loss, f1score = model_2.evaluate(x_test, y_test, verbose=2)

Epoch 1/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - F1Score: 0.0031 - loss: 0.0463
Epoch 2/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0034 - loss: 0.0222
Epoch 3/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0031 - loss: 0.0144
Epoch 4/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0032 - loss: 0.0202
Epoch 5/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0034 - loss: 0.0200
Epoch 6/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0034 - loss: 0.0166
Epoch 7/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - F1Score: 0.0036 - loss: 0.0167
Epoch 8/10
[1m4334/4334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - F1Score: 0.0032 - loss: 0.0131
Epoch 9/10
[1m4334/4334