In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Concatenate, RepeatVector, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from kerastuner.tuners import BayesianOptimization
from tensorflow.keras.optimizers import Adam, SGD
import keras_tuner as kt


In [11]:

# 데이터 불러오기
df = pd.read_csv('NVspinData_None_-1_230807.csv')

# 데이터셋 분리: train_set 8 : test_set 2
train_df, test_df = train_test_split(df, shuffle=True, test_size=0.2)
# train_set을 다시 8:2로 나눠서 train_set과 validation_set을 만듦
train_df, val_df = train_test_split(train_df, shuffle=True, test_size=0.2)
# random_state=42

# 모든 시퀀스의 길이 중에서 최대 길이를 구하기
all_sequences = [eval(seq) for seq in df['combination'].values]
max_seq_length = max([len(seq) for seq in all_sequences])

# 각 데이터셋에서 theta, phi, sequence 추출하고 reshape 적용
theta_train = train_df['Theta'].values.reshape(-1, 1)
phi_train = train_df['Phi'].values.reshape(-1, 1)
sequence_train = pad_sequences(train_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')

theta_val = val_df['Theta'].values.reshape(-1, 1)
phi_val = val_df['Phi'].values.reshape(-1, 1)
sequence_val = pad_sequences(val_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')

theta_test = test_df['Theta'].values.reshape(-1, 1)
phi_test = test_df['Phi'].values.reshape(-1, 1)
sequence_test = pad_sequences(test_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')

In [12]:
def build_model(hp):
    theta_input = Input(shape=(1,), name='theta_input')
    phi_input = Input(shape=(1,), name='phi_input')
    merged = Concatenate()([theta_input, phi_input])
    
    repeated_vector = RepeatVector(max_seq_length)(merged)
    
    # RNN 레이어 동적 추가
    rnn_output = repeated_vector
    for i in range(hp.Int('num_layers', 1, 3)):  # 1-3개의 RNN 레이어
        rnn_output = SimpleRNN(units=hp.Int('rnn_units', min_value=16, max_value=128, step=16), 
                               return_sequences=True)(rnn_output)
        
    output = TimeDistributed(Dense(5, activation='softmax'), name='output_layer')(rnn_output)
    
    model = Model(inputs=[theta_input, phi_input], outputs=output)
    
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    optimizer = hp.Choice('optimizer', values=['adam', 'sgd'])
    
    model.compile(optimizer=optimizer, 
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# 베이지안 최적화 사용
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    directory='my_dir',
    project_name='keras_tuner_bayesian_rnn'
)

# 튜닝 시작
tuner.search([theta_train, phi_train], np.expand_dims(sequence_train, -1),
             validation_data=([theta_val, phi_val], np.expand_dims(sequence_val, -1)),
             epochs=10, batch_size=64)

# 최상의 모델 얻기
best_model = tuner.get_best_models(1)[0]

# 검증
loss, accuracy = best_model.evaluate([theta_test, phi_test], np.expand_dims(sequence_test, -1))
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

INFO:tensorflow:Reloading Tuner from my_dir\keras_tuner_bayesian_rnn\tuner0.json
INFO:tensorflow:Oracle triggered exit
Test Loss: 0.2669
Test Accuracy: 0.9000


In [14]:
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected layer is {best_hps.get('rnn_units')}.
The optimal optimizer is {best_hps.get('optimizer')}.
The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
""")



The hyperparameter search is complete. The optimal number of units in the first densely-connected layer is 128.
The optimal optimizer is adam.
The optimal learning rate for the optimizer is 0.006898389145724495.

