In [2]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Concatenate, TimeDistributed, RepeatVector
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

# 데이터 불러오기
df = pd.read_csv('NVspinData_None_-1_230807.csv')

# 데이터셋 분리: train_set 8 : test_set 2
train_df, test_df = train_test_split(df, shuffle=True, test_size=0.2)
# train_set을 다시 8:2로 나눠서 train_set과 validation_set을 만듦
train_df, val_df = train_test_split(train_df, shuffle=True, test_size=0.2)
# random_state=42

# 모든 시퀀스의 길이 중에서 최대 길이를 구하기
all_sequences = [eval(seq) for seq in df['combination'].values]
max_seq_length = max([len(seq) for seq in all_sequences])

# 각 데이터셋에서 theta, phi, sequence 추출하고 reshape 적용
theta_train = train_df['Theta'].values.reshape(-1, 1)
phi_train = train_df['Phi'].values.reshape(-1, 1)
sequence_train = pad_sequences(train_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')

theta_val = val_df['Theta'].values.reshape(-1, 1)
phi_val = val_df['Phi'].values.reshape(-1, 1)
sequence_val = pad_sequences(val_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')

theta_test = test_df['Theta'].values.reshape(-1, 1)
phi_test = test_df['Phi'].values.reshape(-1, 1)
sequence_test = pad_sequences(test_df['combination'].apply(eval).tolist(), maxlen=max_seq_length, padding='pre')


In [3]:

# 모델 정의
theta_input = Input(shape=(1,), name='theta_input')
phi_input = Input(shape=(1,), name='phi_input')

# theta와 phi를 Concatenate
merged = Concatenate()([theta_input, phi_input])

# 시퀀스를 예측하기 위한 SimpleRNN 레이어
repeated_vector = RepeatVector(max_seq_length)(merged)  # max_sequence_length는 시퀀스의 최대 길이

rnn_layer = SimpleRNN(64, return_sequences=True, name='rnn_layer')(repeated_vector)

output = TimeDistributed(Dense(5, activation='softmax'), name='output_layer')(rnn_layer)

model = Model(inputs=[theta_input, phi_input], outputs=output)

# 컴파일 및 훈련
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit([theta_train, phi_train], np.expand_dims(sequence_train, -1), 
                    validation_data=([theta_val, phi_val], np.expand_dims(sequence_val, -1)), epochs=50, batch_size=64)

# 검증
loss, accuracy = model.evaluate([theta_test, phi_test], np.expand_dims(sequence_test, -1))
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.2323
Test Accuracy: 0.9147


In [4]:
# 결과를 저장할 디렉토리 생성
models_dir = 'saved_models'
if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
# 모델 저장
model.save(os.path.join(models_dir, "SimpleRNN_model.h5"))

# # 모델 불러오기
# from tensorflow.keras.models import load_model
# loaded_model = load_model("SimpleRNN_model.h5")

  saving_api.save_model(


In [None]:
# # 가중치만 저장
# model.save_weights("model_weights.h5")

# # 구조만 저장
# model_json = model.to_json()
# with open("model_structure.json", "w") as json_file:
#     json_file.write(model_json)

# # 가중치 불러오기
# model.load_weights("model_weights.h5")

# # 구조만 불러오기
# from tensorflow.keras.models import model_from_json
# with open("model_structure.json", "r") as json_file:
#     model_json = json_file.read()
# loaded_model = model_from_json(model_json)

In [5]:
# 테스트 데이터셋에서 10개의 샘플을 무작위로 선택
indices = np.random.choice(len(theta_test), 10)

theta_samples = np.array(theta_test)[indices]
phi_samples = np.array(phi_test)[indices]
sequence_samples = np.array(sequence_test)[indices]

# 모델을 사용하여 예측 수행
predicted_sequences = model.predict([theta_samples, phi_samples])

# 가장 확률이 높은 클래스의 인덱스를 선택
predicted_sequences = np.argmax(predicted_sequences, axis=-1)

# 결과를 DataFrame으로 변환
df_results = pd.DataFrame({
    'Theta': theta_samples.ravel(),
    'Phi': phi_samples.ravel(),
    'Actual Sequence': [list(seq) for seq in sequence_samples],
    'Predicted Sequence': [list(seq) for seq in predicted_sequences]
})

# 결과를 저장할 디렉토리 생성
results_dir = 'samle_test_simpleRNN'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# 결과를 CSV 파일로 저장
df_results.to_csv(os.path.join(results_dir, 'simpleRNN_results.csv'), index=False)

print("Results saved to simpleRNN_results.csv")


Results saved to simpleRNN_results.csv


In [6]:

from kerastuner.tuners import BayesianOptimization

def build_model(hp):
    theta_input = Input(shape=(1,), name='theta_input')
    phi_input = Input(shape=(1,), name='phi_input')

    merged = Concatenate()([theta_input, phi_input])

    repeated_vector = RepeatVector(max_seq_length)(merged)
    
    rnn_layer = SimpleRNN(hp.Int('rnn_units', min_value=16, max_value=128, step=16),
                          return_sequences=True, name='rnn_layer')(repeated_vector)
    
    output = TimeDistributed(Dense(hp.Int('dense_units', min_value=5, max_value=50, step=5),
                                   activation='softmax'), name='output_layer')(rnn_layer)

    model = Model(inputs=[theta_input, phi_input], outputs=output)
    
    # 컴파일 설정
    optimizer_choice = hp.Choice('optimizer', ['adam', 'sgd', 'rmsprop'])
    lr = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    
    if optimizer_choice == 'adam':
        optimizer = Adam(learning_rate=lr)
    elif optimizer_choice == 'sgd':
        optimizer = SGD(learning_rate=lr)
    else:
        optimizer = RMSprop(learning_rate=lr)
    
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='rnn_tuning',
    project_name='SimpleRNN_model_tuning'
)

# 하이퍼파라미터 검색
tuner.search([theta_train, phi_train], np.expand_dims(sequence_train, -1),
             validation_data=([theta_val, phi_val], np.expand_dims(sequence_val, -1)),
             epochs=50,
             batch_size=64)

# 최상의 하이퍼파라미터 출력
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"""
The hyperparameter search is complete. 
The optimal number of units in the SimpleRNN layer is {best_hps.get('rnn_units')}.
The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
The optimal optimizer is {best_hps.get('optimizer')}.
The optimal number of units in the Dense layer is {best_hps.get('dense_units')}.
""")


INFO:tensorflow:Reloading Tuner from rnn_tuning\SimpleRNN_model_tuning\tuner0.json
INFO:tensorflow:Oracle triggered exit

The hyperparameter search is complete. 
The optimal number of units in the SimpleRNN layer is 96.
The optimal learning rate for the optimizer is 0.0030088511849749758.
The optimal optimizer is rmsprop.
The optimal number of units in the Dense layer is 5.



  from kerastuner.tuners import BayesianOptimization


In [8]:

# 결과를 저장할 디렉토리 생성
results_dir = 'tuned_simpleRNN_models_results'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

# 최상의 하이퍼파라미터 조합 가져오기
best_hps = tuner.get_best_hyperparameters(num_trials=5)

# 각 하이퍼파라미터 조합에 대한 학습 곡선을 그릴 예정
for idx, hp in enumerate(best_hps):
    # Hyperparameters from the trial
    optimizer = hp['optimizer']
    learning_rate = hp['learning_rate']
    rnn_units = hp['rnn_units']

    print(f"Running with hyperparameters: {hp.values}")
    model = build_model(hp)
    history = model.fit([theta_train, phi_train], np.expand_dims(sequence_train, -1), 
                        validation_data=([theta_val, phi_val], np.expand_dims(sequence_val, -1)), 
                        epochs=50, batch_size=64)  # 고정된 배치 크기 사용
    
    # Plot the loss
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Trial {idx+1} - Loss (Optimizer: {optimizer}, LR: {round(learning_rate, 2)}, Units: {rnn_units})')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Plot the accuracy
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title(f'Trial {idx+1} - Accuracy (Optimizer: {optimizer}, LR: {round(learning_rate, 2)}, Units: {rnn_units})')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()
    
    # 결과를 PNG 파일로 저장
    plt.savefig(os.path.join(results_dir, f'trial_{idx+1}_results.png'))
    plt.close()  # 현재 그린 그래프를 닫아 새로운 그래프를 그릴 준비

Running with hyperparameters: {'rnn_units': 96, 'dense_units': 5, 'optimizer': 'rmsprop', 'learning_rate': 0.0030088511849749758}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Running with hyperparameters: {'rnn_units': 64, 'dense_units': 40, 'optimizer': 'adam', 'learning_rate': 0.008901042597893304}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch