In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Embedding, RepeatVector, Concatenate, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model
import matplotlib.pyplot as plt

class RNNModel:
    def __init__(self, max_seq_length):
        # 모델 구성
        
        # Input : 데이터 Tensor화
        theta_input = Input(shape=(1,), name='theta_input')
        phi_input = Input(shape=(1,), name='phi_input')
        sequence_input = Input(shape=(max_seq_length,), name='sequence_input')

        # RepeatVector : sequence와 동일한 길이로 확장
        # 각 시퀀스의 모든 단계(원소 스텝 = 타임 스텝)에서 동일한 theta와 phi 값 반복 사용
        theta_repeated = RepeatVector(max_seq_length)(theta_input)
        phi_repeated = RepeatVector(max_seq_length)(phi_input)
        # Embedding : 시퀀스 내의 각 숫자(최대 정수 값 +1 = 5)를 32차원 벡터로 변환
        sequence_embedding = Embedding(5, 32, input_length=max_seq_length)(sequence_input)
        
        # Concatenate : 입력을 연결, 여러 입력 정보 합침
        # 입력 레이어
        concat_layer = Concatenate()([theta_repeated, phi_repeated, sequence_embedding])
        # RNN 레이어 1
        rnn_layer = SimpleRNN(32, return_sequences=True)(concat_layer)
        # RNN 레이어 2
        rnn_layer2 = SimpleRNN(64, return_sequences=True)(rnn_layer)
        # TimeDistributed : Dense 레이어를 각 시간 단계에 독립적으로 적용, 시퀀스의 각 시간 단계에서 동일한 Dense 레이어가 적용
        # ... 시퀀스의 각 시간 단계를 독립적으로 예측 가능
        # Dense : 출력 뉴런 5개(= 출력 공간 차원), softmax 활성화 함수 사용하는 Dense layer
        # 출력 레이어
        output = TimeDistributed(Dense(5, activation='softmax'))(rnn_layer2)

        self.model = Model(inputs=[theta_input, phi_input, sequence_input], outputs=output)
        # plot_model(self.model, show_shapes=True)
        self.model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    def fit(self, theta, phi, sequence, epochs=50):
        # 데이터셋 분리 train_set 8 : validation_set 2
        sequence_train, sequence_val, theta_train, theta_val, phi_train, phi_val = train_test_split(sequence, theta, phi, test_size=0.2)

        self.history = self.model.fit(
            {'theta_input': theta_train, 'phi_input': phi_train, 'sequence_input': sequence_train},
            np.expand_dims(sequence_train, -1), 
            validation_data=({'theta_input': theta_val, 'phi_input': phi_val, 
                              'sequence_input': sequence_val}, np.expand_dims(sequence_val, -1)), epochs=epochs, batch_size=64)
        # batch 64개로 나누어 50 epoch 만큼 반복

    def summary(self):
        self.model.summary()
    
    def plot_history(self):
         # Plot training & validation loss values
        plt.plot(self.history.history['loss'])
        plt.plot(self.history.history['val_loss'])
        plt.title('Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.show()

        # Plot training & validation accuracy values
        plt.plot(self.history.history['accuracy'])
        plt.plot(self.history.history['val_accuracy'])
        plt.title('Model accuracy')
        plt.ylabel('Accuracy')
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper right')
        plt.show()

    def evaluate(self, theta, phi, sequence):
        loss, acc = self.model.evaluate({'theta_input': theta, 'phi_input': phi, 'sequence_input': sequence}, np.expand_dims(sequence, -1))
        print("Test Accuracy: {:.2f}".format(acc*100))

In [3]:
# CSV 파일에서 데이터 읽기
df = pd.read_csv('NVspinData_None_-1_230807.csv')

# 데이터셋 분리 - train_set(train) 8 : test_set 2
train_set, test_set = train_test_split(df, shuffle=True, test_size=0.2)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12350 entries, 0 to 12349
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   gate length     12350 non-null  int64  
 1   Theta           12350 non-null  float64
 2   Phi             12350 non-null  float64
 3   dt              12350 non-null  float64
 4   combination     12350 non-null  object 
 5   total time      12350 non-null  float64
 6   computing time  12350 non-null  float64
dtypes: float64(5), int64(1), object(1)
memory usage: 675.5+ KB


In [4]:
# train_set 데이터 전처리 (train+validation)
theta_tv = train_set['Theta'].values.reshape(-1, 1)
phi_tv = train_set['Phi'].values.reshape(-1, 1)
sequence_tv = train_set['combination'].apply(eval).to_list()
max_seq_length_tv = max(len(seq) for seq in sequence_tv)
sequence_padded_tv = pad_sequences(sequence_tv, maxlen=max_seq_length_tv)

# test_set 데이터 전처리 (final_test)
theta_test = test_set['Theta'].values.reshape(-1, 1)
phi_test = test_set['Phi'].values.reshape(-1, 1)
sequence_test = test_set['combination'].apply(eval).to_list()
max_seq_length_test = max(len(seq) for seq in sequence_test)
sequence_padded_test = pad_sequences(sequence_test, maxlen=max_seq_length_test)

In [10]:
# 모델 초기화
rnn_model = RNNModel(max_seq_length_tv)

rnn_model.summary(rnn_model)

# # 모델 학습
# rnn_model.fit(theta_tv, phi_tv, sequence_padded_tv)

# # 결과 그래프 그리기
# rnn_model.plot_history()

# # 모델 성능 평가
# rnn_model.evaluate(theta_test, phi_test, sequence_padded_test)

KerasTensor(type_spec=TensorSpec(shape=(None, 35, 32), dtype=tf.float32, name=None), name='embedding_5/embedding_lookup/Identity:0', description="created by layer 'embedding_5'")


AttributeError: 'RNNModel' object has no attribute 'summary'