In [None]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, GRU, Concatenate, Dense

# Load the train/test review arrays from disk.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code. Only load these files from a trusted source.
train_data = np.load('naver_movie_review_train_data.npy', allow_pickle=True)
test_data = np.load('naver_movie_review_test_data.npy', allow_pickle=True)

# Split into model inputs and labels: column 1 is used as the text and
# column 2 as the label (presumably column 0 is a review id -- confirm
# against whatever produced these arrays).
X_train = train_data[:, 1]
X_test = test_data[:, 1]

# Cast the labels to float32. The arrays hold text and labels together, so a
# column sliced out of them is presumably object (or string) dtype, which
# Keras cannot convert to a tensor when passed to model.fit.
y_train = train_data[:, 2].astype(np.float32)
y_test = test_data[:, 2].astype(np.float32)

# Build the vocabulary from the training texts only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

# Encode the train and test texts as sequences of integer word indices.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Bring every sequence to the length of the longest training sequence.
max_len = max(map(len, X_train_seq))
X_train_pad, X_test_pad = (
    pad_sequences(seqs, maxlen=max_len) for seqs in (X_train_seq, X_test_seq)
)

# Define the model: a shared embedding feeds an LSTM and a GRU side by side;
# their outputs are concatenated and passed to a single sigmoid unit.
# The embedding input size is one more than the number of words the tokenizer
# learned (presumably to leave room for the padding index 0 -- confirm).
vocab_size = len(tokenizer.word_index) + 1

inputs = Input(shape=(max_len,))
embedded = Embedding(vocab_size, 128)(inputs)
lstm_out = LSTM(64)(embedded)
gru_out = GRU(64)(embedded)
merged = Concatenate()([lstm_out, gru_out])
outputs = Dense(1, activation='sigmoid')(merged)
model = Model(inputs=inputs, outputs=outputs)

# Configure training: binary cross-entropy loss, Adam optimizer, and accuracy
# reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs. The test split is passed as the validation data, so
# the per-epoch validation metrics are computed on the test set.
model.fit(
    X_train_pad,
    y_train,
    epochs=10,
    validation_data=(X_test_pad, y_test),
)