In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Vocabulary size handed to the loader as `num_words`
# (presumably keeps only the most frequent words — confirm in the Keras docs).
vocab_size = 10000

# load_data returns a (train, test) pair, each an (inputs, labels) pair.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

In [3]:
# Longest and mean review length (in tokens) over the training split;
# these numbers motivate the padding length chosen afterwards.
print(f'리뷰의 최대 길이 : {max(map(len, X_train))}')
print(f'리뷰의 평균 길이 : {sum(len(review) for review in X_train)/len(X_train)}')

리뷰의 최대 길이 : 2494
리뷰의 평균 길이 : 238.71364


In [5]:
# Every review is brought to exactly this many tokens.
max_len = 500

# Only `maxlen` is passed, so pad_sequences' default padding/truncation side
# applies. Train is processed first, then test, as before.
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

In [6]:
import tensorflow as tf

In [8]:
class BahdanauAttention(tf.keras.Model):
    """Additive (Bahdanau-style) attention that pools a sequence into one vector.

    A score is computed for every position along axis 1 of ``values`` from a
    tanh of two learned projections (one of ``values``, one of ``query``),
    normalized with a softmax over that axis, and used to take a weighted sum
    of ``values``.

    No mask is consulted here: every position along axis 1 takes part in the
    softmax, including positions that came from padding.
    """

    def __init__(self, units):
        """Create the projection layers.

        Args:
            units: Output width of the two hidden projections ``W1`` and ``W2``.
        """
        super().__init__()
        # Reached through `tf.keras.layers` so the class depends only on the
        # `tf` import its base class already requires, not on a bare `Dense`
        # name imported in some other cell.
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, values, query):  # used when key and value are the same tensor
        """Attend over ``values`` using ``query``.

        Args:
            values: Sequence tensor; axis 1 is the axis attention is normalized
                over (assumed layout: (batch, time, features) — confirm
                against the caller).
            query: Tensor without the axis-1 dimension (assumed layout:
                (batch, features)); it is expanded at axis 1 so that it
                broadcasts against every position of ``values``.

        Returns:
            Tuple ``(context_vector, attention_weights)``:
            ``context_vector`` is ``values`` weighted by the attention weights
            and summed over axis 1; ``attention_weights`` is the softmax of
            the scores over axis 1.
        """
        # Insert a length-1 axis so the projected query can be added to the
        # projected values at every position.
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # Additive score: V(tanh(W1(values) + W2(query))).
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

        # Normalize across positions (axis 1), not across features.
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weighted sum of the values over axis 1.
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [9]:
import os
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers

In [10]:
# One fixed-length sequence of integer token ids per review.
sequence_input = Input(shape=(max_len,), dtype='int32')

# Width of each learned token vector.
embedding_dim = 128

# `input_length` is intentionally not passed: the sequence length is already
# fixed by `sequence_input`, and the argument is deprecated in current Keras.
# mask_zero=True makes the layer emit a mask for id 0 (the padding value).
embedded_sequences = Embedding(vocab_size, embedding_dim, mask_zero=True)(sequence_input)

In [11]:
# First recurrent layer: a bidirectional LSTM that returns its output at
# every timestep so that another recurrent layer can be stacked on top.
lstm = Bidirectional(
    LSTM(units=64, dropout=0.5, return_sequences=True)
)(embedded_sequences)

In [13]:
# Second bidirectional LSTM, fed with the first one's sequence output.
# return_state=True additionally yields the final hidden (h) and cell (c)
# state of each direction. The variable spellings are kept as-is because
# other cells refer to them.
(
    lstm,
    forward_h,
    forword_c,
    backword_h,
    backword_c,
) = Bidirectional(
    LSTM(units=64, dropout=0.5, return_sequences=True, return_state=True)
)(lstm)

In [14]:
# Sanity check: shapes of the sequence output and the four final states,
# printed on one line separated by spaces.
print(*(tensor.shape for tensor in (lstm, forward_h, forword_c, backword_h, backword_c)))

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [16]:
# Join the forward and backward final states along the last axis
# (axis=-1 is Concatenate's default, spelled out here for clarity).
state_h = Concatenate(axis=-1)([forward_h, backword_h])  # hidden states
state_c = Concatenate(axis=-1)([forword_c, backword_c])  # cell states

In [17]:
# Attention over the per-timestep LSTM outputs (values), queried with the
# concatenated final hidden state.
attention = BahdanauAttention(units=64)
context_vector, attention_weights = attention(values=lstm, query=state_h)

In [18]:
# Classification head on top of the attention context vector:
# small ReLU layer -> dropout -> single sigmoid unit.
dense1 = Dense(units=20, activation='relu')(context_vector)
dropout = Dropout(rate=0.5)(dense1)
output = Dense(units=1, activation='sigmoid')(dropout)

# End-to-end model from token ids to the sigmoid output.
model = Model(inputs=sequence_input, outputs=output)

In [19]:
# Binary target -> binary cross-entropy; accuracy is tracked as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [21]:
# Train for a fixed number of epochs and keep the per-epoch history.
# NOTE(review): the test split doubles as validation data here. Nothing in
# this cell selects a model based on it, but a held-out validation split would
# keep the final test score cleaner — consider changing.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=3,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [22]:
# evaluate() returns a list of results; index 1 is read as the accuracy
# (the single metric configured in compile, after the loss).
print(
    "\n 테스트 정확도: %.4f"
    % model.evaluate(X_test, y_test)[1]
)


 테스트 정확도: 0.8822
