<a href="https://colab.research.google.com/github/HJJunn/DeepLearning---NLP/blob/main/15_%EC%96%B4%ED%85%90%EC%85%98_%EB%A7%A4%EC%BB%A4%EB%8B%88%EC%A6%98ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 양방향 LSTM과 어텐션 메커니즘

## 1. IMDB 리뷰 데이터 전처리하기

In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Vocabulary cap handed to the loader as `num_words`; the same value is reused
# later as the Embedding layer's input dimension.
vocab_size = 10_000
train_split, test_split = imdb.load_data(num_words=vocab_size)
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [3]:
# Length statistics of the training reviews as loaded (before any padding).
review_lengths = [len(review) for review in X_train]
print("리뷰 최대 길이:", max(review_lengths))
print("리뷰 평균 길이:", sum(review_lengths) / len(review_lengths))

리뷰 최대 길이: 2494
리뷰 평균 길이: 238.71364


In [4]:
# Common sequence length: every review is padded or truncated to `max_len`
# entries so both splits become rectangular arrays.
max_len = 500
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

In [5]:
# Sanity check: show the array shapes of the features and labels of both splits.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(25000, 500)
(25000, 500)
(25000,)
(25000,)


## 2. 바다나우 어텐션

In [6]:
import tensorflow as tf
class BahdanauAttention(tf.keras.Model):
    """Additive (Bahdanau-style) attention over a sequence of hidden states.

    Each time step of ``values`` is scored against one ``query`` vector with
    ``V(tanh(W1(values) + W2(query)))``. The scores are normalised over the
    time axis with a softmax and used to form a weighted sum of ``values``.

    Args:
        units: Output width of the two projection layers ``W1`` and ``W2``.
    """

    def __init__(self, units):
        super().__init__()
        # The layers are referenced through `tf.keras.layers` so the class only
        # depends on the `tf` import of its own cell, not on a bare `Dense`
        # name that is imported further down the notebook.
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, values, query):
        """Compute the context vector and the attention weights.

        The keys and the values are the same tensor (``values``).

        Args:
            values: Sequence to attend over. In this notebook it is the
                BiLSTM output, assumed to be ``(batch, time, features)``.
            query: Vector the sequence is scored against. In this notebook it
                is the concatenated final hidden state, assumed to be
                ``(batch, features)``.

        Returns:
            A tuple ``(context_vector, attention_weights)``:
            ``context_vector`` is ``values`` weighted by the attention weights
            and summed over axis 1; ``attention_weights`` is the softmax of
            the scores over axis 1, with a trailing dimension of size 1.
        """
        # Insert a time axis so the query broadcasts against every time step.
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # Additive score: one scalar per time step (last dimension is 1).
        score = self.V(
            tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis))
        )

        # Normalise across the time axis so the weights of a sequence sum to 1.
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weighted sum of the values over the time axis.
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)

        return context_vector, attention_weights

## 3. 양방향 LSTM + 어텐션 메커니즘

In [12]:
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout, Masking
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os

In [13]:
# Model input: integer index sequences of fixed length `max_len`.
sequence_input = Input(shape=(max_len,), dtype="int32")

# Map each index to a 128-dimensional vector. `input_length` is not passed:
# the sequence length is already fixed by `Input`, and Keras 3 deprecates the
# argument (it only triggers a warning there).
embedded_sequences = Embedding(vocab_size, 128)(sequence_input)

# NOTE(review): Masking compares the embedded vectors with 0.0, not the padded
# index 0, so padded time steps are probably not masked by this layer.
# Confirm whether `Embedding(..., mask_zero=True)` was the intent.
embedded_masking = Masking(mask_value=0.0)(embedded_sequences)

In [14]:
# First recurrent layer: a bidirectional LSTM (64 units per direction) that
# returns its output at every time step so another recurrent layer can follow.
first_bilstm = Bidirectional(LSTM(64, dropout=0.5, return_sequences=True))
lstm = first_bilstm(embedded_masking)

In [17]:
# Second bidirectional LSTM. `return_state=True` additionally yields the final
# hidden (h) and cell (c) states of the forward and the backward direction.
# NOTE(review): `lstm` is both the input and the output of this cell, so
# re-running it without re-running the previous cell stacks one more layer.
second_bilstm = Bidirectional(
    LSTM(64, dropout=0.5, return_sequences=True, return_state=True)
)
lstm, forward_h, forward_c, backward_h, backward_c = second_bilstm(lstm)

In [18]:
# Show the symbolic shapes of the sequence output and of the four final states.
print(*(tensor.shape for tensor in (lstm, forward_h, forward_c, backward_h, backward_c)))

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [19]:
# Join the forward and backward final hidden states into one vector per sample.
state_h = Concatenate()([forward_h, backward_h])

# Same for the cell states. The layer has to be instantiated first and then
# called on the tensors; handing the list to the constructor would bind it to
# the `axis` argument and leave `state_c` as an un-called layer object.
state_c = Concatenate()([forward_c, backward_c])

In [22]:
# Attention with 64-unit projections. The BiLSTM sequence output supplies the
# values, and the concatenated final hidden state is the query.
attention_units = 64
attention = BahdanauAttention(attention_units)
context_vector, attention_weights = attention(lstm, state_h)



In [23]:
# Classification head on top of the attention context vector:
# 20-unit ReLU layer -> dropout -> single sigmoid unit.
dense1 = Dense(units=20, activation="relu")(context_vector)
dropout = Dropout(rate=0.5)(dense1)
output = Dense(units=1, activation="sigmoid")(dropout)

# Functional model from the index-sequence input to the sigmoid output.
model = Model(inputs=sequence_input, outputs=output)

In [24]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [25]:
# Train for 3 epochs with mini-batches of 256 reviews.
# NOTE(review): the test split doubles as validation data here and is evaluated
# again afterwards, so the validation metrics are not an independent check.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=3,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/3




[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1148s[0m 12s/step - accuracy: 0.5679 - loss: 0.6556 - val_accuracy: 0.8644 - val_loss: 0.3245
Epoch 2/3
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1217s[0m 12s/step - accuracy: 0.8938 - loss: 0.2866 - val_accuracy: 0.8818 - val_loss: 0.2810
Epoch 3/3
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1162s[0m 12s/step - accuracy: 0.9231 - loss: 0.2165 - val_accuracy: 0.8907 - val_loss: 0.2728


In [26]:
# `evaluate` returns the loss followed by the compiled metric (accuracy);
# only the accuracy is printed.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(test_accuracy)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m295s[0m 377ms/step - accuracy: 0.8927 - loss: 0.2705
0.890720009803772
