<a href="https://colab.research.google.com/github/Kimhansav/introofnlp_practice/blob/main/15_03(%EC%96%91%EB%B0%A9%ED%96%A5_LSTM%EA%B3%BC_%EC%96%B4%ED%85%90%EC%85%98_%EB%A9%94%EC%BB%A4%EB%8B%88%EC%A6%98(BiLSTM_with_Attention_mechanism))practice_ipynb%EC%9D%98_%EC%82%AC%EB%B3%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. IMDB 리뷰 데이터 전처리하기

In [None]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Vocabulary cap passed to the loader as `num_words`; the same value is reused
# later as the input dimension of the embedding table.
vocab_size = 10_000
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=vocab_size,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Longest review (in tokens) in the training split.
print('리뷰의 최대 길이 : {}'.format(max(map(len, X_train))))
# Mean review length: total token count divided by the number of reviews.
print('리뷰의 평균 길이 : {}'.format(
    sum(len(review) for review in X_train) / len(X_train)
))

리뷰의 최대 길이 : 2494
리뷰의 평균 길이 : 238.71364


In [None]:
# Every review is padded/truncated to this many tokens.
max_len = 500
# Apply the same padding to both splits; the right-hand side is fully built
# from the current X_train / X_test before the names are rebound.
X_train, X_test = (
    pad_sequences(split, maxlen=max_len) for split in (X_train, X_test)
)

텍스트 분류에서 어텐션을 사용하는 이유: RNN은 마지막 은닉 상태만으로 예측을 하는데, 이 은닉 상태는 여러 시점(time step)을 지나오며 앞부분의 정보를 일부 손실한 상태입니다. 따라서 지나온 모든 시점의 은닉 상태를 다시 한번 참조하여 이 손실을 보완하겠다는 의도입니다.

In [None]:
import tensorflow as tf

In [None]:
class BahdanauAttention(tf.keras.Model):
  """Additive (Bahdanau-style) attention over a sequence of hidden states.

  Each timestep of `values` is scored against `query` with
  V(tanh(W1(values) + W2(query))). The scores are normalised over the time
  axis and used to form a weighted sum of `values`.
  """

  def __init__(self, units):
    """Create the projection layers used to compute the attention score.

    Args:
      units: output width of the W1 and W2 projections.
    """
    super().__init__()
    # Dense is reached through `tf` so that this class does not rely on a
    # separate `from tensorflow.keras.layers import Dense` having been run.
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, values, query):  # keys and values are the same tensor here
    """Compute the context vector and the attention weights.

    Args:
      values: sequence of hidden states, (batch_size, max_length, hidden_size).
      query: single state to attend with, (batch_size, hidden_size).

    Returns:
      A tuple (context_vector, attention_weights) with shapes
      (batch_size, hidden_size) and (batch_size, max_length, 1).
    """
    # query shape == (batch_size, hidden_size)
    # hidden_with_time_axis shape == (batch_size, 1, hidden_size)
    # A time axis is inserted so the projected query broadcasts over every
    # timestep in the addition below.
    hidden_with_time_axis = tf.expand_dims(query, 1)

    # score shape == (batch_size, max_length, 1)
    # The last axis is 1 because self.V projects to a single unit; before
    # self.V the tensor has shape (batch_size, max_length, units).
    score = self.V(tf.nn.tanh(
        self.W1(values) + self.W2(hidden_with_time_axis)))

    # attention_weights shape == (batch_size, max_length, 1)
    # NOTE(review): the softmax runs over every timestep and no mask is
    # consulted in this method, so padded positions in `values` (if any)
    # also receive attention weight.
    attention_weights = tf.nn.softmax(score, axis = 1)

    # context_vector shape after sum == (batch_size, hidden_size)
    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis = 1)

    return context_vector, attention_weights

In [None]:
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
from tensorflow.keras import Input, Model
from tensorflow.keras import optimizers
import os

In [None]:
# Integer token ids, one row of `max_len` ids per review.
sequence_input = Input(shape = (max_len,), dtype = 'int32')
# `input_length` is intentionally not passed: it is a deprecated Embedding
# argument, and the sequence length is already fixed by the Input shape above.
# mask_zero = True marks id 0 (the padding value) as masked for downstream layers.
embedded_sequences = Embedding(vocab_size, 128, mask_zero = True)(sequence_input)

In [None]:
# First bidirectional LSTM layer; return_sequences=True keeps the output of
# every timestep so a second recurrent layer can be stacked on top.
lstm = Bidirectional(
    layer=LSTM(units=64, dropout=0.5, return_sequences=True),
)(embedded_sequences)

In [None]:
# Second bidirectional LSTM layer. return_state=True additionally returns the
# final hidden (h) and cell (c) state of the forward and backward directions.
# Note: `lstm` is both the input and the first output of this statement, so
# re-running this cell on its own stacks a new layer on the previous result.
(lstm,
 forward_h, forward_c,
 backward_h, backward_c) = Bidirectional(
    layer=LSTM(units=64, dropout=0.5, return_sequences=True, return_state=True),
)(lstm)

In [None]:
# Show the shape of the sequence output followed by the four final states.
print(*(
    tensor.shape
    for tensor in (lstm, forward_h, forward_c, backward_h, backward_c)
))

(None, 500, 128) (None, 64) (None, 64) (None, 64) (None, 64)


In [None]:
# Join the forward and backward final states along the last (feature) axis.
state_h = Concatenate(axis=-1)([forward_h, backward_h])  # hidden state
state_c = Concatenate(axis=-1)([forward_c, backward_c])  # cell state

In [None]:
# Size of the projections inside the attention layer.
attention = BahdanauAttention(units=64)
# The instance is called directly instead of attention.call(): calling a Keras
# model/layer object goes through its __call__, which dispatches to call().
context_vector, attention_weights = attention(lstm, state_h)
print(context_vector, attention_weights)

KerasTensor(type_spec=TensorSpec(shape=(None, 128), dtype=tf.float32, name=None), name='bahdanau_attention/Sum:0', description="created by layer 'bahdanau_attention'") KerasTensor(type_spec=TensorSpec(shape=(None, 500, 1), dtype=tf.float32, name=None), name='bahdanau_attention/transpose_1:0', description="created by layer 'bahdanau_attention'")


In [None]:
# Classification head on top of the attention context vector:
# Dense(20, relu) -> Dropout(0.5) -> single sigmoid unit.
dense1 = Dense(units=20, activation="relu")(context_vector)
dropout = Dropout(rate=0.5)(dense1)
output = Dense(units=1, activation="sigmoid")(dropout)

# Functional model from the token-id input to the sigmoid output.
model = Model(inputs=sequence_input, outputs=output)

In [None]:
# Binary cross-entropy matches the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for three epochs and keep the per-epoch metrics in `history`.
# NOTE(review): the test split is passed as validation data here, so it is
# already observed during training-time monitoring.
history = model.fit(
    X_train,
    y_train,
    epochs=3,
    batch_size=256,
    validation_data=(X_test, y_test),
    verbose=1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy.
print(
    "\n 테스트 정확도 : %.4f"
    % model.evaluate(X_test, y_test)[1]
)


 테스트 정확도 : 0.8806
