<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
import pandas as pd
import urllib.request
import matplotlib.pyplot as plt
import re
import numpy as np
import os
import urllib
import string
from tqdm.notebook import tqdm
import json
import tensorflow as tf
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.layers import Layer, Dense, Flatten, Dropout, Concatenate, Add, Dot, Multiply, Reshape, Activation, BatchNormalization, SimpleRNNCell, RNN, SimpleRNN, LSTM, Embedding, Bidirectional, TimeDistributed, Conv1D, Conv2D, MaxPool1D, MaxPool2D, GlobalMaxPool1D, GlobalMaxPool2D, AveragePooling1D, AveragePooling2D, GlobalAveragePooling1D, GlobalAveragePooling2D, ZeroPadding2D
from tensorflow.keras.optimizers import SGD, Adam, Adagrad
from tensorflow.keras.metrics import MeanSquaredError, RootMeanSquaredError, MeanAbsoluteError, MeanAbsolutePercentageError, BinaryCrossentropy, CategoricalCrossentropy, SparseCategoricalCrossentropy, CosineSimilarity
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.activations import linear, sigmoid, relu
from tensorflow.keras.initializers import RandomNormal, glorot_uniform, he_uniform, Constant
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import imdb

# Hook tqdm into pandas so pandas operations can report progress bars.
tqdm.pandas()
# Apply matplotlib's "dark_background" style sheet to any figures drawn later.
plt.style.use("dark_background")

  from pandas import Panel


In [2]:
# Vocabulary cap handed to the IMDB loader via `num_words`.
vocab_size = 10_000

# The loader returns a (train, test) pair, each of which is an (X, y) pair.
train_split, test_split = imdb.load_data(num_words=vocab_size)
tr_X, tr_y = train_split
te_X, te_y = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [6]:
# Token count of every training document, in ascending order.
lens = sorted(len(doc) for doc in tr_X)

# Fraction of training documents that must fit entirely within `max_len`.
# `ratio` is the single source of truth: it drives both the quantile below and
# the printed message, so the two can no longer disagree if it is changed.
ratio = 0.99
max_len = int(np.quantile(lens, ratio))

# Reports (in Korean) the longest document length and the length that covers
# `ratio` of the training documents.
print(f"길이가 가장 긴 문장의 길이는 {np.max(lens)}이고 길이가 {max_len} 이하인 문장이 전체의 {ratio:.0%}를 차지합니다.")

# Alternative left by the original author: a fixed cap instead of the quantile.
# max_len = 500

# Bring every document to exactly `max_len` tokens.
# NOTE: this overwrites tr_X / te_X, so re-running this cell without reloading
# the data measures the already-padded arrays rather than the raw documents.
tr_X = pad_sequences(tr_X, maxlen=max_len)
te_X = pad_sequences(te_X, maxlen=max_len)

길이가 가장 긴 문장의 길이는 2494이고 길이가 926 이하인 문장이 전체의 99%를 차지합니다.


In [10]:
class BahdanauAttention(Model):
    """Additive (Bahdanau-style) attention that pools a sequence into one vector.

    Each timestep of ``values`` is scored against a single ``query`` vector as
    ``V(tanh(W1(values) + W2(query)))``. The scores are normalised with a
    softmax over the time axis and used to form a weighted sum of ``values``.
    The keys are the same tensor as the values.

    Args:
        units: Width of the two projections (``W1``, ``W2``) used for scoring.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = Dense(units)  # projects every timestep of `values`
        self.W2 = Dense(units)  # projects the query
        self.V = Dense(1)       # collapses each projected timestep to one score

    def call(self, values, query, mask=None):
        """Compute the context vector and the attention weights.

        Args:
            values: Sequence to attend over, expected as
                (batch_size, max_len, h_size).
            query: One vector per example, expected as (batch_size, h_size).
            mask: Optional boolean tensor of shape (batch_size, max_len) that is
                True for real timesteps and False for padding. When given,
                padded timesteps are excluded from the softmax so they receive
                (numerically) zero attention. When ``None`` every timestep
                takes part, which is the previous behaviour.

        Returns:
            A ``(context_vector, attention_weights)`` tuple: the weighted sum of
            ``values`` over the time axis, (batch_size, h_size), and the
            per-timestep weights, (batch_size, max_len, 1).
        """
        # (batch_size, h_size) -> (batch_size, 1, h_size) so the projected query
        # broadcasts against every timestep in the addition below.
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # (batch_size, max_len, units) -> (batch_size, max_len, 1): self.V maps
        # each timestep's hidden projection to a single unnormalised score.
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))

        if mask is not None:
            # Replace the score of padded timesteps with a large negative value
            # so the softmax assigns them ~0 weight. A finite constant (rather
            # than -inf) keeps a fully-masked row from producing NaN.
            keep = tf.expand_dims(tf.cast(mask, tf.bool), axis=-1)
            score = tf.where(keep, score, tf.cast(-1e9, score.dtype))

        # Normalise over the time axis (axis=1): (batch_size, max_len, 1).
        attention_weights = tf.nn.softmax(score, axis=1)

        # Weight every timestep, then sum the time axis away: (batch_size, h_size).
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)

        return context_vector, attention_weights

# Model graph: embedding -> two stacked BiLSTMs -> additive attention -> dense head.
inputs = Input(shape=(max_len,))

# Token ids -> 128-dimensional vectors; mask_zero=True treats id 0 as padding.
z1 = Embedding(
    input_dim=vocab_size,
    output_dim=128,
    input_length=max_len,
    mask_zero=True,
)(inputs)

# First recurrent layer emits one output per timestep.
z2 = Bidirectional(LSTM(units=64, dropout=0.5, return_sequences=True))(z1)

# Second recurrent layer additionally returns the final hidden/cell state of
# the forward and the backward direction.
second_bilstm = Bidirectional(
    LSTM(units=64, dropout=0.5, return_sequences=True, return_state=True)
)
(z3, for_h_state, for_c_state, back_h_state, back_c_state) = second_bilstm(z2)

# Join both directions' final states; the hidden state serves as attention query.
h_states = Concatenate()([for_h_state, back_h_state])
c_states = Concatenate()([for_c_state, back_c_state])  # not consumed in this cell

# Attend over the per-timestep outputs using the final hidden state.
attention = BahdanauAttention(units=64)
context_vector, attention_weights = attention(z3, h_states)

# Classification head: small ReLU layer, dropout, single sigmoid unit.
z4 = Dense(20, activation="relu")(context_vector)
z5 = Dropout(0.5)(z4)
outputs = Dense(1, activation="sigmoid")(z5)

model = Model(inputs=inputs, outputs=outputs)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 926)]        0           []                               
                                                                                                  
 embedding_3 (Embedding)        (None, 926, 128)     1280000     ['input_4[0][0]']                
                                                                                                  
 bidirectional_6 (Bidirectional  (None, 926, 128)    98816       ['embedding_3[0][0]']            
 )                                                                                                
                                                                                                  
 bidirectional_7 (Bidirectional  [(None, 926, 128),  98816       ['bidirectional_6[0][0]']    

In [12]:
# Single sigmoid output -> binary cross-entropy loss; accuracy is tracked as "acc".
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["acc"],
)

# Train for 5 epochs; the test split is passed as validation data, so the
# per-epoch validation metrics are computed on it.
hist = model.fit(
    tr_X,
    tr_y,
    batch_size=256,
    epochs=5,
    verbose=1,
    validation_data=(te_X, te_y),
)

Epoch 1/5
 1/98 [..............................] - ETA: 2:10:19 - loss: 0.6932 - acc: 0.4961

KeyboardInterrupt: 

In [None]:
# Evaluate on the test split; index 1 of the result is the accuracy reported below.
test_metrics = model.evaluate(te_X, te_y)
test_acc = test_metrics[1]
print("\n 테스트 정확도: %.4f" % test_acc)


 테스트 정확도: 0.8793
