In [1]:
import numpy as np

## dataset objection 로 별도로 변환해 줄 필요 없음
import tensorflow_datasets as tfds
import tensorflow as tf

tfds.disable_progress_bar()

In [2]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [3]:
dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

train_dataset.element_spec

Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\SNUAI\tensorflow_datasets\imdb_reviews\plain_text\1.0.0...
Dataset imdb_reviews downloaded and prepared to C:\Users\SNUAI\tensorflow_datasets\imdb_reviews\plain_text\1.0.0. Subsequent calls will reuse this data.


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
for example, label in train_dataset.take(1):
  print('text: ', example.numpy())
  print('label: ', label.numpy())

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [10]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [11]:
## 배치를 가져와서 첫 세 개 확인
for example, label in train_dataset.take(1):
  print('texts: ', example.numpy()[:3])
  print()
  print('labels: ', label.numpy()[:3])

texts:  [b"There is a bit of trivia which should be pointed out about a scene early in the movie where Homer watches the attempt of December 6, 1957 (at least that was the video used on the TV he was watching) which showed the Vangard launch attempt, which failed.<br /><br />He is next shown reading or dictating a letter to Dr. Von Braun offering condolences about the failure.<br /><br />Von Braun was at Marshall space flight center in Huntsville working for the Army. The Vanguard project was by the early Nasa team which was at what soon became Goddard Space flight center.<br /><br />The army rushed the Jupiter-C, which was essentially a US made V2 technology, but worked to launch a satellite in response to Russia's success with Sputnik.<br /><br />This error may have actually been made by Homer, because of the notoriety of Von Braun, but his team didn't have their attempt fail. In fact the underlying Redstone was flying from 52 and was the first US man rated booster, used for Shepard'

In [13]:
## IMDB 에 많은 vocab 이 있음 --> 모두 pos/neg 분류에 사용되지는 않음 (ex 영화 제목, 감독 이름 등)
## 자주 등장하는 일부 vocab 만 분류에 사용한다. 
VOCAB_SIZE=1000
## 자주 안 나오는 vocab 은 none 처리
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=VOCAB_SIZE)
## encoding 가능하도록 내용 파악
## The vocabulary for the layer must be either supplied on construction or learned via adapt(). When this layer is adapted, it will analyze the
## dataset, determine the frequency of individual string values, and create a
## vocabulary from them.  --  https://www.tensorflow.org/api_docs/python/tf/keras/layers/TextVectorization

encoder.adapt(train_dataset.map(lambda text, label: text))
## 단어들로 이루어진 dictionary 생성

In [22]:
vocab = np.array(encoder.get_vocabulary())

## Frequency 가 높은 순서대로 나옴
print(vocab[:20])
print(vocab[-20:-1])

['' '[UNK]' 'the' 'and' 'a' 'of' 'to' 'is' 'in' 'it' 'i' 'this' 'that'
 'br' 'was' 'as' 'for' 'with' 'movie' 'but']
['manages' 'ideas' 'expecting' 'jane' 'fails' 'deserves' 'present'
 'political' 'missing' 'attempts' 'twist' 'secret' 'fire' 'dumb' 'unlike'
 'fighting' 'fantasy' 'pay' 'air']


In [23]:
## integer 로 만듬
## 아래 보면 문장이 0 0 0 으로 끝나고 있음
## 자연어의 sequence 길이는 각기 다르기 때문에 한 배치로 묶어 사용하려면, 
## 배치 내 가장 긴 문장 기준으로 길이를 맞추어 zero padding
encoded_example = encoder(example)[:3].numpy()
encoded_example

array([[48,  7,  4, ...,  0,  0,  0],
       [48,  7, 11, ...,  0,  0,  0],
       [ 1,  1,  1, ...,  0,  0,  0]], dtype=int64)

In [24]:
## 비교해서 어떤 부분이 바뀌었는지 확인해 본다.
for n in range(3):
  print("Original: ", example[n].numpy())
  print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
  print()

Original:  b"There is a bit of trivia which should be pointed out about a scene early in the movie where Homer watches the attempt of December 6, 1957 (at least that was the video used on the TV he was watching) which showed the Vangard launch attempt, which failed.<br /><br />He is next shown reading or dictating a letter to Dr. Von Braun offering condolences about the failure.<br /><br />Von Braun was at Marshall space flight center in Huntsville working for the Army. The Vanguard project was by the early Nasa team which was at what soon became Goddard Space flight center.<br /><br />The army rushed the Jupiter-C, which was essentially a US made V2 technology, but worked to launch a satellite in response to Russia's success with Sputnik.<br /><br />This error may have actually been made by Homer, because of the notoriety of Von Braun, but his team didn't have their attempt fail. In fact the underlying Redstone was flying from 52 and was the first US man rated booster, used for Shepar

In [29]:
for n in range(3):
  print("=====================================")    
  print("Original: ", example[n])
  print("Original: ", example[n].numpy())


Original:  tf.Tensor(b"There is a bit of trivia which should be pointed out about a scene early in the movie where Homer watches the attempt of December 6, 1957 (at least that was the video used on the TV he was watching) which showed the Vangard launch attempt, which failed.<br /><br />He is next shown reading or dictating a letter to Dr. Von Braun offering condolences about the failure.<br /><br />Von Braun was at Marshall space flight center in Huntsville working for the Army. The Vanguard project was by the early Nasa team which was at what soon became Goddard Space flight center.<br /><br />The army rushed the Jupiter-C, which was essentially a US made V2 technology, but worked to launch a satellite in response to Russia's success with Sputnik.<br /><br />This error may have actually been made by Homer, because of the notoriety of Von Braun, but his team didn't have their attempt fail. In fact the underlying Redstone was flying from 52 and was the first US man rated booster, used 

In [30]:
model = tf.keras.Sequential([
  # First layer is the encoder
    encoder,
  # Add embedding layer using tf.keras.layers.Embedding

  ## input dim = vocab size / output dim = embedding dim = embedding vector size 
    tf.keras.layers.Embedding(
        ## get_vocabulary : Vocab 이 list 로 나옴
        input_dim=len(encoder.get_vocabulary()),
        ## 뒤 쪽 hidden 과는 상관없음, 다만 vocab 보다는 작아야 함
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        ## zero padding 한 값들이 들어올 것이라고 알려줌 --- padding 된 부분은 무시한다.
        mask_zero=True),

  # Add Bidirectional LSTM with 64 hidden units
  ## 자연어 처리이므로, bidirectional LSTM 으로 동작하게 해야 함
  ## 문장 sequence 를 받아서 1개의 output 출력 --- many to one --- 이므로 return_sequences-False
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),  # units, return_sequences=False

  # Add two Dense layers for final processing; one hidden layer with 64 units and one output layer
    ## hidden layer 이기 때문에 activation function 있어야 함
    tf.keras.layers.Dense(64, activation='relu'),
    ## 0/1 binary classification
    tf.keras.layers.Dense(1)
])

In [32]:

## zero masking 적용 --- embedding layer 부터는 적용되어 있는 것 확인 가능
print([layer.supports_masking for layer in model.layers])

[False, True, True, True, True]


In [33]:
# predict on a sample text without padding.
## 학습 전이기 때문에 틀린 값이 나옴
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
predictions = model.predict(np.array([sample_text]))
print(predictions[0])

nb

UnknownError: Graph execution error:

Detected at node 'cond/CudnnRNNV3' defined at (most recent call last):
    File "C:\Users\SNUAI\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\SNUAI\anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\SNUAI\anaconda3\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "C:\Users\SNUAI\anaconda3\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "C:\Users\SNUAI\anaconda3\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 471, in dispatch_queue
      await self.process_one()
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 460, in process_one
      await dispatch(*args)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 367, in dispatch_shell
      await result
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 662, in execute_request
      reply_content = await reply_content
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 360, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\SNUAI\AppData\Local\Temp\ipykernel_22788\1305985433.py", line 5, in <cell line: 5>
      predictions = model.predict(np.array([sample_text]))
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
      return self(x, training=False)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\bidirectional.py", line 277, in __call__
      return super().__call__(inputs, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\bidirectional.py", line 404, in call
      y = self.forward_layer(
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\base_rnn.py", line 553, in __call__
      return super().__call__(inputs, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\lstm.py", line 751, in call
      ) = lstm_with_backend_selection(**normal_lstm_kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\lstm.py", line 1356, in lstm_with_backend_selection
      gru_lstm_utils.function_register(defun_gpu_lstm, **params)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\gru_lstm_utils.py", line 257, in function_register
      concrete_func = func.get_concrete_function(*args, **kwargs)
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\lstm.py", line 1305, in gpu_lstm_with_fallback
      return tf.cond(
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\lstm.py", line 1275, in cudnn_lstm_fn
      return gpu_lstm(
    File "C:\Users\SNUAI\anaconda3\lib\site-packages\keras\layers\rnn\lstm.py", line 1115, in gpu_lstm
      outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
Node: 'cond/CudnnRNNV3'
Fail to find the dnn implementation.
	 [[{{node cond/CudnnRNNV3}}]]
	 [[sequential/bidirectional/forward_lstm/PartitionedCall]] [Op:__inference_predict_function_8362]