In [68]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow_datasets as tfds

In [69]:
# Keep a handle on the Keras IMDB dataset module; later cells reuse it
# (e.g. for get_word_index).
imdb = tf.keras.datasets.imdb

# load_data returns two (data, labels) pairs: train first, then test.
# NOTE(review): num_words=10000 presumably caps the vocabulary at the most
# frequent words — confirm against the Keras docs.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [78]:
# Hyperparameters shared by the padding and model cells.
vocab_size = 10_000   # first argument of the Embedding layer
embedding_dim = 16    # second argument of the Embedding layer
max_length = 256      # maxlen for pad_sequences and input_length of the Embedding

In [72]:
# Word -> integer-index lookup taken from the dataset, with every index
# shifted up by 3 so that 0-3 are free for the special tokens below.
# NOTE(review): the +3 offset presumably mirrors the index_from default of
# imdb.load_data — confirm against the Keras docs.
word_index = {
    word: rank + 3
    for word, rank in imdb.get_word_index().items()
}

# Reserve the four lowest indices for the special tokens.
word_index.update({
    '<PAD>': 0,
    '<START>': 1,
    '<UNK>': 2,
    '<UNUSED>': 3,
})

# Inverse lookup (index -> word), consumed by decode_review.
reverse_word_index = {index: word for word, index in word_index.items()}

In [76]:
# Both splits are padded with identical settings: pad at the end ('post')
# with the <PAD> index, to a common length of max_length.
pad_options = dict(
    maxlen=max_length,
    padding='post',
    value=word_index['<PAD>'],
)

train_data = pad_sequences(train_data, **pad_options)
test_data = pad_sequences(test_data, **pad_options)

In [77]:
def decode_review(text):
  """Convert a sequence of word indices back into a readable string.

  Each index is looked up in the notebook-level ``reverse_word_index``
  mapping; an index with no entry is rendered as '?'. The resulting
  words are joined with single spaces.

  Args:
    text: iterable of integer word indices.

  Returns:
    The decoded review as one space-separated string.
  """
  words = (reverse_word_index.get(token_id, '?') for token_id in text)
  return ' '.join(words)

In [80]:
class MyCallback(tf.keras.callbacks.Callback):
  """Stop training once the logged 'accuracy' metric exceeds a threshold.

  The value is read from the ``logs`` dict passed to ``on_epoch_end`` under
  the key 'accuracy'.
  NOTE(review): Keras reports validation metrics under a 'val_' prefix, so
  this presumably watches the training accuracy — confirm that is intended.

  Args:
    threshold: accuracy (as a fraction) above which training is stopped.
      Defaults to 0.97.
  """

  def __init__(self, threshold=0.97):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    """Check the epoch's accuracy and request a stop when it is high enough.

    Args:
      epoch: index of the epoch that just finished (unused).
      logs: dict of metric results for the epoch; may be None.
    """
    # Avoid a shared mutable default and tolerate a missing metric: comparing
    # None with a float would raise TypeError.
    accuracy = (logs or {}).get('accuracy')
    if accuracy is None:
      return
    if accuracy > self.threshold:
      # With the default threshold this prints "97% accuracy achieved, ...".
      print(f'{self.threshold * 100:g}% accuracy achieved, stopping training.')
      self.model.stop_training = True

callback = MyCallback()

In [83]:
# Seed TensorFlow's global random state before any layer is created.
tf.random.set_seed(42)

# Embedding -> bidirectional LSTM -> ReLU dense stack (128, 64, 32) -> one
# sigmoid output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    *[tf.keras.layers.Dense(units, activation='relu') for units in (128, 64, 32)],
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for up to 10 epochs; `callback` may end training earlier. The test
# split is what gets passed as validation data here.
history = model.fit(
    train_data,
    train_labels,
    epochs=10,
    validation_data=(test_data, test_labels),
    callbacks=[callback],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
97% accuracy achieved, stopping training.
