In [1]:
import tensorflow as tf
from tensorflow import keras

In [2]:
# Toy recurrent stack: a plain GRU followed by a GRU wrapped in Bidirectional.
# Both layers return full sequences; per the summary output, the bidirectional
# layer exposes 20 features for its 10 units.
model = keras.models.Sequential()
model.add(keras.layers.GRU(10, return_sequences=True, input_shape=[None, 10]))
model.add(
    keras.layers.Bidirectional(keras.layers.GRU(10, return_sequences=True)))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 10)          660       
_________________________________________________________________
bidirectional (Bidirectional (None, None, 20)          1320      
Total params: 1,980
Trainable params: 1,980
Non-trainable params: 0
_________________________________________________________________


In [3]:
import tensorflow_datasets as tfds

# Load the IMDB reviews dataset as supervised (text, label) pairs, and also
# get the accompanying dataset info object.
datasets, info = tfds.load(
    name='imdb_reviews',
    with_info=True,
    as_supervised=True,
)

In [4]:
def preprocess(x_batch, y_batch):
    """Turn a batch of raw review strings into a padded batch of word tokens.

    Each review is cut to its first 300 units (bytes, the default unit of
    ``tf.strings.substr``). Line-break tags, and then every character that is
    not an ASCII letter or an apostrophe, are replaced by a space. The cleaned
    text is split into words and the ragged result is densified, filling the
    shorter reviews with the ``b'<pad>'`` token.

    Args:
        x_batch: batch of review strings.
        y_batch: matching labels; returned untouched.

    Returns:
        A ``(tokens, y_batch)`` tuple where ``tokens`` is a dense string
        tensor padded with ``b'<pad>'``.
    """
    truncated = tf.strings.substr(x_batch, 0, 300)
    without_breaks = tf.strings.regex_replace(truncated, rb'<br\s*/?>', b' ')
    letters_only = tf.strings.regex_replace(without_breaks, b"[^a-zA-Z']", b' ')
    tokens = tf.strings.split(letters_only)
    return tokens.to_tensor(default_value=b'<pad>'), y_batch

In [5]:
from collections import Counter

# Tally every token in the preprocessed training reviews. Padding tokens are
# counted too, since they are part of each preprocessed batch.
vocabulary = Counter()
for token_batch, label_batch in datasets['train'].batch(32).map(preprocess):
    for review_tokens in token_batch:
        vocabulary.update(review_tokens.numpy().tolist())

In [6]:
# Cap the vocabulary at the `vocab_size` most frequent tokens, ordered from
# most to least common.
vocab_size = 10000
truncated_vocabulary = [word for word, _ in vocabulary.most_common(vocab_size)]

In [7]:
# Static lookup table: each kept word maps to its position in
# `truncated_vocabulary` (i.e. its frequency rank), and words outside the
# vocabulary are assigned to one of `num_oov_buckets` additional ids.
num_oov_buckets = 1000
words = tf.constant(truncated_vocabulary)
word_ids = tf.range(len(truncated_vocabulary), dtype=tf.int64)
vocab_init = tf.lookup.KeyValueTensorInitializer(keys=words, values=word_ids)
table = tf.lookup.StaticVocabularyTable(
    initializer=vocab_init, num_oov_buckets=num_oov_buckets)

In [8]:
def encode_words(x_batch, y_batch):
    """Replace each word token with its integer id from the lookup table.

    Args:
        x_batch: batch of word tokens to look up in ``table``.
        y_batch: matching labels; returned untouched.

    Returns:
        A ``(token_ids, y_batch)`` tuple.
    """
    token_ids = table.lookup(x_batch)
    return token_ids, y_batch

In [9]:
# Training input pipeline: batch the raw reviews, tokenize them, map tokens
# to ids, and prefetch one batch ahead.
train_set = (
    datasets['train']
    .batch(32)
    .map(preprocess)
    .map(encode_words)
    .prefetch(1)
)

In [10]:
# Sentiment classifier: embedding -> two stacked bidirectional GRUs -> sigmoid.
embed_size = 128
model = keras.models.Sequential()
# One embedding row per vocabulary word plus one per OOV bucket.
# mask_zero=True treats id 0 as padding.
# NOTE(review): this presumably relies on b'<pad>' being the most frequent
# token and therefore receiving id 0 -- confirm.
model.add(keras.layers.Embedding(
    input_dim=vocab_size + num_oov_buckets,
    output_dim=embed_size,
    input_shape=[None],
    mask_zero=True))
# The first recurrent layer returns the full sequence for the second one;
# the second returns only its final output, which feeds the dense layer.
model.add(keras.layers.Bidirectional(
    keras.layers.GRU(128, return_sequences=True)))
model.add(keras.layers.Bidirectional(keras.layers.GRU(128)))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
history = model.fit(train_set, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
