In [1]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

tfds.disable_progress_bar()

In [2]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])

In [3]:
dataset, info = tfds.load('imdb_reviews', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

print(train_dataset.element_spec)

(TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int64, name=None))


In [4]:
for example, label in train_dataset.take(1):
    print('test: ', example.numpy())
    print('label: ', example.numpy())

test:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an e

In [5]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

In [6]:
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [7]:
for example, label in train_dataset.take(1):
    print('texts: ', example.numpy()[:3])
    print()
    print('labels: ', label.numpy()[:3])

texts:  [b'This is one of those movies that you keep thinking about when you wake up the next morning. It will give you that warm, fuzzy feeling and leave you with a smile on your face.<br /><br />Sure, we get fed the typical stereotype characters and stories, but it does do the trick: Entertain.<br /><br />Being from Sweden and living in the US for quite sometime, it is funny how we react. "The deadbeat husband is going to kill him", "She (Gabriella) is going to die and then there will be a heartbreaking larger-than-life ending". We know how these things work, everything comes together at the end. And it did. The characters were somewhat simple, they were so elaborate that you didn\'t really think twice about it, nothing was really left for your own imagination. The closest would probably be Siv, she makes you ask yourself if she indeed was in love with Daniel, but that\'s about it.<br /><br />But the movie is beautiful, set in rural Norrland, the music is absolutely amazing and the c

In [8]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.experimental.preprocessing.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

In [9]:
vocab = np.array(encoder.get_vocabulary())
inverse_vocab = {index:key for index, key in enumerate(vocab)}
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [10]:
encoded_example = encoder(example)[:3].numpy()
print(encoded_example)
for seq in encoded_example:
    print([inverse_vocab[index] for index in seq])

[[ 11   7  29 ...   0   0   0]
 [  8   1 535 ...   0   0   0]
 [ 10 418   6 ...   0   0   0]]
['this', 'is', 'one', 'of', 'those', 'movies', 'that', 'you', 'keep', 'thinking', 'about', 'when', 'you', '[UNK]', 'up', 'the', 'next', '[UNK]', 'it', 'will', 'give', 'you', 'that', '[UNK]', '[UNK]', 'feeling', 'and', 'leave', 'you', 'with', 'a', '[UNK]', 'on', 'your', '[UNK]', 'br', 'sure', 'we', 'get', '[UNK]', 'the', 'typical', '[UNK]', 'characters', 'and', 'stories', 'but', 'it', 'does', 'do', 'the', '[UNK]', '[UNK]', 'br', 'being', 'from', '[UNK]', 'and', 'living', 'in', 'the', 'us', 'for', 'quite', '[UNK]', 'it', 'is', 'funny', 'how', 'we', '[UNK]', 'the', '[UNK]', 'husband', 'is', 'going', 'to', 'kill', 'him', 'she', '[UNK]', 'is', 'going', 'to', 'die', 'and', 'then', 'there', 'will', 'be', 'a', '[UNK]', '[UNK]', 'ending', 'we', 'know', 'how', 'these', 'things', 'work', 'everything', 'comes', 'together', 'at', 'the', 'end', 'and', 'it', 'did', 'the', 'characters', 'were', 'somewhat', 's

In [11]:
for n in range(3):
    print("Original: ", example[n].numpy())
    print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
    print()

Original:  b'This is one of those movies that you keep thinking about when you wake up the next morning. It will give you that warm, fuzzy feeling and leave you with a smile on your face.<br /><br />Sure, we get fed the typical stereotype characters and stories, but it does do the trick: Entertain.<br /><br />Being from Sweden and living in the US for quite sometime, it is funny how we react. "The deadbeat husband is going to kill him", "She (Gabriella) is going to die and then there will be a heartbreaking larger-than-life ending". We know how these things work, everything comes together at the end. And it did. The characters were somewhat simple, they were so elaborate that you didn\'t really think twice about it, nothing was really left for your own imagination. The closest would probably be Siv, she makes you ask yourself if she indeed was in love with Daniel, but that\'s about it.<br /><br />But the movie is beautiful, set in rural Norrland, the music is absolutely amazing and the

In [28]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [29]:
print([layer.supports_masking for layer in model.layers])

[False, True, True, True, True]


In [30]:
sample_text = ('The movie was cool. The animation and the graphics were out of this world. I would recommend this movie.')

predictions = model.predict(np.array([sample_text]))
print(predictions[0])

[-0.00440053]


In [31]:
padding = "the " * 2000
predictions = model.predict(np.array([sample_text, padding]))
print(predictions[0])

[-0.00440053]


In [32]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer=tf.keras.optimizers.Adam(1e-4), metrics=['accuracy'])

In [27]:
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset, validation_steps=30)

Epoch 1/10

KeyboardInterrupt: 

In [None]:
test_loss, test_acc = model.evaluate(test_dataset)
print("Test Loss: ", test_loss)
print("Test Accuracy", test_acc)

In [None]:
plt.figure(figsize=(16,8))
plt.subplot(1, 2, 1)
plot_graphs(history, 'accuracy')
plt.ylim(None, 1)
plt.subplot(1, 2, 2)
plot_graphs(history, 'loss')
plt.ylim(0, None)

In [None]:
sample_text = ('I am fairly ambivalent to the movie. It was neither good, nor bad. It just was.')

predictions = model.predict(np.array([sample_text]))

In [None]:
print(predictions)