# Sentiment analysis using an RNN


In [3]:
import numpy as np

import tensorflow_datasets as tfds
import tensorflow as tf

tfds.disable_progress_bar()

In [4]:
import matplotlib.pyplot as plt


def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [5]:
dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

train_dataset.element_spec

[1mDownloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /Users/jacobmoran/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m
[1mDataset imdb_reviews downloaded and prepared to /Users/jacobmoran/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [6]:
for example, label in train_dataset.take(1):
  print('text: ', example.numpy())
  print('label: ', label.numpy())

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [7]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

for example, label in train_dataset.take(1):
  print('texts: ', example.numpy()[:3])
  print()
  print('labels: ', label.numpy()[:3])

texts:  [b"Are we really making 'video nasties' again? In the guise of a digital wide screen big budget remake of 8MM, this is quite a ride. Unfortunately there is a bit too much story and at times this becomes like a travelogue as our heroine searches the sleaze spots of Paris, Hamburg and Amsterdam. I am however being rather churlish for the 'depraved' scenes, including everything from, hot wax, harsh whipping and rough sex to drowning, beheading and some. These scenes are immaculate and it's a pity Bruno and his budget couldn't stretch to make all the many characterful creatures introduced become more than simply caricatures."
 b'Daphne Zuniga is the only light that shines in this sleepy slasher, and the light fades quickly. If not her, than what other reason to watch this. five college kids are signed up to prepare an old dorm for its due date of demolition. Problems are automatically occurring when a weird homeless man is soliciting, and the group are short a few people. Then, a k

In [9]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))


vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [10]:
encoded_example = encoder(example)[:3].numpy()
encoded_example



for n in range(3):
  print("Original: ", example[n].numpy())
  print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
  print()

Original:  b"Are we really making 'video nasties' again? In the guise of a digital wide screen big budget remake of 8MM, this is quite a ride. Unfortunately there is a bit too much story and at times this becomes like a travelogue as our heroine searches the sleaze spots of Paris, Hamburg and Amsterdam. I am however being rather churlish for the 'depraved' scenes, including everything from, hot wax, harsh whipping and rough sex to drowning, beheading and some. These scenes are immaculate and it's a pity Bruno and his budget couldn't stretch to make all the many characterful creatures introduced become more than simply caricatures."
Round-trip:  are we really making video [UNK] again in the [UNK] of a [UNK] [UNK] screen big budget remake of [UNK] this is quite a [UNK] unfortunately there is a bit too much story and at times this becomes like a [UNK] as our [UNK] [UNK] the [UNK] [UNK] of [UNK] [UNK] and [UNK] i am however being rather [UNK] for the [UNK] scenes including everything from 

## Creating the model


In [11]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [12]:
print([layer.supports_masking for layer in model.layers])

[False, True, True, True, True]


In [13]:
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
predictions = model.predict(np.array([sample_text]))
print(predictions[0])

[0.00151565]


In [14]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [15]:
history = model.fit(train_dataset, epochs=10,
                    validation_data=test_dataset,
                    validation_steps=30)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
model.save('RNN')



INFO:tensorflow:Assets written to: RNN/assets


INFO:tensorflow:Assets written to: RNN/assets


In [31]:
sample_text = ('The weather is great today')
predictions = model.predict(np.array([sample_text]))

In [32]:
print(predictions)

[[2.058515]]
