# Prac9 Implement Text processing with neural network

In [1]:
import numpy as np
import tensorflow_datasets as tfds
import tensorflow as tf
tfds.disable_progress_bar()

import matplotlib.pyplot as plt

In [2]:
def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [3]:
dataset, info = tfds.load('imdb_reviews', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

train_dataset.element_spec

[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\DELL\tensorflow_datasets\imdb_reviews\plain_text\1.0.0...[0m
[1mDataset imdb_reviews downloaded and prepared to C:\Users\DELL\tensorflow_datasets\imdb_reviews\plain_text\1.0.0. Subsequent calls will reuse this data.[0m


(TensorSpec(shape=(), dtype=tf.string, name=None),
 TensorSpec(shape=(), dtype=tf.int64, name=None))

In [4]:
for example, label in train_dataset.take(1):
  print('text: ', example.numpy())
  print('label: ', label.numpy())

text:  b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
label:  0


In [5]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [6]:
for example, label in train_dataset.take(1):
  print('texts: ', example.numpy()[:3])
  print()
  print('labels: ', label.numpy()[:3])

texts:  [b'To me A Matter of Life and Death is just that- simply the best film ever made.<br /><br />From beginning to end it oozes class. It is stimulating, thought provoking, a mirror to the post war world and the relations between peoples.<br /><br />The cinematography is simply stunning and the effect of mixing monochrome and Technicolour to accent the different worlds works seamlessly. The characters and plot development are near perfect and the attention to detail promotes a thoroughly believable fantasy.<br /><br />No matter how many times I watch the film - and I have watched it a lot - it never fails to touch me. It makes me smile, it makes me laugh, it makes me think, it makes me cry. It is as fresh today as it was in 1946.<br /><br />If I were allowed just one film to keep and watch again A Matter of Life and Death would be that film.'
 b'Fairly funny Jim Carrey vehicle that has him as a News reporter who temporarily gets the power of God and wrecks havoc. Carrey is back in 

In [7]:
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, label: text))

vocab = np.array(encoder.get_vocabulary())
vocab[:20]

array(['', '[UNK]', 'the', 'and', 'a', 'of', 'to', 'is', 'in', 'it', 'i',
       'this', 'that', 'br', 'was', 'as', 'for', 'with', 'movie', 'but'],
      dtype='<U14')

In [8]:
encoded_example = encoder(example)[:3].numpy()
encoded_example

array([[  6,  70,   4, ...,   0,   0,   0],
       [977, 162,   1, ...,   0,   0,   0],
       [792, 361,   8, ...,   6,  56,   1]], dtype=int64)

In [9]:
for n in range(3):
  print("Original: ", example[n].numpy())
  print("Round-trip: ", " ".join(vocab[encoded_example[n]]))
  print()

Original:  b'To me A Matter of Life and Death is just that- simply the best film ever made.<br /><br />From beginning to end it oozes class. It is stimulating, thought provoking, a mirror to the post war world and the relations between peoples.<br /><br />The cinematography is simply stunning and the effect of mixing monochrome and Technicolour to accent the different worlds works seamlessly. The characters and plot development are near perfect and the attention to detail promotes a thoroughly believable fantasy.<br /><br />No matter how many times I watch the film - and I have watched it a lot - it never fails to touch me. It makes me smile, it makes me laugh, it makes me think, it makes me cry. It is as fresh today as it was in 1946.<br /><br />If I were allowed just one film to keep and watch again A Matter of Life and Death would be that film.'
Round-trip:  to me a matter of life and death is just that simply the best film ever [UNK] br from beginning to end it [UNK] class it is [U

In [10]:
model = tf.keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        # Use masking to handle the variable sequence lengths
        mask_zero=True),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [11]:
print([layer.supports_masking for layer in model.layers])

[False, True, True, True, True]


In [12]:
sample_text = ('The movie was cool. The animation and the graphics '
               'were out of this world. I would recommend this movie.')
predictions = model.predict(np.array([sample_text]))
print(predictions[0])

[0.00033818]


In [13]:
# predict on a sample text with padding

padding = "the " * 2000
predictions = model.predict(np.array([sample_text, padding]))
print(predictions[0]) 

[0.00033818]


In [14]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [16]:
history = model.fit(train_dataset, epochs=3,
                    validation_data=test_dataset,
                    validation_steps=30)

test_loss, test_acc = model.evaluate(test_dataset)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [17]:
print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 0.3621436655521393
Test Accuracy: 0.8479200005531311
