In [1]:
# Follow-along tutorial: IMDB review sentiment classification with a bidirectional LSTM.

In [2]:
# Import the packages we need to use.

import tensorflow as tf
import tensorflow_datasets as tfds

In [3]:
# Fetch the 'imdb_reviews/subwords8k' dataset together with its metadata object.
# NOTE(review): as_supervised=True is expected to yield (text, label) pairs,
# which is what the two-element padded_shapes below relies on.

dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)

# Pick the predefined 'train' and 'test' splits out of the loaded dataset.

train_dataset = dataset['train']
test_dataset = dataset['test']

# Encoder attached to the 'text' feature; reused later for the vocabulary
# size and for encoding raw sentences at prediction time.

encoder = info.features['text'].encoder

# Input-pipeline constants.

BUFFER_SIZE = 10000  # shuffle buffer size
BATCH_SIZE = 64      # examples per batch

# Per-component shapes for padded_batch: the first component is padded along
# its single variable-length axis, the second component is a scalar.

padded_shapes = ([None], ())

# Shuffle each split, then group it into padded batches.

train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)

test_dataset = (
    test_dataset
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)

# Build the review classifier layer by layer:
# embedding -> bidirectional LSTM -> dense (relu) -> single sigmoid output.

model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile with binary cross-entropy (paired with the sigmoid output above),
# the Adam optimizer constructed with 1e-4, and accuracy as the metric.

model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

# Train for 5 epochs, validating on 30 batches of the test dataset, and keep
# the object returned by fit() for later inspection.

history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)

# Zero-pad an encoded sequence so that it reaches a fixed length.

def pad_to_size(vec, size):
  """Return a copy of `vec` right-padded with zeros up to length `size`.

  The input is never modified; a new list is always returned, so the caller's
  sequence keeps its original length.

  Args:
    vec: Sequence of values to pad (a list, tuple, or any other iterable
      accepted by `list`).
    size: Target length of the returned list.

  Returns:
    A new list holding the items of `vec` followed by as many zeros as are
    needed to reach `size`. When `vec` already has `size` or more items, the
    copy is returned as is (nothing is truncated).
  """
  padded = list(vec)
  # A non-positive multiplier yields an empty list, so over-long inputs
  # simply pass through unchanged.
  padded.extend([0] * (size - len(padded)))
  return padded

# Create a predict function that optionally incorporates the padding.

def sample_predict(sentence, pad, pad_size=64):
  """Encode `sentence` and return the model's prediction for it.

  Relies on the module-level `encoder`, `model` and `pad_to_size`.

  Args:
    sentence: Raw text to score.
    pad: When truthy, the encoded sequence is passed through `pad_to_size`
      before prediction.
    pad_size: Length handed to `pad_to_size` when `pad` is truthy. Defaults
      to 64, the value that used to be hard-coded here.

  Returns:
    Whatever `model.predict` returns for a batch containing only the encoded
    sentence.
  """
  token_ids = encoder.encode(sentence)
  if pad:
    token_ids = pad_to_size(token_ids, pad_size)
  token_ids = tf.cast(token_ids, tf.float32)
  # Add a leading axis so the single example is presented as a batch of one.
  batch = tf.expand_dims(token_ids, 0)

  return model.predict(batch)

# Example review text to run through the trained model.

sample_text = 'I would say the movie was alright, not the best but also not the worst I have seen.'

# Predict with padding enabled and scale the result by 100.
# NOTE(review): the printed label says "Prob", but the value has been
# multiplied by 100, so it reads as a percentage.

predictions = sample_predict(sample_text, pad=True) * 100

# Show the label and the scaled prediction on separate lines.

print('Prob of positive review', predictions, sep='\n')



[1mDownloading and preparing dataset imdb_reviews/subwords8k/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteWUJAS7/imdb_reviews-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteWUJAS7/imdb_reviews-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteWUJAS7/imdb_reviews-unsupervised.tfrecord


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))



[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0. Subsequent calls will reuse this data.[0m
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Prob of positive review
[[9.444676]]
