In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
ds, info = tfds.load('imdb_reviews/subwords8k', data_dir='dataset', with_info=True, as_supervised=True)
train_data, test_data = ds['train'], ds['test']

[1mDownloading and preparing dataset imdb_reviews/subwords8k/1.0.0 (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to dataset/imdb_reviews/subwords8k/1.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to dataset/imdb_reviews/subwords8k/1.0.0.incompleteZN0S1K/imdb_reviews-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to dataset/imdb_reviews/subwords8k/1.0.0.incompleteZN0S1K/imdb_reviews-test.tfrecord


HBox(children=(FloatProgress(value=0.0, max=25000.0), HTML(value='')))



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Shuffling and writing examples to dataset/imdb_reviews/subwords8k/1.0.0.incompleteZN0S1K/imdb_reviews-unsupervised.tfrecord


HBox(children=(FloatProgress(value=0.0, max=50000.0), HTML(value='')))

[1mDataset imdb_reviews downloaded and prepared to dataset/imdb_reviews/subwords8k/1.0.0. Subsequent calls will reuse this data.[0m


In [3]:
encoder = info.features['text'].encoder

In [4]:
padded_shapes = ([None], ())

In [5]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000

In [6]:
train_batches = train_data.shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
test_batches = test_data.padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)

In [7]:
EMBEDDING_DIM = 64

In [8]:
model = keras.Sequential([
    layers.Embedding(encoder.vocab_size, EMBEDDING_DIM),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [9]:
model.compile(optimizer=keras.optimizers.Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 64)          523840    
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               66048     
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 598,209
Trainable params: 598,209
Non-trainable params: 0
_________________________________________________________________


In [11]:
history = model.fit(train_batches, epochs=10, validation_data=test_batches, validation_steps=30, workers=6)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
def pad_to_size(vec, size):
    zeros = [0] * (size - len(vec))
    vec.extend(zeros)
    return vec

def sample_predict(sentance, pad, model_used):
    encoded_sample_pred_text = encoder.encode(sentance)
    if pad:
      encoded_sample_pred_text = pad_to_size(encoded_sample_pred_text, 64)
    encoded_sample_pred_text = tf.cast(encoded_sample_pred_text, tf.float32)
    return model_used.predict(tf.expand_dims(encoded_sample_pred_text, 0))

In [13]:
sample_text = 'This movie was brilliant. The acting and the storytelling was just great.'
pred = sample_predict(sample_text, pad=True, model_used=model) * 100
print('Probability that this is a positive review: %.5f' % pred)

Probability that this is a positive review: 88.32720


In [14]:
sample_text = 'The only way I can describe the experience from the movie is that it was dull. Everything was so dull.'
pred = sample_predict(sample_text, pad=True, model_used=model) * 100
print('Probability that this is a positive review: %.5f' % pred)

Probability that this is a positive review: 4.92650


In [15]:
model_2 = keras.Sequential([
    layers.Embedding(encoder.vocab_size, EMBEDDING_DIM),
    layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
    layers.Bidirectional(layers.LSTM(32)),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])
model_2.compile(optimizer=keras.optimizers.Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])
model_2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 64)          523840    
_________________________________________________________________
bidirectional_1 (Bidirection (None, None, 128)         66048     
_________________________________________________________________
bidirectional_2 (Bidirection (None, 64)                41216     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 635,329
Trainable params: 635,329
Non-trainable params: 0
________________________________________________

In [16]:
history = model_2.fit(train_batches, epochs=10, validation_data=test_batches, validation_steps=30, workers=6)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:
sample_text = 'This movie was brilliant. The acting and the storytelling was just great.'
pred = sample_predict(sample_text, pad=True, model_used=model_2) * 100
print('Probability that this is a positive review: %.5f' % pred)
sample_text = 'The only way I can describe the experience from the movie is that it was dull. Everything was so dull.'
pred = sample_predict(sample_text, pad=True, model_used=model_2) * 100
print('Probability that this is a positive review: %.5f' % pred)

Probability that this is a positive review: 98.27574
Probability that this is a positive review: 4.49548
