In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Discover the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
# Turn on memory growth for every detected GPU. Looping (instead of indexing
# physical_devices[0]) keeps this cell from raising IndexError on a CPU-only
# machine and applies the same setting to all GPUs.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

Num GPUs Available:  1


In [3]:
# Load the 'imdb_reviews/subwords8k' dataset together with its metadata
# object (`info`), then pull out the train and test splits by name.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)
train_data = dataset['train']
test_data = dataset['test']



Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteRRPKR3/imdb_reviews-train.tfrecord…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteRRPKR3/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0.incompleteRRPKR3/imdb_reviews-unsupervised.t…



Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/subwords8k/1.0.0. Subsequent calls will reuse this data.


In [4]:
# Grab the text encoder attached to the dataset's 'text' feature; later cells
# use it for the vocabulary size and to encode sample sentences.
encoder = info.features['text'].encoder
# Bare expression so the notebook displays the encoder's repr.
encoder

<SubwordTextEncoder vocab_size=8185>

In [5]:
# Input-pipeline sizes: the shuffle buffer length and the number of examples
# per padded batch.
BUFFER_SIZE, BATCH_SIZE = 1_000, 64

In [6]:
# Per-component padding shapes handed to `padded_batch`: a 1-D component of
# unspecified length and a scalar component.
# NOTE(review): presumably these correspond to the (text, label) elements
# produced by as_supervised=True -- confirm.
padded_shapes = ([None], ())

In [7]:
# Build the batched pipelines straight from the raw splits in `dataset` rather
# than from `train_data`/`test_data` themselves, so re-running this cell does
# not batch an already-batched dataset.
# Training data is shuffled first; every batch is padded per `padded_shapes`.
train_data = dataset['train'].shuffle(BUFFER_SIZE).padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
test_data = dataset['test'].padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)

In [8]:
# First model: embedding -> one bidirectional LSTM -> dense ReLU layer ->
# single sigmoid output unit. Layers are appended one at a time.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [9]:
# Configure training: Adam optimizer constructed with 1e-4, binary
# cross-entropy loss, and accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train for five epochs on the batched training data, validating against the
# test data with validation_steps=30; the returned object is kept in `history`.
history = model.fit(
    train_data,
    epochs=5,
    validation_data=test_data,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
def pad_to_size(vec, size):
  """Return a copy of `vec` right-padded with zeros to length `size`.

  Args:
    vec: Sequence of values to pad (e.g. a list of token ids).
    size: Minimum length of the result.

  Returns:
    A new list holding the items of `vec` followed by enough zeros to reach
    `size` items. If `vec` already has `size` or more items, an unpadded copy
    is returned (nothing is truncated).
  """
  # Build a new list so the caller's `vec` is never mutated; a negative
  # repeat count yields an empty list, so longer inputs pass through as-is.
  return list(vec) + [0] * (size - len(vec))

In [12]:
def sample_predict(sentence, pad, model_, pad_size=64):
    """Encode `sentence` and return `model_`'s prediction for it.

    Args:
        sentence: Text to score.
        pad: If true, zero-pad the encoded sentence to `pad_size` tokens
            before predicting.
        model_: Model whose `predict` method is called on the encoded
            sentence.
        pad_size: Target length used when `pad` is true. Defaults to 64, the
            length that was previously hard-coded.

    Returns:
        Whatever `model_.predict` returns for a batch containing this single
        encoded sentence.
    """
    token_ids = encoder.encode(sentence)
    if pad:
        token_ids = pad_to_size(token_ids, pad_size)
    # NOTE(review): ids are cast to float32 exactly as before; presumably the
    # embedding layer converts them back to integer indices -- confirm.
    token_ids = tf.cast(token_ids, tf.float32)
    # Add a leading batch axis so the model receives a batch of one.
    return model_.predict(tf.expand_dims(token_ids, 0))

In [13]:
# Positive-sounding sample review used to sanity-check the trained model.
sample_text = 'This movie was awesome. The acting was incredible. Highly recommend'
# Score the padded sample and scale the model output by 100.
predictions = sample_predict(
    sample_text,
    pad=True,
    model_=model,
) * 100



In [14]:
# `predictions` holds a single value inside an array; pull the scalar out
# explicitly, because formatting an array with ndim > 0 through %f relies on
# implicit array-to-scalar conversion (deprecated since NumPy 1.25).
print('probability this is a positive review %.2f' % predictions.item())

probability this is a positive review 78.65


In [15]:
# Lukewarm sample review used to probe the model on a mixed opinion.
sample_text = 'This movie was so so. The acting was medicore. Kind of recommend'
# Score the padded sample and scale the model output by 100.
predictions = sample_predict(
    sample_text,
    pad=True,
    model_=model,
) * 100



In [16]:
# `predictions` holds a single value inside an array; pull the scalar out
# explicitly, because formatting an array with ndim > 0 through %f relies on
# implicit array-to-scalar conversion (deprecated since NumPy 1.25).
print('probability this is a positive review %.2f' % predictions.item())

probability this is a positive review 42.22


In [17]:
# Second model: embedding -> two stacked bidirectional LSTMs (the first one
# returns full sequences so the second can consume them) -> dense ReLU layer
# -> dropout of 0.5 -> single sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [18]:
# Same training configuration as the first model: Adam constructed with 1e-4,
# binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
    loss='binary_crossentropy',
)

In [20]:
# Train the stacked model for five epochs, validating against the test data
# with validation_steps=30; `history` is rebound to this run's result.
history = model.fit(
    train_data,
    validation_data=test_data,
    validation_steps=30,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [21]:
# Re-score the positive-sounding sample with the current `model`.
sample_text = ('This movie was awesome. The acting was incredible. Highly recommend')
predictions = sample_predict(sample_text, pad=True, model_=model) * 100

# Extract the single value explicitly: formatting an array with ndim > 0
# through %f relies on implicit array-to-scalar conversion (deprecated since
# NumPy 1.25).
print('probability this is a positive review %.2f' % predictions.item())

probability this is a positive review 58.74


In [22]:
# Re-score the lukewarm sample with the current `model`.
sample_text = ('This movie was so so. The acting was medicore. Kind of recommend')
predictions = sample_predict(sample_text, pad=True, model_=model) * 100

# Extract the single value explicitly: formatting an array with ndim > 0
# through %f relies on implicit array-to-scalar conversion (deprecated since
# NumPy 1.25).
print('probability this is a positive review %.2f' % predictions.item())

probability this is a positive review 14.34
