In [1]:
import tensorflow_datasets as tfds
import tensorflow as tf

In [3]:
# Input-pipeline settings read by the dataset cell below.
# One padding spec per dataset component: the first component is padded along
# its only (variable-length) axis, the second is a scalar and needs no padding.
padded_shapes = ([None], ())
BATCH_SIZE = 64       # number of examples per padded batch
BUFFER_SIZE = 10000   # size of the shuffle buffer for the training set

In [4]:
# Load the 'imdb_reviews/subwords8k' dataset together with its metadata.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)

# Keep the text feature's encoder: it supplies the vocabulary size for the
# embedding layer and encodes new sentences at prediction time.
encoder = info.features['text'].encoder

# Training split: shuffle first, then group into padded batches.
trainset = dataset['train']
trainset = trainset.shuffle(BUFFER_SIZE)
trainset = trainset.padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)

# Test split: same batching, no shuffling.
testset = dataset['test']
testset = testset.padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)




In [5]:
# Baseline classifier: embedding -> one bidirectional LSTM -> dense head
# ending in a single sigmoid unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [9]:
# Configure the baseline model for binary classification and train it.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

# Validation uses only 30 batches of the test set per epoch.
history = model.fit(
    trainset,
    epochs=5,
    validation_data=testset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [7]:
def pad_to_size(vec, size):
  """Return a copy of `vec` right-padded with zeros to at least `size` items.

  Args:
    vec: Sequence of values to pad (e.g. a list of encoded token ids).
    size: Minimum length of the returned list.

  Returns:
    A new list holding the items of `vec` followed by as many zeros as are
    needed to reach `size`. If `vec` already has `size` or more items, its
    items are returned unchanged (the sequence is never truncated).
  """
  # Build a new list instead of extending `vec`, so the caller's sequence is
  # left untouched and non-list sequences such as tuples are accepted too.
  missing = max(size - len(vec), 0)
  return list(vec) + [0] * missing

In [21]:
def sample_predict(sentence, pad, model_, pad_size=64):
  """Encode `sentence` and return `model_`'s prediction for it.

  Args:
    sentence: Text passed to the notebook-level `encoder.encode`.
    pad: If truthy, zero-pad the encoded sentence with `pad_to_size`.
    model_: Object whose `predict` method is called on the batched input.
    pad_size: Target length used when `pad` is truthy. Defaults to 64, the
      value that was previously hard-coded here.

  Returns:
    Whatever `model_.predict` returns for the single-sentence batch.
  """
  token_ids = encoder.encode(sentence)
  if pad:
    token_ids = pad_to_size(token_ids, pad_size)

  # NOTE(review): the ids are cast to float32 before prediction, as in the
  # original code; confirm whether an integer dtype would be more appropriate
  # for the embedding input.
  token_ids = tf.cast(token_ids, tf.float32)

  # Add a leading axis so the model receives a batch containing one sentence.
  return model_.predict(tf.expand_dims(token_ids, 0))

In [15]:
# Dummy reviews: score one positive and one negative sentence with `model`.
# The prediction is multiplied by 100 so it is printed as a percentage.
sample_text_pos = 'This movie was so awesome. Acting was really nice, I recommend it'
predictions1 = sample_predict(sample_text_pos, pad=True, model_=model) * 100
# `.item()` extracts the single value explicitly (assumes the prediction is a
# one-element NumPy array); formatting the array itself relies on implicit
# array-to-scalar conversion, which NumPy has deprecated.
print('Probability of Positive review %.2f' % predictions1.item())

sample_text_neg = 'I did not like the movie. The acting was okayish'
predictions2 = sample_predict(sample_text_neg, pad=True, model_=model) * 100
print('Probability of Positive review %.2f' % predictions2.item())

Probability of Positive review 62.91
Probability of Positive review 26.84


Now let's build a more complex model by stacking two bidirectional LSTM layers and adding dropout before the output layer.

In [16]:
# Deeper classifier: two stacked bidirectional LSTMs, then a dense head with
# dropout before the sigmoid output.
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
# return_sequences=True makes the first LSTM emit its full output sequence,
# which the second LSTM layer takes as input.
model2.add(tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(64, return_sequences=True)))
model2.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model2.add(tf.keras.layers.Dense(64, activation='relu'))
model2.add(tf.keras.layers.Dropout(0.5))
model2.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [19]:
# Configure the stacked-LSTM model with the same loss, optimizer and metric
# settings as the baseline, then train it.
model2.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

# Validation uses only 30 batches of the test set per epoch.
history2 = model2.fit(
    trainset,
    epochs=5,
    validation_data=testset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
# Dummy reviews: score one positive and one negative sentence with `model2`.
# The prediction is multiplied by 100 so it is printed as a percentage.
sample_text_pos = 'This movie was so awesome. Acting was really nice, I recommend it'
predictions1 = sample_predict(sample_text_pos, pad=True, model_=model2) * 100
# `.item()` extracts the single value explicitly (assumes the prediction is a
# one-element NumPy array); formatting the array itself relies on implicit
# array-to-scalar conversion, which NumPy has deprecated.
print('Probability of Positive review %.2f' % predictions1.item())

sample_text_neg = 'I did not like the movie. The acting was okayish'
predictions2 = sample_predict(sample_text_neg, pad=True, model_=model2) * 100
print('Probability of Positive review %.2f' % predictions2.item())

Probability of Positive review 88.10
Probability of Positive review 23.42
