In [None]:
# NOTE(review): this cell pins a nightly TF 2.0 preview build, while later cells
# install tensorflow-gpu under two other version strings. Whichever install runs
# last presumably wins - confirm which build the notebook is meant to use.
!pip install tf-nightly-gpu-2.0-preview==2.0.0.dev20190218

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Install the pinned TensorFlow GPU beta build (quiet mode) and
# tensorflow_datasets before the imports below. Note that tensorflow_datasets
# is installed without a version pin.
!pip install -q tensorflow-gpu==2.0.0-beta1
!pip install tensorflow_datasets
import tensorflow_datasets as tfds
import tensorflow as tf


In [None]:
# NOTE(review): this runs after `import tensorflow as tf` in the previous cell,
# so the already-imported module would presumably not pick up this install
# without a kernel restart. The version string also differs from the earlier
# pins - confirm whether this cell is still needed.
!pip install -q tensorflow-gpu==2.0.0-nightly-gpu-dev040404

In [None]:
import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot a training metric together with its validation counterpart.

  Args:
    history: object whose `.history` mapping holds one series per metric
      name (in this notebook, the value returned by `model.fit`).
    string: name of the metric to plot; the validation series is looked up
      under 'val_' + string.
  """
  val_key = 'val_' + string
  series_names = [string, val_key]
  # Training curve first, then validation, so the legend order matches.
  for key in series_names:
    plt.plot(history.history[key])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend(series_names)
  plt.show()


In [None]:
# Load the 'imdb_reviews/subwords8k' dataset in supervised form, together with
# its metadata object, then pick out the train and test splits.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']


In [None]:
# The dataset's text feature carries its own encoder; reuse it as the tokenizer.
tokenizer = info.features['text'].encoder
print('Vocabulary size: {}'.format(tokenizer.vocab_size))


In [None]:
# Round-trip a sample sentence through the tokenizer and check that decoding
# the encoded ids gives back exactly the input text.
sample_string = 'TensorFlow is cool.'

# Text -> list of ids.
tokenized_string = tokenizer.encode(sample_string)
print('Tokenized string is {}'.format(tokenized_string))

# Ids -> text.
original_string = tokenizer.decode(tokenized_string)
print('The original string: {}'.format(original_string))

assert original_string == sample_string


In [None]:
# Show the piece of text that each individual token id decodes to.
for token_id in tokenized_string:
    decoded_piece = tokenizer.decode([token_id])
    print('{} ----> {}'.format(token_id, decoded_piece))

    

In [None]:
# Buffer size passed to `shuffle()` on the training dataset.
BUFFER_SIZE = 10000
# Batch size passed to `padded_batch()` for both the train and test datasets.
BATCH_SIZE = 64


In [None]:
# Shuffle the training examples, then group both splits into padded batches.
#
# The element shapes are read with tf.compat.v1.data.get_output_shapes()
# rather than the deprecated `Dataset.output_shapes` property, which is not
# available on TF 2 dataset objects in later releases.
#
# NOTE: this cell rebinds train_dataset/test_dataset, so it is not idempotent.
# Re-run the dataset-loading cell before executing this one a second time,
# otherwise the already-batched datasets get batched again.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_dataset))

test_dataset = test_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_dataset))


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout,Dense,LSTM

In [None]:
# from keras.models import Sequential
# from keras.layers import Dropout,Dense,LSTM,CuDNNLSTM,Embedding,Bidirectional

In [None]:
# Layer stack, in order: token embedding -> bidirectional LSTM -> dense ReLU
# layer -> single sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(tokenizer.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for 10 epochs, using the test dataset as validation data. The returned
# object is kept so its per-epoch metrics can be plotted later.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10,
)


In [None]:
# Evaluate on the test dataset; the result unpacks into the loss followed by
# the accuracy metric configured in compile().
test_loss, test_acc = model.evaluate(test_dataset)
print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))


In [None]:
def pad_to_size(vec, size):
    """Return a copy of `vec` right-padded with zeros up to `size` elements.

    The input is not modified; a new list is always returned. Inputs that
    are already at least `size` long keep all their elements (nothing is
    truncated).

    Args:
        vec: sequence of values to pad (anything `list()` accepts).
        size: target length.

    Returns:
        A new list of length max(len(vec), size).
    """
    padded = list(vec)
    # A non-positive repeat count produces [], so long inputs pass through.
    padded.extend([0] * (size - len(padded)))
    return padded

def sample_predict(sentence, pad):
    """Run the trained model on a single piece of text.

    Args:
        sentence: text to score.
        pad: when truthy, zero-pad the encoded text to 64 tokens before
            predicting.

    Returns:
        The value returned by `model.predict` for a batch that contains only
        this sentence.
    """
    # Encode the argument itself. Reading the notebook-level
    # `sample_pred_text` here would silently ignore `sentence`.
    tokenized_sample_pred_text = tokenizer.encode(sentence)

    if pad:
        tokenized_sample_pred_text = pad_to_size(tokenized_sample_pred_text, 64)

    # Add a leading axis so the single example is passed as a batch of one.
    predictions = model.predict(tf.expand_dims(tokenized_sample_pred_text, 0))

    return predictions


In [None]:

# Score a sample review as-is, with no zero-padding applied.
sample_pred_text = (
    'The movie was ok. The animation and the graphics '
    'were not good. I would recommend this movie.'
)
predictions = sample_predict(sample_pred_text, pad=False)
print(predictions)


In [None]:
# Score a sample review after zero-padding its encoded form.
sample_pred_text = (
    'The movie not good. The animation and the graphics '
    'were out of this world. I would recommend this movie.'
)
predictions = sample_predict(sample_pred_text, pad=True)
print(predictions)


In [None]:
# Plot training vs. validation accuracy across epochs.
plot_graphs(history, 'accuracy')


In [None]:
# Plot training vs. validation loss across epochs.
plot_graphs(history, 'loss')
