In [1]:
from __future__ import absolute_import, division, print_function

#!pip install -q tensorflow-gpu==2.0.0-alpha0
import tensorflow_datasets as tfds
import tensorflow as tf

In [2]:
import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot a training metric and, when recorded, its validation counterpart.

  Args:
    history: Object whose ``history`` attribute maps metric names to
      per-epoch value sequences (presumably the value returned by
      ``model.fit`` -- confirm against the caller).
    string: Name of the metric to plot. The validation series is looked up
      under ``'val_' + string``.

  Raises:
    KeyError: If ``string`` itself is not a key of ``history.history``.
  """
  metrics = history.history
  val_key = 'val_' + string

  plt.plot(metrics[string])
  legend_labels = [string]

  # Validation metrics only exist when validation data was passed to
  # training; plot them when present instead of failing with a KeyError.
  if val_key in metrics:
    plt.plot(metrics[val_key])
    legend_labels.append(val_key)

  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend(legend_labels)
  plt.show()

In [3]:
# Load the subword-encoded IMDB reviews together with the dataset metadata.
dataset, info = tfds.load(
    'imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)

# Keep a handle on each split under its own name.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [4]:
# Reuse the encoder attached to the dataset's 'text' feature.
tokenizer = info.features["text"].encoder

In [5]:
# Report how many distinct ids the encoder can produce.
print("Vocabulary size : {}".format(tokenizer.vocab_size))

Vovaulary size : 8185


In [6]:
# Short sentence used below to demonstrate encoding and decoding.
sample_string = "TensorFlow is cool."

In [7]:
# Encode the sample sentence into token ids and display them.
tokenized_string = tokenizer.encode(sample_string)
print(
    "Tokenized string is {}".format(tokenized_string)
)

Tokenized string is [6307, 2327, 4043, 4265, 9, 2724, 7975]


In [8]:
# Decode the ids back into text to check the round trip.
original_string = tokenizer.decode(tokenized_string)
print(
    "The original string is: {}".format(original_string)
)

The original string is: TensorFlow is cool.


In [9]:
# Encoding followed by decoding must reproduce the input text exactly.
assert sample_string == original_string

In [10]:
# Show the text each individual token id decodes to.
for token_id in tokenized_string:
    decoded_piece = tokenizer.decode([token_id])
    print("{} -------------> {}".format(token_id, decoded_piece))

6307 -------------> Ten
2327 -------------> sor
4043 -------------> Fl
4265 -------------> ow 
9 -------------> is 
2724 -------------> cool
7975 -------------> .


In [11]:
# Number of examples per padded batch.
BATCH_SIZE = 64
# Size of the shuffle buffer applied to the training split.
BUFFER_SIZE = 10000

In [18]:
# Scratch check: a rank-1 shape whose single dimension is unknown.
tf.TensorShape(dims=[None])

TensorShape([None])

In [19]:
# Per-example padded shapes: a rank-1 sequence of unknown length and a scalar.
PADDED_SHAPES = (tf.TensorShape([None]), tf.TensorShape([]))

# Build both pipelines from the raw splits instead of reassigning
# `train_dataset` / `test_dataset` from themselves. That keeps this cell
# idempotent: re-running the self-referential form would shuffle and batch
# a dataset that had already been batched.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, PADDED_SHAPES)
)

test_dataset = dataset['test'].padded_batch(BATCH_SIZE, PADDED_SHAPES)

In [20]:
# Stack the layers one at a time: embedding -> bidirectional LSTM ->
# ReLU dense layer -> single sigmoid output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=tokenizer.vocab_size,
                                    output_dim=64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=64)))
model.add(tf.keras.layers.Dense(units=64, activation='relu'))
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

In [21]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Train for 10 epochs, evaluating on the test pipeline after each one.
# The returned object is kept so its metrics can be plotted afterwards.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10,
)

Epoch 1/10
    263/Unknown - 521s 2s/step - loss: 0.5937 - accuracy: 0.6627

KeyboardInterrupt: 