## Setup

In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Record the TensorFlow version this notebook was run with.
print(f'Tensorflow version is {tf.__version__}')

## Load Dataset

In [0]:
imdb = keras.datasets.imdb

# num_words caps the vocabulary used to encode the reviews
# (see the Keras imdb.load_data documentation for the exact semantics).
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000)

# Report how many reviews ended up in each split.
print('Training data {}'.format(len(train_data)))
print('Testing data {}'.format(len(test_data)))

## Data Exploration

In [0]:
# Peek at one raw example: the encoded review and the label stored alongside it.
print("The first training sample:", train_data[0])
print("The first training sample's label:", train_labels[0])

In [0]:
# Keras provides the vocabulary as a word -> integer index dictionary.
# Every index is shifted up by 3 so that ids 0-3 stay free for the
# special tokens registered right below.
word_index = {word: idx + 3 for word, idx in imdb.get_word_index().items()}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup (integer id -> word), used to turn encoded reviews back into text.
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(text, index_to_word=None):
    """Turn a sequence of integer word ids back into a readable string.

    Args:
        text: Iterable of integer word ids (for example one encoded review).
        index_to_word: Optional mapping from id to word. When omitted, the
            notebook-level ``reverse_word_index`` is used, so existing
            one-argument calls behave exactly as before.

    Returns:
        The looked-up words joined by single spaces. Ids that are missing
        from the mapping are rendered as '?'.
    """
    if index_to_word is None:
        # Resolved at call time so the function picks up the mapping built
        # earlier in the notebook.
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i, '?') for i in text)
  
# Sanity check: decode the first training review back into words.
# As the last expression of the cell, the resulting string is displayed as its output.
decode_review(train_data[0])

## Padding

In [0]:
# Every review is brought to this fixed number of word ids.
MAX_REVIEW_LENGTH = 256


def pad_reviews(sequences, maxlen=MAX_REVIEW_LENGTH):
    """Return `sequences` as fixed-length rows of `maxlen` word ids.

    Shorter sequences are filled at the end ('post') with the <PAD> id;
    longer ones are cut down by pad_sequences using its default
    truncation setting.
    """
    return keras.preprocessing.sequence.pad_sequences(
        sequences,
        value=word_index["<PAD>"],
        padding='post',
        maxlen=maxlen)


# Apply the exact same padding to both splits.
train_data_pd = pad_reviews(train_data)
test_data_pd = pad_reviews(test_data)

print('Shape of padded training set', train_data_pd.shape)
print('Shape of padded testing set', test_data_pd.shape)

# Show the first padded review, both as ids and decoded back to words.
print(train_data_pd[0])
print(decode_review(train_data_pd[0]))

## Experimental Protocol

In [0]:
# Hold out the first 10,000 padded training reviews (and their labels) for
# validation; the remainder is what the model is actually fitted on.
x_validation, x_train = train_data_pd[:10000], train_data_pd[10000:]
y_validation, y_train = train_labels[:10000], train_labels[10000:]

# The padded test split is used unchanged for the final evaluation.
x_test, y_test = test_data_pd, test_labels

## Training

In [0]:
# Size of the embedding table; same value as num_words used when loading the
# dataset -- keep the two in sync.
vocab_size = 10000

# CuDNNGRU is a GPU-only layer and is not exposed under keras.layers in
# TensorFlow 2.x. Use it when it is present and a GPU is available; otherwise
# fall back to the generic GRU layer so the notebook still runs.
cudnn_gru = getattr(keras.layers, 'CuDNNGRU', None)
if cudnn_gru is not None and tf.test.is_gpu_available():
    recurrent_layer = cudnn_gru(100)
else:
    recurrent_layer = keras.layers.GRU(100)

# NOTE(review): the inputs are post-padded and the Embedding layer applies no
# masking, so the recurrent layer also steps over the trailing <PAD> ids --
# confirm this is intended.
model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, 25))  # 25-dimensional word vectors
model.add(recurrent_layer)                          # 100 recurrent units
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))  # single sigmoid output


# Binary classification setup: sigmoid output trained with binary cross-entropy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])


# Fit on the training portion while monitoring the held-out validation set.
history = model.fit(x_train,
                    y_train,
                    epochs=15,
                    batch_size=512,
                    validation_data=(x_validation, y_validation),
                    verbose=1)

# Final evaluation on the test split: the loss followed by the compiled metric (accuracy).
results = model.evaluate(x_test, y_test)

print(results)