**Let's start by importing the libraries we need:**

In [1]:
import numpy as np
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import GRU
from keras.preprocessing import sequence
from keras.callbacks import EarlyStopping
from keras.datasets import imdb

Using TensorFlow backend.


**We'll be using the IMDB dataset of movie reviews, each labeled with its sentiment (positive or negative). We keep only the `n_words` most frequent words; load the data with the following code:**

In [2]:
# Vocabulary cap handed to the loader via num_words
n_words = 1000

# load_data returns a (train, test) pair; each element is an (inputs, labels) pair
train_split, test_split = imdb.load_data(num_words=n_words)
X_train, y_train = train_split
X_test, y_test = test_split

# Report how many sequences each split contains
print('Train seq: {}'.format(len(X_train)))
print('Test seq: {}'.format(len(X_test)))

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
Train seq: 25000
Test seq: 25000


**The reviews vary in length, so we pad (or truncate) every sequence to `max_len` tokens to give the network fixed-size input:**

In [0]:
# Bring every sequence in both splits to the same length, max_len

max_len = 200

# The same padding call is applied to each split; the generator yields the
# padded train array first and the padded test array second.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (X_train, X_test)
)

**We are now ready to define our network architecture:**

In [4]:
# Build the network as one ordered layer stack, then compile it

model = Sequential([
    # Token index -> 50-dimensional vector, for inputs of length max_len
    Embedding(n_words, 50, input_length=max_len),
    Dropout(0.2),
    # Recurrent layer with 100 units; dropout on both inputs and recurrent state
    GRU(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(250, activation='relu'),
    Dropout(0.2),
    # Single sigmoid unit, paired with the binary cross-entropy loss below
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 200, 50)           50000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 200, 50)           0         
_________________________________________________________________
gru_1 (GRU)                  (None, 100)               45300     
_________________________________________________________________
dense_1 (Dense)              (None, 250)               25250     
_________________________________________________________________
dropout_2 (Dropout)          (None, 250)               0         
_________________________________________________________________
dense_2 (Dense)      

**We use early stopping to halt training once performance on the validation data stops improving, which helps prevent overfitting:**

In [0]:
# Stop training once the validation loss has not improved for 3 consecutive epochs.
#
# Why 'val_loss' rather than the validation accuracy: the accuracy log key depends
# on the Keras version ('val_acc' in older releases, 'val_accuracy' from Keras 2.3
# on when the model is compiled with metrics=['accuracy']). If the monitored key is
# never logged, EarlyStopping only emits a warning and training runs for every
# epoch. 'val_loss' is logged under the same name whenever validation data is used.
#
# restore_best_weights=True rolls the model back to the weights of its best epoch
# when training stops, instead of keeping the weights from `patience` epochs later.
callbacks = [EarlyStopping(monitor = 'val_loss',
                           patience = 3,
                           restore_best_weights = True)]

**Let's define the hyperparameters and start training our network:**

In [6]:
# Training hyperparameters
batch_size = 512
n_epochs = 100   # upper bound on epochs; the callbacks passed to fit may end training sooner

# Fit on the training data, holding out 20% of it for validation.
# The fit call stays the last expression so the cell still shows its return value.
model.fit(
    x=X_train,
    y=y_train,
    epochs=n_epochs,
    batch_size=batch_size,
    callbacks=callbacks,
    validation_split=0.2,
)

Instructions for updating:
Use tf.cast instead.
Train on 20000 samples, validate on 5000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


<keras.callbacks.History at 0x7f844a6cc898>

**Finally, we can check the performance of our trained network on the test set:**

In [7]:
# evaluate returns the loss followed by the compiled metrics; index 1 is the first metric
test_scores = model.evaluate(X_test, y_test)
print('Accuracy on test set: {}'.format(test_scores[1]))

Accuracy on test set: 0.77688
