In [1]:
# Based on
# https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/6.2-understanding-recurrent-neural-networks.ipynb

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Select the inline matplotlib backend for this notebook.
%matplotlib inline
# NOTE(review): %pylab populates the interactive namespace from numpy and matplotlib
# (see the message printed below). Explicit `import numpy as np` /
# `import matplotlib.pyplot as plt` would make dependencies clearer, but later cells
# may rely on the injected names - confirm before replacing.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [4]:
import tensorflow as tf
# Set TensorFlow's logging verbosity threshold to ERROR.
# NOTE(review): tf.logging looks like a TF 1.x-era API (the recorded run used 1.8.0) -
# confirm it still exists before upgrading TensorFlow.
tf.logging.set_verbosity(tf.logging.ERROR)
# Record the TensorFlow version the notebook was executed with.
print(tf.__version__)

1.8.0


In [5]:
# https://keras.io/datasets/#imdb-movie-reviews-sentiment-classification
max_features = 10000  # vocabulary size: passed as num_words below and as the Embedding input_dim later
maxlen = 500  # fixed review length in tokens: used as pad_sequences maxlen and Embedding input_length in later cells

# Each review is encoded as a sequence of word indexes,
# indexed by overall frequency in the dataset.
# Labels are 0 (negative) or 1 (positive).
imdb = tf.keras.datasets.imdb.load_data(num_words=max_features)
# The loaded value unpacks into a (train, test) pair, each an (inputs, labels) tuple.
(raw_input_train, y_train), (raw_input_test, y_test) = imdb

In [6]:
# https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/sequence/pad_sequences

# Bring every review to exactly maxlen tokens so that both splits become
# rectangular arrays (the next cell shows the resulting shapes).
# NOTE(review): per the pad_sequences docs the defaults pad AND truncate at the
# front of each sequence, so long reviews would keep their last maxlen tokens -
# confirm this is the intended behaviour.
input_train, input_test = (
    tf.keras.preprocessing.sequence.pad_sequences(raw_reviews, maxlen=maxlen)
    for raw_reviews in (raw_input_train, raw_input_test)
)

In [7]:
# Sanity check: shapes of the padded inputs and of their label vectors.
tuple(arr.shape for arr in (input_train, input_test, y_train, y_test))

((25000, 500), (25000, 500), (25000,), (25000,))

## GRU

In [8]:
# tf.keras.layers.GRU?

In [9]:
# Background reading on batch normalization:
# https://towardsdatascience.com/batch-normalization-in-neural-networks-1ac91516821c
# https://www.quora.com/Why-does-batch-normalization-help

embedding_dim = 32
dropout = 0.15
recurrent_dropout = 0.2

# Parameter counts of recurrent layers
# (https://arxiv.org/ftp/arxiv/papers/1701/1701.05923.pdf),
# with n = output dimension and m = input dimension:
#   RNN  =      n**2 + nm + n
#   GRU  = 3 × (n**2 + nm + n)
#   LSTM = 4 × (n**2 + nm + n)
# Here n = m = 32 for both GRU layers, i.e. 3 * (1024 + 1024 + 32) = 6240 each,
# which matches the summary printed below.
#
# Optional regularisation that can be inserted after either GRU layer:
#   tf.keras.layers.BatchNormalization()
#   tf.keras.layers.Dropout(dropout)
# BatchNormalization over 32 features adds 32*2 trainable parameters
# ("standard deviation" gamma and "mean" beta) plus 32*2 non-trainable ones
# (moving_mean and moving_variance).

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(name='embedding', input_dim=max_features, output_dim=embedding_dim, input_length=maxlen),
    # Recurrent layers stack like fully connected ones; the first must return the
    # whole sequence so that the second has a sequence to consume.
    tf.keras.layers.GRU(name='gru1', units=32, dropout=dropout, recurrent_dropout=recurrent_dropout, return_sequences=True),
    tf.keras.layers.GRU(name='gru2', units=32),
    # Binary classifier head: one sigmoid unit on top of a small dense layer.
    tf.keras.layers.Dense(name='fc', units=32, activation='relu'),
    tf.keras.layers.Dense(name='classifier', units=1, activation='sigmoid'),
])

# Alternative optimizer tried previously: optimizer='rmsprop'
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 500, 32)           320000    
_________________________________________________________________
gru1 (GRU)                   (None, 500, 32)           6240      
_________________________________________________________________
gru2 (GRU)                   (None, 32)                6240      
_________________________________________________________________
fc (Dense)                   (None, 32)                1056      
_________________________________________________________________
classifier (Dense)           (None, 1)                 33        
Total params: 333,569
Trainable params: 333,569
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Mini-batch size; also reused by the model.evaluate calls in the following cells.
batch_size = 1000

# validation_split=0.2 holds part of the training data out for validation
# (the recorded run reports 20000 training / 5000 validation samples).
# The %time magic reports how long the fit took; fit's return value is kept in `history`.
%time history = model.fit(input_train, y_train, epochs=10, batch_size=batch_size, validation_split=0.2)

Train on 20000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Wall time: 5min 27s


In [11]:
# Loss and accuracy over all of input_train, i.e. including the samples that
# were held out as the validation split during fitting.
train_loss, train_accuracy = model.evaluate(x=input_train, y=y_train, batch_size=batch_size)
train_accuracy



0.746199996471405

In [12]:
# Loss and accuracy on the test split, which was not passed to fit.
test_loss, test_accuracy = model.evaluate(x=input_test, y=y_test, batch_size=batch_size)
test_accuracy



0.6975999999046326

In [13]:
# prediction: sigmoid outputs for the first five test reviews
# (compare against the 0 = negative / 1 = positive labels)
model.predict(input_test[:5])

array([[0.37094206],
       [0.60916513],
       [0.20637871],
       [0.5892426 ],
       [0.7700812 ]], dtype=float32)

In [14]:
# ground truth: labels of the same first five test reviews
y_test[:5]

array([0, 1, 1, 0, 1], dtype=int64)