In [None]:
import os, sys
# Put the parent directory of this notebook on the module search path
# (presumably so that the local `KerasTools` package imported below resolves).
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Uncomment these lines to deactivate a GPU
#
# As the problem size here is very small, training on a CPU might be faster
#
# NOTE: these variables are set before `keras`/`tensorflow` are imported below;
# keep that order if you enable them.
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    
import keras
import KerasTools as KT
import numpy as np

import tensorflow as tf
# Only let TensorFlow log messages of level ERROR through.
tf.logging.set_verbosity(tf.logging.ERROR)

In [None]:
# Preprocessing limits shared by every model in this notebook.
max_features = 1000  # vocabulary size: only the most frequent words are kept
maxlen = 500         # every review is padded/truncated to this many tokens

print('Load data...')
imdb_train, imdb_test = KT.datasets.imdb.load_data(num_words=max_features)
train_data, train_labels = imdb_train
test_data, test_labels = imdb_test

print('Pad sequences (samples x time)')
pad_sequences = keras.preprocessing.sequence.pad_sequences
x_train = pad_sequences(train_data, maxlen=maxlen)
x_test = pad_sequences(test_data, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

In [None]:
#
# Source: https://mlwhiz.com/blog/2018/12/17/text_classification/
#

def textcnn(inp):
    """Multi-width 1D convolutional feature extractor (TextCNN style).

    Four parallel `Conv1D` branches (kernel widths 1, 2, 3 and 5, 32 filters
    each) are applied to `inp`; every branch is reduced with a max over the
    whole time axis and the results are concatenated.

    # Arguments
        inp: 3D Keras tensor, e.g. the output of an `Embedding` layer.

    # Returns
        2D Keras tensor with `len(filter_sizes) * num_filters` (= 128)
        features per sample, ordered branch by branch.
    """
    filter_sizes = [1, 2, 3, 5]
    num_filters = 32

    pooled_branches = []
    for width in filter_sizes:
        conv = keras.layers.Conv1D(
            num_filters, kernel_size=width,
            kernel_initializer='he_normal', activation='relu', padding='causal'
        )(inp)
        # 'causal' padding keeps the full sequence length, so a pooling window
        # of `maxlen - width + 1` (sized for a 'valid' convolution) would skip
        # the last `width - 1` time steps.  Global max pooling always covers
        # every step and does not depend on the notebook-level `maxlen`.
        pooled_branches.append(keras.layers.GlobalMaxPooling1D()(conv))

    # Each branch is already (samples, num_filters); joining along the last
    # axis gives the same (samples, 128) layout a Flatten would have produced.
    return keras.layers.Concatenate()(pooled_branches)
    
class AttentionWithContext(keras.layers.Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports Masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention
    # Arguments
        W_regularizer, u_regularizer, b_regularizer: regularizers (anything
            accepted by `keras.regularizers.get`) for the projection matrix
            `W`, the context vector `u` and the bias `b`.
        W_constraint, u_constraint, b_constraint: constraints (anything
            accepted by `keras.constraints.get`) for `W`, `u` and `b`.
        bias: whether to add the bias `b` before the `tanh`.
    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.
    How to use:
    Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.
    Note: The layer has been tested with Keras 2.0.6
    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
        # next add a Dense layer (for classification/regression) or whatever...
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Run the base initializer first: Keras' `Layer.__init__` assigns its
        # own default to `supports_masking`, so setting the flag beforehand
        # can be silently undone.
        super(AttentionWithContext, self).__init__(**kwargs)
        self.supports_masking = True

        self.init = keras.initializers.get('glorot_uniform')

        self.W_regularizer = keras.regularizers.get(W_regularizer)
        self.u_regularizer = keras.regularizers.get(u_regularizer)
        self.b_regularizer = keras.regularizers.get(b_regularizer)

        self.W_constraint = keras.constraints.get(W_constraint)
        self.u_constraint = keras.constraints.get(u_constraint)
        self.b_constraint = keras.constraints.get(b_constraint)

        self.bias = bias

    def build(self, input_shape):
        """Create `W` (features x features), optional `b` and `u` (features,)."""
        assert len(input_shape) == 3
        feature_dim = input_shape[-1]

        # `shape` is passed by keyword: a positional shape only works through
        # Keras' legacy argument handling and clashes with `name=` on APIs
        # whose first positional parameter is `name`.
        self.W = self.add_weight(shape=(feature_dim, feature_dim),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight(shape=(feature_dim,),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight(shape=(feature_dim,),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def _dot_product(self, x, kernel):
        """Backend dot of `x` with `kernel`.

        `kernel` gets a trailing axis before the dot product and that axis is
        squeezed away again afterwards.
        """
        return keras.backend.squeeze(keras.backend.dot(x, keras.backend.expand_dims(kernel)), axis=-1)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of `x` over the time axis."""
        # Hidden representation of every time step: tanh(x . W + b).
        uit = self._dot_product(x, self.W)
        if self.bias:
            uit += self.b
        uit = keras.backend.tanh(uit)
        # Similarity of every time step with the context vector `u`.
        ait = self._dot_product(uit, self.u)

        # Softmax over the time axis, written out so masked steps can be zeroed.
        a = keras.backend.exp(ait)
        if mask is not None:
            # Cast before multiplying: the mask is not guaranteed to have the
            # float dtype of `a`, and mixing dtypes fails on some backends.
            a *= keras.backend.cast(mask, keras.backend.floatx())
        # epsilon guards against a division by zero when every step is masked.
        a /= keras.backend.sum(a, axis=1, keepdims=True) + keras.backend.epsilon()
        a = keras.backend.expand_dims(a)
        weighted_input = x * a

        return keras.backend.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        # The time axis is summed out: (samples, steps, features) -> (samples, features).
        return input_shape[0], input_shape[-1]


In [None]:
def build_network(intermediate):
    """Build, compile and summarise a binary classifier around `intermediate`.

    The network is Input(maxlen) -> Embedding(max_features, 3) ->
    `intermediate` -> Dense(1, sigmoid), compiled with binary cross-entropy,
    RMSprop and the accuracy metric.  `maxlen` and `max_features` are read
    from the notebook's global namespace.

    # Arguments
        intermediate: callable (a Keras layer or a function) that maps the
            embedded sequence tensor to the tensor fed into the output layer.

    # Returns
        The compiled `keras.models.Model`; its summary is printed as a side effect.
    """
    tokens = keras.layers.Input(shape=(maxlen, ))
    embedded = keras.layers.Embedding(max_features, 3, mask_zero=False)(tokens)
    # The intermediate part is applied before the output layer is created so
    # that layers are instantiated in the same order as they appear in the model.
    features = intermediate(embedded)
    prediction = keras.layers.Dense(1, activation='sigmoid')(features)

    network = keras.models.Model(inputs=tokens, outputs=prediction)
    network.compile(
        loss='binary_crossentropy',
        optimizer=keras.optimizers.RMSprop(),
        metrics=['accuracy'],
    )
    network.summary()
    return network

In [None]:
%%time
# 55.7 s on Intel i7-7700HQ CPU @ 2.80GHz 
# 22.7 s on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Baseline without any sequence model: flatten the embedded review and feed
# it through a single hidden Dense layer of 32 units.
model = build_network(
    lambda embedded: keras.layers.Dense(32, activation='relu')(
        keras.layers.Flatten()(embedded)
    )
)
history_embd = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
#  on Intel i7-7700HQ CPU @ 2.80GHz
#  on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Plain recurrent model: one SimpleRNN layer with 32 units.
model = build_network(
    keras.layers.SimpleRNN(32)
)
history_simplernn = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
#  on Intel i7-7700HQ CPU @ 2.80GHz
#  on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Gated recurrent model: one GRU layer with 32 units.
model = build_network(
    keras.layers.GRU(32)
)
history_gru = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
#  on Intel i7-7700HQ CPU @ 2.80GHz
#  on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Bidirectional variant: a 32-unit GRU wrapped in `Bidirectional`.
model = build_network(
    keras.layers.Bidirectional(keras.layers.GRU(32))
)
history_bigru = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
#  on Intel i7-7700HQ CPU @ 2.80GHz
#  on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# LSTM model: one LSTM layer with 32 units.
model = build_network(
    keras.layers.LSTM(32)
)
history_lstm = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
# 31min 59s on Intel i7-7700HQ CPU @ 2.80GHz
#  on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Bidirectional GRU that returns the whole sequence, reduced to one vector
# per review by the AttentionWithContext layer.
model = build_network(
    lambda embedded: AttentionWithContext()(
        keras.layers.Bidirectional(
            keras.layers.GRU(32, return_sequences=True)
        )(embedded)
    )
)
history_attn = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
%%time
# 6min 32s on Intel i7-7700HQ CPU @ 2.80GHz
# 13min 36s on Intel i7-7700HQ CPU @ 2.80GHz / Nvidia GeForce 940MX
# Convolutional model: the multi-width `textcnn` feature extractor.
model = build_network(
    textcnn
)
history_tcnn = model.fit(
    x=x_train, y=train_labels,
    batch_size=256, epochs=50, validation_split=0.1,
)

In [None]:
# Matplotlib standard palette
# ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

import bqplot, bqplot.pyplot
import ipywidgets

axes_loss = {'x': {'label': 'Epochs'},
             'y': {'label': 'Losses', 'label_offset': '50px', 'tick_style': {'font-size': 10}}}
axes_acc = {'x': {'label': 'Epochs'},
            'y': {'label': 'Accuracy', 'label_offset': '50px', 'tick_style': {'font-size': 10}}}

# One (training history, line colour) pair per model, in drawing order.
# Colours are taken from the Matplotlib standard palette.
validation_curves = [
    (history_embd, '#1f77b4'),
    (history_simplernn, '#ff7f0e'),
    (history_gru, '#2ca02c'),
    (history_attn, '#d62728'),
    (history_tcnn, '#9467bd'),
    (history_bigru, '#8c564b'),
    (history_lstm, '#e377c2'),
]


def _recorded_metric(history, *metric_names):
    """Return the values of the first of `metric_names` present in `history.history`.

    Several names can be given because the key under which Keras stores a
    metric differs between releases (e.g. `val_acc` vs. `val_accuracy`).
    Raises `KeyError` if none of the names was recorded.
    """
    for metric_name in metric_names:
        if metric_name in history.history:
            return history.history[metric_name]
    raise KeyError('none of {} found in the training history (available: {})'.format(
        metric_names, sorted(history.history)))


def _plot_validation_curves(curves, axes_options, *metric_names):
    """Draw one line per (history, colour) pair into a new bqplot figure and return it."""
    fig = bqplot.pyplot.figure(min_aspect_ratio=4/3, max_aspect_ratio=4/3)
    for position, (history, color) in enumerate(curves):
        values = _recorded_metric(history, *metric_names)
        # The axes only need to be configured once, with the first line.
        extra = {'axes_options': axes_options} if position == 0 else {}
        # The x range follows the number of recorded epochs.  It used to be a
        # fixed `range(30)`, which does not match the 50 epochs the models are
        # trained for, so x and y had different lengths.
        bqplot.pyplot.plot(range(len(values)), values, colors=[color], **extra)
    return fig


loss_plt = _plot_validation_curves(validation_curves, axes_loss, 'val_loss')
acc_plt = _plot_validation_curves(validation_curves, axes_acc, 'val_acc', 'val_accuracy')

display(ipywidgets.HBox([loss_plt, acc_plt]))