# High-level RNN MXNet Example

In [1]:
import os
import sys
import numpy as np
import mxnet as mx
from mxnet.io import DataDesc
from common.params_lstm import *
from common.utils import *

In [2]:
# Restrict CUDA to device index 0 so the notebook runs on a single GPU
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Report the software/hardware stack used for this run.
# Each value is looked up lazily, immediately before its line is printed,
# so the lookups happen in the same order as the printed lines.
environment_report = (
    ("OS: ", lambda: sys.platform),
    ("Python: ", lambda: sys.version),
    ("Numpy: ", lambda: np.__version__),
    ("MXNet: ", lambda: mx.__version__),
    ("GPU: ", lambda: get_gpu_name()),
)
for label, lookup in environment_report:
    print(label, lookup())
# The CUDA helper's return value is printed as-is, without a label
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
MXNet:  0.12.0
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
def create_symbol(CUDNN=True,
                  maxf=MAXFEATURES, edim=EMBEDSIZE, nhid=NUMHIDDEN, maxl=MAXLEN):
    """Build the symbolic graph: embedding -> single-layer GRU -> 2-unit softmax.

    Args:
        CUDNN: If True, use ``mx.rnn.FusedRNNCell`` (mode 'gru', one layer);
            otherwise use the plain ``mx.rnn.GRUCell``.
        maxf: ``input_dim`` of the embedding layer.
        edim: ``output_dim`` of the embedding layer.
        nhid: Number of hidden units of the GRU cell.
        maxl: Number of time steps the GRU is unrolled over.

    Returns:
        The ``mx.symbol.SoftmaxOutput`` symbol (named "softmax") that takes the
        variables 'data' and 'softmax_label' as inputs.
    """
    # https://mxnet.incubator.apache.org/api/python/rnn.html
    data = mx.symbol.Variable('data')
    embedded_step = mx.symbol.Embedding(data=data, input_dim=maxf, output_dim=edim)

    # Fusing RNN layers across time step into one kernel
    # Improves speed but is less flexible
    # Currently only supported if using cuDNN on GPU
    if not CUDNN:
        gru_cell = mx.rnn.GRUCell(num_hidden=nhid)
    else:
        gru_cell = mx.rnn.FusedRNNCell(num_hidden=nhid, num_layers=1, mode='gru')

    # Unroll the cell over all `maxl` time steps. No begin_state is passed, so
    # unroll() creates the default initial state itself; the final states are
    # not needed here and are discarded.
    # TODO: TNC layout (sequence length, batch size, and feature dimensions) is faster for RNN
    outputs, _ = gru_cell.unroll(length=maxl, inputs=embedded_step, merge_outputs=False)

    # merge_outputs=False yields one output per time step; only the last one
    # feeds the classifier.
    fc1 = mx.symbol.FullyConnected(data=outputs[-1], num_hidden=2)
    input_y = mx.symbol.Variable('softmax_label')
    m = mx.symbol.SoftmaxOutput(data=fc1, label=input_y, name="softmax")
    return m

In [5]:
def init_model(m, batchs=BATCHSIZE, maxl=MAXLEN, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):
    """Wrap symbol `m` in a Module on GPU 0, bound, initialised and ready to train.

    Args:
        m: Network symbol exposing the inputs 'data' and 'softmax_label'.
        batchs: Batch size used for the bound data and label shapes.
        maxl: Second dimension of the bound 'data' shape.
        lr: Adam ``learning_rate``.
        b1: Adam ``beta1``.
        b2: Adam ``beta2``.
        eps: Adam ``epsilon``.

    Returns:
        The ``mx.mod.Module`` after bind, parameter init and optimizer init.
    """
    # Input descriptors: data is (batchs, maxl), labels are (batchs,)
    data_desc = DataDesc(name='data', shape=(batchs, maxl))
    label_desc = DataDesc(name='softmax_label', shape=(batchs,))
    adam_params = (
        ('learning_rate', lr),
        ('beta1', b1),
        ('beta2', b2),
        ('epsilon', eps),
    )

    module = mx.mod.Module(context=[mx.gpu(0)], symbol=m)
    module.bind(data_shapes=[data_desc], label_shapes=[label_desc])
    # Glorot-uniform initializer
    module.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))
    module.init_optimizer(optimizer='Adam', optimizer_params=adam_params)
    return module

In [6]:
%%time
# Load the IMDB arrays prepared with seq_len=MAXLEN and max_features=MAXFEATURES
x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)
# NOTE: TNC layout is faster for RNN
# Shuffled iterator over the training set
train_iter = mx.io.NDArrayIter(data=x_train, label=y_train, batch_size=BATCHSIZE, shuffle=True)

# Sanity check: shapes on the first line, dtypes on the second
imdb_arrays = (x_train, x_test, y_train, y_test)
print(*[arr.shape for arr in imdb_arrays])
print(*[arr.dtype for arr in imdb_arrays])

Data does not exist. Downloading https://ikpublictutorial.blob.core.windows.net/deeplearningframeworks/imdb.npz
Preparing train set...
Preparing test set...
Trimming to 30000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000,) (25000,)
int32 int32 int32 int32
CPU times: user 6.05 s, sys: 512 ms, total: 6.56 s
Wall time: 8.13 s


In [7]:
%%time
# Build the network symbol, explicitly requesting the CUDNN=True code path
# See Notebook "MXNet_RNN_TNC.ipynb" for example with TNC layout
sym = create_symbol(CUDNN=True)

CPU times: user 44 ms, sys: 709 µs, total: 44.7 ms
Wall time: 45.6 ms




In [8]:
%%time
# Create the trainable model from the symbol built above
model = init_model(m=sym)

CPU times: user 975 ms, sys: 572 ms, total: 1.55 s
Wall time: 1.55 s


In [9]:
%%time
# Main training loop (12.7s was quoted here originally; the %%time output of
# the actual run is the authoritative figure)
metric = mx.metric.create('acc')
for epoch in range(EPOCHS):
    # Start every epoch with a fresh accuracy counter and a rewound iterator
    metric.reset()
    train_iter.reset()
    for train_batch in train_iter:
        model.forward(train_batch, is_train=True)
        # Accumulate training accuracy from this batch's forward outputs
        model.update_metric(metric, train_batch.label)
        model.backward()
        model.update()
    print('Epoch %d, Training %s' % (epoch, metric.get()))

Epoch 0, Training ('accuracy', 0.7748960997442456)
Epoch 1, Training ('accuracy', 0.9239130434782609)
Epoch 2, Training ('accuracy', 0.9643941815856778)
CPU times: user 19.9 s, sys: 5.64 s, total: 25.6 s
Wall time: 24.1 s


In [10]:
%%time
# Main evaluation loop (1.52s was quoted here originally; the %%time output of
# the actual run is the authoritative figure)
test_iter = mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False)
# Take the highest-scoring entry along the last axis of the predictions
y_guess = model.predict(test_iter).asnumpy().argmax(axis=-1)

CPU times: user 2.36 s, sys: 351 ms, total: 2.71 s
Wall time: 2.54 s


In [11]:
# Fraction of test predictions equal to the label. np.mean does the reduction
# in NumPy instead of the builtin sum() iterating over the array in Python.
print("Accuracy: ", np.mean(y_guess == y_test))

Accuracy:  0.85864
