# High-level RNN CNTK Example

In [1]:
import numpy as np
import os
import sys
import cntk
from cntk.layers import Embedding, LSTM, GRU, Dense, Recurrence
from cntk import sequence
from common.params_lstm import *
from common.utils import *

In [2]:
# Expose only GPU 0 to CUDA so the run is forced onto a single device
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Report the platform and library versions this run was produced with
for component, version in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("CNTK: ", cntk.__version__),
):
    print(component, version)
# GPU, CUDA and CuDNN details are looked up by project helper functions;
# they are called one at a time so earlier lines still print if one fails
print("GPU: ", get_gpu_name())
print(get_cuda_version())
print("CuDNN Version ", get_cudnn_version())

OS:  linux
Python:  3.5.2 |Anaconda custom (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]
Numpy:  1.14.1
CNTK:  2.4
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']
CUDA Version 8.0.61
CuDNN Version  6.0.21


In [4]:
def create_symbol(CUDNN=True, edim=EMBEDSIZE, nhid=NUMHIDDEN):
    """Build the embedding -> single-layer GRU -> Dense(2) network.

    The network is built on the notebook-level ``features`` input, which
    must be defined before this function is called.

    Args:
        CUDNN: if true, the recurrent layer is ``cntk.ops.optimized_rnnstack``;
            otherwise it is ``Recurrence(GRU(nhid))``.
        edim: size of the embedding layer.
        nhid: number of hidden units in the GRU.

    Returns:
        The output of ``Dense(2)`` applied to the last step of the
        recurrent layer's output sequence.
    """
    # Weights are initialised from a Glorot-uniform distribution;
    # activation (unless states) is None
    with cntk.layers.default_options(init=cntk.glorot_uniform()):
        # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, EMBEDSIZE)
        embedded = Embedding(edim)(features)

        if CUDNN:
            # For a vanilla RNN, optimized_rnnstack goes down to the CuDNN level
            # quickly. This is one more reason not to read much into the speed
            # comparison: it becomes a measure of which framework has the fastest
            # way to go down to CuDNN.
            rnn_weights = cntk.parameter((cntk.InferredDimension, 4))
            hidden = cntk.ops.optimized_rnnstack(
                embedded, rnn_weights, nhid,
                num_layers=1, bidirectional=False, recurrent_op='gru')
        else:
            # The more flexible Recurrence(GRU) unit, which would allow e.g.
            # LayerNormalisation to be added to the network.
            # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, NUMHIDDEN)
            hidden = Recurrence(GRU(nhid))(embedded)

        # output: array with shape=(BATCHSIZE, NUMHIDDEN)
        final_state = sequence.last(hidden)
        # output: array with shape=(BATCHSIZE, 2)
        return Dense(2)(final_state)

In [5]:
def init_model(m, labels, lr=LR, b1=BETA_1, b2=BETA_2, eps=EPS):
    """Create a CNTK trainer for network ``m`` using an Adam learner.

    Args:
        m: the network whose parameters are trained.
        labels: the label input the loss and error metric are computed against.
        lr: learning rate, applied per minibatch.
        b1: momentum passed to Adam.
        b2: variance momentum passed to Adam.
        eps: epsilon passed to Adam.

    Returns:
        A ``cntk.Trainer`` using softmax cross-entropy as the loss and
        classification error as the evaluation metric.
    """
    # Loss (dense labels); check if support for sparse labels
    ce_loss = cntk.cross_entropy_with_softmax(m, labels)

    # ADAM, with unit_gain set to False to match the other implementations
    lr_schedule = cntk.learning_rate_schedule(lr, cntk.UnitType.minibatch)
    momentum = cntk.momentum_schedule(b1)
    variance_momentum = cntk.momentum_schedule(b2)
    adam_learner = cntk.adam(
        m.parameters,
        lr=lr_schedule,
        momentum=momentum,
        variance_momentum=variance_momentum,
        epsilon=eps,
        unit_gain=False,
    )

    error_metric = cntk.classification_error(m, labels)
    return cntk.Trainer(m, (ce_loss, error_metric), [adam_learner])

In [6]:
%%time
# Load the data in the format the library expects (CNTK format: one-hot labels)
x_train, x_test, y_train, y_test = imdb_for_library(
    seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)
# Cast both label arrays to float32
y_train, y_test = (y.astype(np.float32) for y in (y_train, y_test))
# Sanity check: shapes on the first line, dtypes on the second
print(*(arr.shape for arr in (x_train, x_test, y_train, y_test)))
print(*(arr.dtype for arr in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
Trimming to 30000 max-features
Padding to length 150
(25000, 150) (25000, 150) (25000, 2) (25000, 2)
int32 int32 float32 float32
CPU times: user 5.77 s, sys: 379 ms, total: 6.15 s
Wall time: 6.15 s


In [7]:
%%time
# Placeholders: a sparse sequence input of width MAXFEATURES and a 2-wide label input
features = sequence.input_variable(MAXFEATURES, is_sparse=True)
labels = cntk.input_variable(shape=2)
# Build the network symbol (create_symbol reads the `features` placeholder above)
sym = create_symbol()

CPU times: user 10.6 ms, sys: 32.4 ms, total: 43 ms
Wall time: 52.9 ms


In [8]:
%%time
# Wrap the network in a trainer (loss, error metric and Adam learner)
trainer = init_model(m=sym, labels=labels)

CPU times: user 110 ms, sys: 262 ms, total: 371 ms
Wall time: 377 ms


In [9]:
%%time
# Main training loop: 14.6s
for epoch in range(1, EPOCHS + 1):
    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
        # TODO: do this externally and generate batches of 1hot
        data_1hot = cntk.Value.one_hot(data, MAXFEATURES)
        trainer.train_minibatch({features: data_1hot, labels: label})
    # Log: the reported figure comes from the last minibatch of the epoch only,
    # not from an average over all of its minibatches
    eval_error = trainer.previous_minibatch_evaluation_average
    print("Epoch %d  |  Accuracy: %.6f" % (epoch, 1 - eval_error))

Epoch 1  |  Accuracy: 0.890625
Epoch 2  |  Accuracy: 0.875000
Epoch 3  |  Accuracy: 0.968750
CPU times: user 13.7 s, sys: 1.31 s, total: 15 s
Wall time: 14.6 s


In [10]:
%%time
# Main evaluation loop: 2.55s
# Softmax on top of the trained network turns its two outputs into class probabilities
z = cntk.softmax(sym)
# Truncate to a whole number of minibatches so the prediction buffer lines up
# with the batches written below (assumes yield_mb only yields full batches
# of BATCHSIZE rows -- TODO confirm against common.utils)
n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE
# Builtin int gives the same dtype as the old np.int alias, which was
# deprecated in NumPy 1.20 and removed in NumPy 1.24
y_guess = np.zeros(n_samples, dtype=int)
# Ground-truth class index recovered from the one-hot labels
y_truth = np.argmax(y_test[:n_samples], axis=-1)
for batch_idx, (data, label) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):
    # Same one-hot conversion as in the training loop
    data_1hot = cntk.Value.one_hot(data, MAXFEATURES)
    predicted_label_probs = z.eval({features: data_1hot})
    # Store the arg-max class of each sample in this batch's slot of the buffer
    start = batch_idx * BATCHSIZE
    y_guess[start:start + BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)

CPU times: user 2.38 s, sys: 156 ms, total: 2.54 s
Wall time: 2.54 s


In [11]:
# Fraction of test predictions that match the ground truth. np.mean reduces the
# boolean array in one vectorised call, whereas the builtin sum() would iterate
# over it element by element in Python.
print("Accuracy: ", np.mean(y_guess == y_truth))

Accuracy:  0.8565304487179487
