In [1]:
import random
import string
import mxnet as mx
from mxnet import gluon, nd
import numpy as np
from mxnet import autograd

# Problem size and optimisation hyper-parameters.
max_num = 999          # upper bound handed to the random number generator
dataset_size = 60000   # total number of sequences generated
seq_len = 5            # numbers per sequence
split = .8             # fraction of the samples used for training
lr = 0.01              # initial learning rate
batch_size = 512
seed = 42              # fixed so that re-running the notebook draws the same random numbers

# Seed every random number generator the notebook draws from
# (Python's `random`, NumPy and MXNet).
random.seed(seed)
np.random.seed(seed)
mx.random.seed(seed)

# Prefer a GPU when MXNet reports one, otherwise fall back to the CPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()





In [2]:
# Draw dataset_size rows of seq_len values each; the float samples are
# truncated to int32 by astype before being moved to NumPy.
# NOTE(review): uniform is documented as sampling a half-open interval, so
# max_num itself is presumably never produced - confirm this is intended.
X = (
    mx.random.uniform(low=0, high=max_num, shape=(dataset_size, seq_len))
    .astype('int32')
    .asnumpy()
)
# Targets are the same rows sorted in ascending order along the last axis.
Y = np.sort(X, axis=-1)

In [3]:
# Show the first unsorted row next to its sorted target.
first_input = X[0].tolist()
first_target = Y[0].tolist()
print("Input {}\nTarget {}".format(first_input, first_target))

Input [548, 592, 714, 843, 602]
Target [548, 592, 602, 714, 843]


In [4]:
# Train at character level: the vocabulary is the ten digits plus a space,
# which serves both as separator between numbers and as padding.
vocab = string.digits + " "
print(vocab)
# Map every character to its position in the vocabulary string.
vocab_idx = dict(zip(vocab, range(len(vocab))))
print(vocab_idx)



0123456789 
{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, ' ': 10}


In [5]:
# Each sequence is rendered as space-separated text, padded with spaces to
# max_len characters, and encoded one character at a time.
# Ex: "30 10" corresponds to [3, 0, 10, 1, 0].
# The input indices are then one-hot encoded into a matrix.

# Longest possible text: seq_len numbers of maximal width plus the
# single-space separators between them.
max_len = seq_len * len(str(max_num)) + (seq_len - 1)


def _encode(numbers):
    """Return the vocabulary indices of `numbers` written as space-separated
    text and right-padded with spaces up to max_len characters."""
    text = ' '.join(str(value) for value in numbers.tolist())
    padding = ' ' * (max_len - len(text))
    return [vocab_idx[ch] for ch in text + padding]


def transform(x, y):
    """Encode one (input, target) pair for the network.

    Parameters
    ----------
    x, y : objects exposing ``tolist()`` (NumPy rows in this notebook)
        The unsorted numbers and their sorted counterpart.

    Returns
    -------
    tuple
        ``(one_hot_x, y_indices)``: the one-hot encoding of the input
        characters with depth ``len(vocab)``, and the target characters as an
        NDArray of vocabulary indices.
    """
    x_indices = _encode(x)
    y_indices = _encode(y)
    return mx.nd.one_hot(mx.nd.array(x_indices), len(vocab)), mx.nd.array(y_indices)

In [6]:
# The first `split` share of the rows is used for training, the rest is held out.
split_idx = int(split*len(X))
train_dataset = gluon.data.ArrayDataset(X[:split_idx], Y[:split_idx]).transform(transform)
# The test set must be built from the rows *after* split_idx; slicing
# [:split_idx] here would make it an exact copy of the training set.
test_dataset = gluon.data.ArrayDataset(X[split_idx:], Y[split_idx:]).transform(transform)
train_dataset


<mxnet.gluon.data.dataset._LazyTransformDataset at 0x21c54d6b6d8>

In [7]:
# Compare the first raw row with what the transformed dataset yields for it.
sample_input, sample_target = train_dataset[0]
print("Input {}".format(X[0]))
print("Transformed data Input {}".format(sample_input))
print("Target {}".format(Y[0]))
print("Transformed data Target {}".format(sample_target))

Input [548 592 714 843 602]
Transformed data Input 
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
<NDArray 19x11 @cpu(0)>
Target [548 592 602 714 843]
Transformed data Target 
[ 5.  4.  8. 10.  5.  9.  2. 10.  6.  0.  2. 10.  7.  1.  4. 10.  8.  4.
  3.]
<NDArray 19 @cpu(0)>


In [10]:
# Both loaders share batch size and last-batch policy; only the training
# loader shuffles.
# NOTE(review): with last_batch='rollover' an incomplete final batch is
# carried over to the next epoch, so a single pass over test_data may skip
# the tail samples - confirm this is acceptable for evaluation.
loader_kwargs = dict(batch_size=batch_size, last_batch='rollover')
train_data = gluon.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_data = gluon.data.DataLoader(test_dataset, shuffle=False, **loader_kwargs)


In [8]:
# Per-character classifier: a 2-layer bidirectional LSTM over the one-hot
# characters, followed by a Dense layer applied at every time step
# (flatten=False) that scores each vocabulary entry.
net = gluon.nn.HybridSequential()
with net.name_scope():
    encoder = gluon.rnn.LSTM(
        hidden_size=128,
        num_layers=2,
        layout="NTC",
        bidirectional=True,
    )
    classifier = gluon.nn.Dense(len(vocab), flatten=False)
    net.add(encoder, classifier)

net.initialize(mx.init.Xavier(), ctx=ctx)
# Softmax cross-entropy between the per-character scores and the target indices.
loss = gluon.loss.SoftmaxCELoss()

In [11]:
# Multiply the learning rate by 0.75 every 10 * len(train_data) updates,
# starting from lr.
steps_per_decay = 10 * len(train_data)
schedule = mx.lr_scheduler.FactorScheduler(step=steps_per_decay, factor=0.75)
schedule.base_lr = lr



In [12]:
# Adam over all network parameters, with the learning rate driven by `schedule`.
adam_params = {'learning_rate': lr, 'lr_scheduler': schedule}
trainer = gluon.Trainer(
    net.collect_params(),
    optimizer='adam',
    optimizer_params=adam_params,
)


In [13]:
# Train for a fixed number of epochs, printing the mean batch loss and the
# current learning rate after each one.
epochs = 100
for epoch in range(epochs):
    running_loss = 0.0
    batches_seen = 0
    for batch_x, batch_y in train_data:
        batch_x = batch_x.as_in_context(ctx)
        batch_y = batch_y.as_in_context(ctx)

        # Record the forward pass so gradients can be computed afterwards.
        with mx.autograd.record():
            batch_loss = loss(net(batch_x), batch_y)

        batch_loss.backward()
        # The batch size is passed so the trainer can normalise the gradient.
        trainer.step(batch_x.shape[0])

        running_loss += batch_loss.mean()
        batches_seen += 1

    mean_loss = running_loss.asscalar() / batches_seen
    print("Epoch {} Loss: {} LR: {}".format(epoch, mean_loss, trainer.learning_rate))

Epoch 0 Loss: 1.6791600053028395 LR: 0.01
Epoch 1 Loss: 1.2018382295649102 LR: 0.01
Epoch 2 Loss: 0.9183298476198887 LR: 0.01
Epoch 3 Loss: 0.7715385923994348 LR: 0.01
Epoch 4 Loss: 0.5245490535613029 LR: 0.01
Epoch 5 Loss: 0.3499081794251787 LR: 0.01
Epoch 6 Loss: 0.25045983334805105 LR: 0.01


KeyboardInterrupt: 

In [None]:
# Testing: pick one random held-out sample (a row from split_idx onwards).
# The offset is drawn over the number of held-out rows; len(test_data) is the
# number of *batches* in the loader and would restrict the choice to the
# first few held-out rows.
n = random.randint(0, len(X) - split_idx - 1)

x_orig = X[split_idx+n]
y_orig = Y[split_idx+n]


In [None]:
def get_pred(x):
    """Run the network on one sequence and return its prediction as text.

    `x` is encoded with `transform`, given a leading batch axis of size one
    and passed through `net`; the highest-scoring vocabulary entry at each
    position is mapped back to its character and the characters are joined.
    """
    encoded, _ = transform(x, x)
    batch = encoded.as_in_context(ctx).expand_dims(axis=0)
    scores = net(batch)[0]
    char_ids = scores.argmax(axis=1).asnumpy().tolist()
    # argmax yields floats, hence the int() before indexing into vocab.
    return ''.join(vocab[int(idx)] for idx in char_ids)

In [None]:
# Show the raw input, the network's prediction and the expected sorted text.
x_ = " ".join(map(str, x_orig))
label = ' '.join(map(str, y_orig))
# The format string needs one placeholder per argument; without the third
# one the label was silently dropped from the output.
print("X {}\nPredicted {} \nLabel {}".format(x_, get_pred(x_orig), label))