In [84]:
%matplotlib inline
from random import choice, randrange
import mxnet as mx
import numpy as np
# Device handle for MXNet: CPU with device id 0.
ctx=mx.cpu(0)
import logging
# Lower the root logger threshold to DEBUG so that the INFO-level progress
# lines emitted during training show up in the cell output.
logging.getLogger().setLevel(logging.DEBUG)

In [104]:
# Marker characters that frame every generated sequence.
EOS = '§'  # end-of-sequence marker
SOS = '#'  # start-of-sequence marker

# Payload alphabet first, then EOS and SOS, so that vocabulary[:-2] is
# exactly the set of payload characters.
vocabulary = list("ace") + [EOS, SOS]
vocab_size = len(vocabulary)

# Sequence lengths and network sizes used by the cells below.
MAX_STRING_LEN = 10    # length of an encoded sequence, markers included
MAX_INPUST_LEN = 100   # (sic) name kept as-is; not referenced in the visible cells
num_hidden = 30        # hidden units per GRU cell
embed_size = 256       # output width of the embedding layer
batch_size = 26        # batch size of the train/eval iterators

# Lookup tables between integer codes and characters.
int2char = dict(enumerate(vocabulary))
char2int = {symbol: code for code, symbol in int2char.items()}

In [105]:
# Show the code -> character table and the vocabulary size, one per line.
print(int2char, "vocab size: " + str(vocab_size), sep="\n")

{0: 'a', 1: 'c', 2: 'e', 3: '§', 4: '#'}
vocab size: 5


In [106]:
def generate_strings(min_len, max_len):
    """Build a random string over the payload alphabet, framed by SOS and EOS.

    The payload length is drawn with ``randrange(min_len, max_len)``, so it
    lies in ``[min_len, max_len)`` and ``max_len`` itself is never produced.
    The returned string is two characters longer than the payload because of
    the leading SOS and trailing EOS markers.
    """
    payload_length = randrange(min_len, max_len)
    alphabet = vocabulary[:-2]  # the last two vocabulary entries are EOS and SOS
    payload = ''.join(choice(alphabet) for _ in range(payload_length))
    return SOS + payload + EOS

In [107]:
# Spot-check the generator with a payload of 4 to 9 characters.
generate_strings(min_len=4, max_len=10)

'#accaeeae§'

In [108]:
def text2ints(string):
    """Encode ``string`` as a list of integer codes via ``char2int``.

    Raises KeyError for a character that is not in ``char2int``.
    """
    codes = []
    for symbol in string:
        codes.append(char2int[symbol])
    return codes

def ints2text(numbers):
    """Decode an iterable of integer codes back into a string via ``int2char``.

    Raises KeyError for a code that is not in ``int2char``.
    """
    return ''.join(int2char[code] for code in numbers)

def int2onehot(numbers):
    """Convert integer codes to an MXNet one-hot array of depth ``vocab_size``."""
    indices = mx.nd.array(numbers)
    return mx.nd.one_hot(indices, vocab_size)

def onehot2int(matrix):
    """Collapse each row of ``matrix`` to the integer index of its arg-max.

    Every row must provide ``argmax(axis=0)`` whose result converts through
    ``asnumpy().tolist()`` to a list; the first element of that list is cast
    to ``int``. Returns one integer per row, in order.
    """
    return [int(row.argmax(axis=0).asnumpy().tolist()[0]) for row in matrix]

In [109]:
# Generate one sample with a 19-character payload and show it with its length.
string = generate_strings(19, 20)
print(string, len(string))

# Encoding followed by decoding must give back the original text.
round_trip = ints2text(text2ints(string))
assert round_trip == string

#ecceeaacceaaecaacee§ 21


In [110]:
def _reverse_payload(sentence):
    """Return the SOS code, the reversed interior of ``sentence``, then the EOS code."""
    interior = sentence[1:-1]
    return [char2int[SOS]] + interior[::-1] + [char2int[EOS]]


# Inputs are random encoded strings; targets are the same strings with the
# characters between the markers reversed.
train_set = [
    text2ints(generate_strings(MAX_STRING_LEN - 2, MAX_STRING_LEN - 1))
    for _ in range(3000)
]
inverse_train_set = [_reverse_payload(sentence) for sentence in train_set]

eval_set = [
    text2ints(generate_strings(MAX_STRING_LEN - 2, MAX_STRING_LEN - 1))
    for _ in range(100)
]
inverse_eval_set = [_reverse_payload(sentence) for sentence in eval_set]

#print(train_set[0])
#print(inverse_train_set[0])

[4, 1, 1, 2, 1, 1, 1, 1, 1, 3]
[4, 1, 1, 1, 1, 1, 2, 1, 1, 3]


In [120]:
# Both inputs and targets are fed as one-hot arrays; int2onehot wraps the
# same mx.nd.one_hot(mx.nd.array(...), vocab_size) conversion.
train_iter = mx.io.NDArrayIter(
    data=int2onehot(train_set),
    label=int2onehot(inverse_train_set),
    batch_size=batch_size,
)

eval_iter = mx.io.NDArrayIter(
    data=int2onehot(eval_set),
    label=int2onehot(inverse_eval_set),
    batch_size=batch_size,
)

In [None]:
#def sym_gen(seq_len):
    

In [119]:
# Symbolic placeholders for the network input and the training target.
data = mx.sym.Variable('data')
label = mx.sym.Variable('softmax_label')

# Embedding layer mapping the vocab_size input symbols to embed_size features.
# NOTE(review): the iterators in this notebook feed one-hot arrays as `data`,
# whereas an embedding lookup normally takes integer indices — confirm this
# combination is intended.
embed = mx.sym.Embedding(data=data, input_dim=vocab_size, output_dim=embed_size)

In [113]:
# Encoder: two GRU cells (num_hidden units each) combined into a
# bidirectional cell, which is then wrapped in a ResidualCell.
bi_cell = mx.rnn.BidirectionalCell(
    mx.rnn.GRUCell(num_hidden=num_hidden, prefix="gru1_"),
    mx.rnn.GRUCell(num_hidden=num_hidden, prefix="gru2_"),
    output_prefix="bi_"
)

encoder = mx.rnn.ResidualCell(bi_cell)
        
# Unroll the encoder over MAX_STRING_LEN steps of the embedded input. The
# per-step outputs are discarded; only the returned states are kept.
_, encoder_state = encoder.unroll(
    length=MAX_STRING_LEN,
    inputs=embed,
    merge_outputs=False
)

# Concatenate the first state of each of the two returned state groups.
# NOTE(review): presumably these are the final hidden states of the forward
# and backward GRUs — confirm against the BidirectionalCell state layout.
encoder_state = mx.sym.concat(encoder_state[0][0],encoder_state[1][0])
# Shape probe for a full batch; the recorded output of this cell shows (26, 60).
print(mx.symbol_doc.SymbolDoc.get_output_shape(encoder_state, data=(batch_size,MAX_STRING_LEN,vocab_size)))


# Decoder: a single GRU cell with num_hidden units.
decoder = mx.rnn.GRUCell(num_hidden=num_hidden)

# The decoder is unrolled for num_hidden*2 steps with the concatenated encoder
# state as its input sequence; outputs are merged into one symbol.
# NOTE(review): this appears to treat the feature axis of the encoder state as
# the time axis — confirm this is the intended decoder wiring.
rnn_output, decoder_state = decoder.unroll(
    length=num_hidden*2,
    inputs=encoder_state,
    merge_outputs=True
)
# Shape probe; the recorded output of this cell shows (26, 60, 30).
print(mx.symbol_doc.SymbolDoc.get_output_shape(rnn_output, data=(batch_size,MAX_STRING_LEN,vocab_size)))

{'concat4_output': (26, 60)}
{'concat5_output': (26, 60, 30)}


In [114]:
def _print_output_shape(symbol):
    """Print the inferred output shape of ``symbol`` for one full input batch."""
    print(mx.symbol_doc.SymbolDoc.get_output_shape(
        symbol, data=(batch_size, MAX_STRING_LEN, vocab_size)))


# Flatten the decoder outputs so a dense layer can consume them.
flat = mx.sym.Flatten(data=rnn_output)
_print_output_shape(flat)

# Dense layer producing one value per (position, vocabulary entry) pair,
# followed by a ReLU.
fc1 = mx.sym.FullyConnected(data=flat, num_hidden=MAX_STRING_LEN*vocab_size)
#drop=mx.sym.Dropout(data=fc1, p=0.5)
act = mx.sym.Activation(data=fc1, act_type='relu')
_print_output_shape(act)

# Reshape to (batch, MAX_STRING_LEN, vocab_size); the 0 keeps the batch axis.
out = mx.sym.Reshape(data=act, shape=(0, MAX_STRING_LEN, vocab_size))

#out=mx.sym.round(out)

_print_output_shape(out)

{'flatten5_output': (26, 1800)}
{'activation5_output': (26, 50)}
{'reshape5_output': (26, 10, 5)}


In [115]:
# Regression output layer comparing the reshaped network output with `label`.
net = mx.sym.LinearRegressionOutput(
    data=out,
    label=label,
)

In [116]:
# Bind the network to the device chosen in the setup cell. Previously `ctx`
# was defined but never used, so changing it there had no effect on training.
model = mx.module.Module(net, context=ctx)

# NOTE(review): 'acc' is evaluated here against one-hot regression targets;
# confirm that the reported accuracy measures what is intended.
model.fit(
    train_data=train_iter,
    eval_data=eval_iter,
    eval_metric='acc',
    # Float literal so the gradient scale cannot collapse to 0 under
    # integer-division semantics.
    optimizer=mx.optimizer.Adam(rescale_grad=1.0/batch_size),
    #optimizer_params={'learning_rate':0.001, 'momentum':0.9},
    initializer=mx.initializer.Xavier(),
    # Log throughput and the running metric every 32 batches.
    batch_end_callback=mx.callback.Speedometer(batch_size, 32),
    num_epoch=8
)

INFO:root:Epoch[0] Batch [32]	Speed: 478.69 samples/sec	accuracy=0.813706
INFO:root:Epoch[0] Batch [64]	Speed: 467.91 samples/sec	accuracy=0.818029
INFO:root:Epoch[0] Batch [96]	Speed: 472.84 samples/sec	accuracy=0.825264
INFO:root:Epoch[0] Train-accuracy=0.829957
INFO:root:Epoch[0] Time cost=6.292
INFO:root:Epoch[0] Validation-accuracy=0.835577
INFO:root:Epoch[1] Batch [32]	Speed: 483.88 samples/sec	accuracy=0.837832
INFO:root:Epoch[1] Batch [64]	Speed: 478.76 samples/sec	accuracy=0.848606
INFO:root:Epoch[1] Batch [96]	Speed: 503.25 samples/sec	accuracy=0.859519
INFO:root:Epoch[1] Train-accuracy=0.866518
INFO:root:Epoch[1] Time cost=6.214
INFO:root:Epoch[1] Validation-accuracy=0.863269
INFO:root:Epoch[2] Batch [32]	Speed: 515.22 samples/sec	accuracy=0.870443
INFO:root:Epoch[2] Batch [64]	Speed: 495.21 samples/sec	accuracy=0.881923
INFO:root:Epoch[2] Batch [96]	Speed: 489.35 samples/sec	accuracy=0.891322
INFO:root:Epoch[2] Train-accuracy=0.893887
INFO:root:Epoch[2] Time cost=6.026
INFO

In [117]:
# Five fresh random samples, generated the same way as the training data.
test_set = [
    text2ints(generate_strings(MAX_STRING_LEN - 2, MAX_STRING_LEN - 1))
    for _ in range(5)
]

#print(test_set[0])
#print(ints2text(test_set[0]))

# Expected outputs: SOS, the reversed interior of each sample, then EOS.
sos_code, eos_code = char2int[SOS], char2int[EOS]
inverse_test_set = [
    [sos_code] + list(reversed(sentence[1:-1])) + [eos_code]
    for sentence in test_set
]

#print(inverse_test_set[0])
#print(ints2text(inverse_test_set[0]))

#print(mx.nd.one_hot(mx.nd.array(test_set[0]),vocab_size))
#print(mx.nd.one_hot(mx.nd.array(inverse_test_set[0]),vocab_size))

In [118]:
# One sample per batch so predictions line up one-to-one with test_set.
test_iter = mx.io.NDArrayIter(
    data=int2onehot(test_set),
    label=int2onehot(inverse_test_set),
    batch_size=1
)

#test_iter.reset()
predictions = model.predict(test_iter)

# For each sample print its index, the input string and the decoded prediction.
for i, pred in enumerate(predictions):
    print(i)
    print(ints2text(test_set[i]))
    # Decode with arg-max on the raw scores. Rounding first would collapse
    # distinct scores to the same value and make ties resolve arbitrarily
    # to the lowest index.
    print(ints2text(onehot2int(pred)))
    print("--------------------")
    

0
#eecceeae§
#eaeeccee§
--------------------
1
#aaceeeca§
#aceeecaa§
--------------------
2
#aeeaaecc§
#cceaaeea§
--------------------
3
#ecaaeecc§
#cceeaace§
--------------------
4
#aeaaaacc§
#ccaaaaea§
--------------------
