In [1]:
import mxnet as mx
from mxnet import autograd, gluon, nd
from mxnet.gluon import nn, rnn, Block
from mxnet.contrib import text

from io import open
import collections
import datetime

In [2]:
# Special tokens reserved in both vocabularies. read_data() appends EOS to
# every kept sentence and then fills the remainder with PAD; BOS is only
# registered as a reserved token in the code visible here.
PAD, BOS, EOS = '<pad>', '<bos>', '<eos>'

In [4]:
# --- Optimisation settings ---
# NOTE(review): epoch_period is presumably the reporting interval in epochs;
# confirm against the training loop, which is not visible in this cell.
epochs, epoch_period = 50, 10
learning_rate = 0.005

# Fixed length of every token sequence; passed to read_data() for filtering
# and padding, and used as the second dimension of X and Y.
max_seq_len = 5

# --- Model sizes, listed as (encoder, decoder) pairs ---
encoder_num_layers, decoder_num_layers = 1, 2
encoder_drop_prob, decoder_drop_prob = 0.1, 0.1
encoder_hidden_dim, decoder_hidden_dim = 256, 256
# NOTE(review): the spelling "aligment" is kept as-is because cells outside
# this view may reference the name.
aligment_dim = 25

# Device context handed to the nd.zeros / nd.array calls.
ctx = mx.cpu(0)

In [7]:
def _terminate_and_pad(tokens, max_seq_len):
    """Return a copy of `tokens` ending in EOS and right-padded with PAD to max_seq_len."""
    return tokens + [EOS] + [PAD] * (max_seq_len - len(tokens) - 1)


def read_data(max_seq_len, data_path='../gluon-tutorials/data/fr-en-small.txt'):
    """Load tab-separated sentence pairs and build one vocabulary per side.

    Every non-blank line of the file must contain an input sentence and an
    output sentence separated by exactly one tab; tokens within a sentence
    are separated by single spaces. A pair is kept only when both sentences
    have fewer than `max_seq_len` tokens, so each still fits once EOS has
    been appended. Kept sentences are terminated with EOS and right-padded
    with PAD to exactly `max_seq_len` tokens.

    Args:
        max_seq_len: Length of every returned token sequence.
        data_path: Path of the corpus file. Defaults to the path that was
            previously hard-coded, so existing calls behave as before.

    Returns:
        A tuple `(fr_vocab, en_vocab, input_seqs, output_seqs)`:
        the vocabulary built from the input-side tokens, the vocabulary
        built from the output-side tokens, and the two lists of padded
        token lists (same length, aligned pair by pair). Both vocabularies
        reserve PAD, BOS and EOS.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields.
    """
    input_tokens = []
    output_tokens = []
    input_seqs = []
    output_seqs = []

    # `open` is io.open (imported at the top of the notebook), so the
    # `encoding` keyword is accepted on Python 2 as well. Fixing the encoding
    # keeps decoding independent of the machine's locale.
    with open(data_path, encoding='utf-8') as f:
        # Iterate the file lazily instead of materialising every line.
        for line in f:
            line = line.rstrip()
            if not line:
                # A blank (e.g. trailing) line would otherwise break the
                # two-field unpacking below.
                continue
            input_seq, output_seq = line.split('\t')
            cur_input_tokens = input_seq.split(' ')
            cur_output_tokens = output_seq.split(' ')

            # Strict "<" leaves room for the EOS token on both sides.
            if len(cur_input_tokens) < max_seq_len and len(cur_output_tokens) < max_seq_len:
                # Only real words are counted; EOS/PAD enter the
                # vocabularies through reserved_tokens instead.
                input_tokens.extend(cur_input_tokens)
                output_tokens.extend(cur_output_tokens)
                input_seqs.append(_terminate_and_pad(cur_input_tokens, max_seq_len))
                output_seqs.append(_terminate_and_pad(cur_output_tokens, max_seq_len))

    # The file is no longer needed once all tokens have been collected.
    fr_vocab = text.vocab.Vocabulary(collections.Counter(input_tokens),
                                     reserved_tokens=[PAD, BOS, EOS])
    en_vocab = text.vocab.Vocabulary(collections.Counter(output_tokens),
                                     reserved_tokens=[PAD, BOS, EOS])
    return fr_vocab, en_vocab, input_seqs, output_seqs

In [11]:
# Build both vocabularies and the fixed-length token sequences.
input_vocab, output_vocab, input_seqs, output_seqs = read_data(max_seq_len)

# One row per sentence, max_seq_len columns; rows are filled in below with
# the vocabulary index of each token.
X = nd.zeros((len(input_seqs), max_seq_len), ctx=ctx)
Y = nd.zeros((len(output_seqs), max_seq_len), ctx=ctx)

for i in range(len(input_seqs)):
    X[i] = nd.array(input_vocab.to_indices(input_seqs[i]), ctx=ctx)
    Y[i] = nd.array(output_vocab.to_indices(output_seqs[i]), ctx=ctx)

# Pair each input row with its output row.
dataset = gluon.data.ArrayDataset(X, Y)

# Single-argument print(...) produces the same output as the Python 2 print
# statement and is also valid on a Python 3 kernel.
print(input_seqs)
print(X)
print(input_vocab.token_to_idx)

[[u'elle', u'est', u'vieille', u'.', '<eos>'], [u'elle', u'est', u'tranquille', u'.', '<eos>'], [u'elle', u'a', u'tort', u'.', '<eos>'], [u'elle', u'est', u'canadienne', u'.', '<eos>'], [u'elle', u'est', u'japonaise', u'.', '<eos>'], [u'ils', u'sont', u'russes', u'.', '<eos>'], [u'ils', u'se', u'disputent', u'.', '<eos>'], [u'ils', u'regardent', u'.', '<eos>', '<pad>'], [u'ils', u'sont', u'acteurs', u'.', '<eos>'], [u'elles', u'sont', u'crevees', u'.', '<eos>']]

[[  5.   6.  21.   4.   3.]
 [  5.   6.  20.   4.   3.]
 [  5.   9.  19.   4.   3.]
 [  5.   6.  11.   4.   3.]
 [  5.   6.  15.   4.   3.]
 [  7.   8.  17.   4.   3.]
 [  7.  18.  13.   4.   3.]
 [  7.  16.   4.   3.   1.]
 [  7.   8.  10.   4.   3.]
 [ 14.   8.  12.   4.   3.]]
<NDArray 10x5 @cpu(0)>
{u'russes': 17, u'regardent': 16, u'acteurs': 10, '<pad>': 1, u'disputent': 13, u'est': 6, u'crevees': 12, u'.': 4, u'tort': 19, u'japonaise': 15, u'canadienne': 11, u'vieille': 21, u'elle': 5, u'sont': 8, '<eos>': 3, u'a': 9, u

In [12]:
class Encoder(Block):
    """Encoder: embeds token indices, applies dropout, and runs a GRU.

    Args:
        input_dim: Passed to nn.Embedding as its input dimension (the number
            of distinct token indices the embedding can look up).
        hidden_dim: Embedding width; also the GRU hidden size and the GRU's
            declared input size.
        num_layers: Number of GRU layers.
        drop_prob: Dropout probability, applied to the embeddings and also
            passed to the GRU as its `dropout` argument.
    """
    def __init__(self, input_dim, hidden_dim, num_layers, drop_prob):
        super(Encoder, self).__init__()
        with self.name_scope():
            self.embedding = nn.Embedding(input_dim, hidden_dim)
            self.dropout = nn.Dropout(drop_prob)
            self.rnn = rnn.GRU(hidden_dim, num_layers, dropout=drop_prob,
                              input_size=hidden_dim)

    def forward(self, inputs, state):
        """Encode a sequence of token indices.

        Args:
            inputs: Token indices. The original author's note gives the shape
                as (1, num_steps).
            state: Initial GRU state, e.g. from begin_state().

        Returns:
            The `(output, state)` pair returned by the GRU.
        """
        # Original author's note: inputs is (1, num_steps) and emb is
        # (num_steps, 1, 256) after the axis swap.
        # swapaxes(0, 1) exchanges the first two axes of the embedding output,
        # presumably to put the step axis first as the GRU's default layout
        # expects (no `layout` argument is passed above) -- confirm in the
        # rnn.GRU documentation.
        # Use the `inputs` parameter here; the bare name `input` would refer
        # to the Python builtin function rather than the data.
        emb = self.embedding(inputs).swapaxes(0, 1)
        emb = self.dropout(emb)
        output, state = self.rnn(emb, state)

        return output, state

    def begin_state(self, *args, **kwargs):
        """Forward all arguments to the GRU's begin_state and return its result."""
        return self.rnn.begin_state(*args, **kwargs)

In [None]:
class Decoder(Block):
    def __init__(self, hidden_dim, output_dim, num_layers, max_seq_len,
                 drop_prob, alignment_dim, encoder_hidden_dim):
        super(Decoder, self).__init__()
        
        with self.name_scope():
            self.embdedding = nn.Embedding(output_dim, hidden_dim)
            self.dropout = nn.Dropout(drop_prob)
            self.attention = nn.Sequential()
            with self.attention.name_scope():
                self.attention.add(nn.Dense(alignment_dim, in_units=hidden_dim + encoder_hidden_dim,
                    activation="tanh", flatten=False))
                self.