In [1]:
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np

from utils import *
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
m = 1000
dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m, locale='zh')

In [3]:
dataset[:10]

[('0点43分32秒', 'ABS>00:43:32'),
 ('52秒后', '+52S        '),
 ('下午11:57:47', 'ABS>23:57:47'),
 ('上午12点50分32秒', 'ABS>00:50:32'),
 ('再过2分钟', '+2M         '),
 ('周六7点55分36秒', 'TW6>07:55:36'),
 ('下午5点1分', 'ABS>17:01:48'),
 ('33小时以后', '+33H        '),
 ('下午8点24分55秒', 'ABS>20:24:55'),
 ('5秒以后', '+5S         ')]

In [4]:
Tx = 30
Ty = 12
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

print('X.shape', X.shape)
print('Y.shape', Y.shape)
print('Xoh.shape', Xoh.shape)
print('Yoh.shape', Yoh.shape)

X.shape (1000, 30)
Y.shape (1000, 12)
Xoh.shape (1000, 30, 41)
Yoh.shape (1000, 12, 23)


In [7]:
index = 0
print('Source time:', dataset[index][0])
print('Target time:', dataset[index][1])
print()
print('Source after preprocessing (indices):', X[index])
print('Target after preprocessing (indices):', Y[index])
print()
print('Source after preprocessing (one-hot):', Xoh[index])
print('Target after preprocessing (one-hot):', Yoh[index])

Source time: 0点43分32秒
Target time: ABS>00:43:32

Source after preprocessing (indices): [ 0 34  4  3 22  3  2 35 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40
 40 40 40 40 40]
Target after preprocessing (indices): [14 15 20 13  2  2 12  6  5 12  5  4]

Source after preprocessing (one-hot): [[ 1.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  0.  1.]]
Target after preprocessing (one-hot): [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
   0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
   0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
   0.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.

In [8]:
repeator = RepeatVector(Tx, name='rep')
concatenator = Concatenate(axis=-1, name='conc')
densor = Dense(1, activation='relu', name='densor')
activator = Activation(softmax, name='attention_weights')
dotor = Dot(axes=1, name='doter')

In [9]:
def one_step_attention(a, s_prev):
    s_prev = repeator(s_prev)
    concat = concatenator([a, s_prev])
    e = densor(concat)
    alphas = activator(e)
    context = dotor([alphas, a])
    
    return context

In [10]:
n_a = 64
n_s = 128
post_activation_LSTM_cell = LSTM(n_s, return_state = True, name='post_activation')
output_layer = Dense(len(machine_vocab), activation=softmax, name='output')

In [11]:
def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    
    outputs = []
    
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
    
    for t in range(Ty):
        context = one_step_attention(a, s)
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
        out = output_layer(s)
        outputs.append(out)
    
    model = Model(inputs = [X, s0, c0], outputs = outputs, name='TranslationModel')
    
    return model

In [12]:
model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))

In [13]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 30, 41)       0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 128)          0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 30, 128)      54272       input_1[0][0]                    
__________________________________________________________________________________________________
rep (RepeatVector)              (None, 30, 128)      0           s0[0][0]                         
                                                                 post_activation[0][0]            
          

In [14]:
opt = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [15]:
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
outputs = list(Yoh.swapaxes(0,1))

In [16]:
model.fit([Xoh, s0, c0], outputs, epochs=10, batch_size=100)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4fc23e87b8>

In [17]:
EXAMPLES = ['明天下午3:04', '这周六10:20', '上午9点10分', '10分钟以后']

for example in EXAMPLES:    
    source = string2int(example, Tx, human_vocab)
    source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0,1)
    prediction = model.predict([source, s0, c0])
    prediction = np.argmax(prediction, axis = -1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    
    print("source:", example)
    print("output:", ''.join(output))

ValueError: Error when checking : expected input_1 to have 3 dimensions, but got array with shape (41, 30)