In [15]:
import tensorflow as tf
import keras
import jieba
import pickle as pkl
import numpy as np
import nltk
from tensorflow.python.layers import core as layers_core

# 预处理

In [16]:
# 读入
def data_preprocess(path):
    corpus_en = [] 
    with open(path, 'r',encoding='utf-8') as f:
        for i, line in enumerate(f): # i从0开始
            line = line.strip()
            corpus_en.append(line)
                
    return corpus_en

# 分词
def segment(corpus, tokenizer):
    tokenized_corpus = []
    tokenized_corpus = ' '.join([_ for _ in tokenizer(corpus) if _.strip(' ')])
    tokenized_corpus = tokenized_corpus.split(' \n ')
    return tokenized_corpus

 

# 把数据中的字/词转成对应id
def transform(data, word2id):
    ret_data = []
    for sentence in data:
        ret_data.append([word2id.get(word, 1) for word in sentence.split()]) # word2id.get(word, 1) 如果word不在字典中，则返回默认值1
    return ret_data

# padding  对长度小于max的以0填充，大于的截断
def padding(data, max_len):
    return tf.keras.preprocessing.sequence.pad_sequences(data, max_len, padding='post', truncating='post')

def transform2word(data, id2word):
    """
    把id转成word
    """
    ret_data = []
    for sentence in data:
        ret_data.append(''.join([id2word.get(word, '<UNK>') for word in sentence]))
    return ret_data

# Model

In [17]:
class NMTModel(object):
    """
    带Attention的NMT模型
    """
    def __init__(self, 
                 src_max_vocab_size, 
                 tgt_max_vocab_size, 
                 embedding_size,
                 hidden_size,
                 src_max_seq_len,
                 tgt_max_seq_len,
                 tgt_start_id,
                 tgt_end_id,
                 max_gradient_norm=5,
                 maximum_iterations=None,
                 optimizer='adam',
                 ):
        self.initializer = tf.random_uniform_initializer(
        -0.05, 0.05)
        self.optimizer = optimizer
        # 源词表大小
        self.src_max_vocab_size = src_max_vocab_size
        # 目标词表大小
        self.tgt_max_vocab_size = tgt_max_vocab_size
        # 输入embedding大小（src与tgt的embedding_size可以不同）
        self.embedding_size = embedding_size
        # 隐层大小
        self.hidden_size = hidden_size
        # 源序列长度
        self.src_max_seq_len = src_max_seq_len
        # 目标序列长度
        self.tgt_max_seq_len = tgt_max_seq_len
        # 目标序列起始id（输入的初始id值）
        self.tgt_start_id = tgt_start_id
        # 目标的终结id（模型预测到该id后停止预测）
        self.tgt_end_id = tgt_end_id
        if maximum_iterations is None:
            self.maximum_iterations = self.tgt_max_seq_len                                  # !!!!!!!!!!!
        else:
            self.maximum_iterations = maximum_iterations
        self.max_gradient_norm = max_gradient_norm
        self.add_placeholders()
        self.batch_size = tf.shape(self.X)[0]
        self.add_embeddings()
        self.encoder()
        self.decoder()
        self.add_loss()
        self.add_train_op()

    def add_placeholders(self):
        # X, Y_out, Y_in, X_len, Y_in_len, Y_out_len
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y_out = tf.placeholder(tf.int32, [None, None])
        self.Y_in = tf.placeholder(tf.int32, [None, None])
        self.X_len = tf.placeholder(tf.int32, [None, ])
        self.Y_in_len = tf.placeholder(tf.int32, [None, ])
        self.Y_out_len = tf.placeholder(tf.int32, [None, ])
        self.lr = tf.placeholder(tf.float32)
        self.dropout = tf.placeholder(tf.float32)

    def add_embeddings(self):
        with tf.variable_scope('embeddings', initializer=self.initializer):
            # 创建变量
            self.X_emb = tf.get_variable('X_emb', 
                shape=(self.src_max_vocab_size, self.embedding_size), 
                dtype=tf.float32)
            self.Y_emb = tf.get_variable('Y_emb', 
                shape=(self.tgt_max_vocab_size, self.embedding_size), 
                dtype=tf.float32)

            self.encoder_input = tf.nn.embedding_lookup(self.X_emb, self.X)
            self.decoder_input = tf.nn.embedding_lookup(self.Y_emb, self.Y_in)

    def encoder(self):
        with tf.variable_scope('encoder'):
            fw_encoder_cell = tf.contrib.rnn.GRUCell(self.hidden_size)
            fw_encoder_cell = tf.contrib.rnn.DropoutWrapper(fw_encoder_cell, input_keep_prob=1-self.dropout)
            bw_encoder_cell = tf.contrib.rnn.GRUCell(self.hidden_size)
            bw_encoder_cell = tf.contrib.rnn.DropoutWrapper(bw_encoder_cell, input_keep_prob=1-self.dropout)

            # 双向RNN
            encoder_outputs, bi_last_state = tf.nn.bidirectional_dynamic_rnn(     
                    fw_encoder_cell, bw_encoder_cell, self.encoder_input, 
                    self.X_len, dtype=tf.float32)
            self.encoder_outputs = tf.concat(encoder_outputs, axis=-1)
            self.encoder_last_state = bi_last_state


    def decoder(self):
        with tf.variable_scope('decoder'):
            decoder_cell = tf.contrib.rnn.GRUCell(self.hidden_size)
            decoder_cell = tf.contrib.rnn.DropoutWrapper(decoder_cell, input_keep_prob=1-self.dropout)
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                                    self.hidden_size, self.encoder_outputs,
                                    memory_sequence_length=self.X_len)
            decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
                                    decoder_cell, attention_mechanism,
                                    attention_layer_size=self.hidden_size)

            projection_layer = layers_core.Dense(
            self.tgt_max_vocab_size, use_bias=False)

        # 训练或评估的时候，decoder的output是真实的target，input是target右移一个词
        with tf.variable_scope('dynamic_decode'):
            # Helper
            helper = tf.contrib.seq2seq.TrainingHelper(
                self.decoder_input, tf.ones((self.batch_size, ), dtype=tf.int32) * self.tgt_max_seq_len, time_major=False)
            # Decoder
            decoder_initial_state = decoder_cell.zero_state(self.batch_size, dtype=tf.float32).clone(
                cell_state=self.encoder_last_state[0])
            decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell, helper, decoder_initial_state,
                output_layer=projection_layer)
            # Dynamic decoding
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)
            self.logits = outputs.rnn_output
            self.pred = tf.argmax(self.logits, axis=2)

        # 预测的时候，decoder的每个timestep的输入为前一个时刻的输出
        with tf.variable_scope('dynamic_decode', reuse=True):
            # Helper
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                self.Y_emb,
                start_tokens=tf.fill([self.batch_size], self.tgt_start_id),
                end_token=self.tgt_end_id)
            decoder_initial_state = decoder_cell.zero_state(self.batch_size, dtype=tf.float32).clone(
                cell_state=self.encoder_last_state[0])
            # Decoder
            decoder = tf.contrib.seq2seq.BasicDecoder(
                decoder_cell, helper, decoder_initial_state,
                output_layer=projection_layer)
            # Dynamic decoding
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=self.maximum_iterations)                      # !!!!!!!!!!!!!!!!!!!!!!!!!!!
            self.translations = outputs.sample_id
            
    def add_loss(self): # 交叉熵
        target_weights = tf.sequence_mask(
                         self.Y_out_len, self.tgt_max_seq_len, dtype=self.logits.dtype)
        crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                         labels=self.Y_out, logits=self.logits)
        self.loss_op = (tf.reduce_sum(crossent * target_weights) / tf.to_float(self.batch_size))

    def add_train_op(self):   # 优化器
        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss_op, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, self.max_gradient_norm)
        # Optimization
        if self.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(self.lr)
        elif self.optimizer == 'adadelta':
            optimizer = tf.train.AdaDeltaOptimizer(self.lr)
        else:
            optimizer = tf.train.AdamOptimizer(self.lr)
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.train_op = optimizer.apply_gradients(
            zip(clipped_gradients, params), global_step=self.global_step)

# 预测函数

In [18]:
# 一些模型对应的参数
optimizer = 'adam'
lr = 1e-3
# 55379
# 4844
src_max_vocab_size = 60004
tgt_max_vocab_size = 7027
embedding_size = 128
hidden_size = 256
src_max_seq_len = 40     # 源句的裁剪长度
tgt_max_seq_len = 40       # 目标句的裁剪长度
tgt_start_id = 2 # <S> 
tgt_end_id = 0 # <PAD>
max_gradient_norm = 1.
maximum_iterations = 70  #!!!    目标句 测试或验证 的翻译长度
cf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
cf.gpu_options.per_process_gpu_memory_fraction = 0.8

In [19]:
def predict(X):
    with open(r'C:\Users\19843\Desktop\natural_language_processing\NMT\data\data_edu\vocab_dict_and_corpus_biEduNew.pkl', 'rb') as fr:
        en_word2id, en_id2word, ch_word2id, ch_id2word, __, __, __, __ = pkl.load(fr)
    if type(X) == str:
        X = X
    elif type(X) == list or type(X) == tuple:
        X = '\n'.join(X)
    else:
        raise ValueError('You must ensure the `X` be string or list!')
    X = segment(X, jieba.cut)
    X = transform(X, en_word2id)
    X = padding(X, src_max_seq_len)
    X_len = np.sum((X > 0), axis=1)
    # X -> (src_max_seq_len, ) or (batch, sec_max_seq_len, )
    with tf.Session(config=cf) as sess:
        model = NMTModel(src_max_vocab_size=src_max_vocab_size, 
                             tgt_max_vocab_size=tgt_max_vocab_size, 
                             embedding_size=embedding_size,
                             hidden_size=hidden_size,
                             src_max_seq_len=src_max_seq_len,
                             tgt_max_seq_len=tgt_max_seq_len,
                             tgt_start_id=tgt_start_id,
                             tgt_end_id=tgt_end_id,
                             max_gradient_norm=max_gradient_norm,
                             maximum_iterations=maximum_iterations,
                             optimizer=optimizer)
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(r'C:\Users\19843\Desktop\natural_language_processing\NMT\model\fromEdu'))
        
        translations = sess.run(model.translations, 
                            feed_dict={ model.X:X,
                                        model.Y_out:[[]],
                                        model.Y_in:[[]], 
                                        model.X_len:X_len,
                                        model.Y_in_len:[],
                                        model.Y_out_len:[],
                                        model.lr:lr,
                                        model.dropout:0.})
        translations = transform2word(translations, ch_id2word)
    return translations


In [20]:
# import time
# t1 = time.time()
testPath = r'finalTest\toTest.txt'
test_en = data_preprocess(testPath)

def predict_wN(test_en_wN):
    tf.reset_default_graph()
    pred = predict(test_en_wN)
    print(len(pred))
    # 后处理，去除padding等
    for i in range(len(pred)):
        pred[i] = pred[i][:pred[i].find('<PAD>')]
    # t2 = time.time()
    # print(t2 - t1)
    return pred

# write 
pred = '\n'.join(predict_wN(test_en))


INFO:tensorflow:Restoring parameters from C:\Users\19843\Desktop\natural_language_processing\NMT\model\fromEdu\my_model-76494
2500


In [21]:
pred=pred.replace('\u2022','')
pred=pred.replace('\xab','')
pred=pred.replace('\ufffd','')
pred=pred.replace('\u2219','')
with open(r'finalTest\pred5.txt', 'w') as f:
    f.write(pred)

In [12]:
# import time
# t1 = time.time()
testPath = r'validTest\toTest.txt'
test_en = data_preprocess(testPath)
iteration = len(test_en)//64+1

def predict_wN(test_en_wN):
    tf.reset_default_graph()
    pred = predict(test_en_wN)
    # 后处理，去除padding等
    for i in range(len(pred)):
        pred[i] = pred[i][:pred[i].find('<PAD>')]
    # t2 = time.time()
    # print(t2 - t1)
    return pred

pred = []
for i in range(iteration):
    if i != iteration:
        pred.append(predict_wN(test_en[i*64:(i+1)*64]))
    else:
        pred.append(predict_wN(test_en[i*64:]))


# write 
pred = '\n'.join(pred)
with open(r'validTest\pred1.txt', 'w') as f:
    f.write(pred)

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\19843\AppData\Local\Temp\jieba.cache
Loading model cost 0.979 seconds.
Prefix dict has been built succesfully.


Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from good

InternalError: Blas GEMM launch failed : a.shape=(64, 384), b.shape=(384, 512), m=64, n=512, k=384
	 [[node encoder/bidirectional_rnn/fw/fw/while/gru_cell/MatMul (defined at <ipython-input-6-0c2212708a03>:84) ]]
	 [[node dynamic_decode_1/decoder/while/Identity (defined at <ipython-input-6-0c2212708a03>:134) ]]

Caused by op 'encoder/bidirectional_rnn/fw/fw/while/gru_cell/MatMul', defined at:
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 438, in run_forever
    self._run_once()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 1451, in _run_once
    handle._run()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 781, in inner
    self.run()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3214, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-26082b34bc1a>", line 20, in <module>
    pred.append(predict_wN(test_en[i*64:(i+1)*64]))
  File "<ipython-input-12-26082b34bc1a>", line 9, in predict_wN
    pred = predict(test_en_wN)
  File "<ipython-input-7-50718c495839>", line 26, in predict
    optimizer=optimizer)
  File "<ipython-input-6-0c2212708a03>", line 45, in __init__
    self.encoder()
  File "<ipython-input-6-0c2212708a03>", line 84, in encoder
    self.X_len, dtype=tf.float32)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\util\deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 443, in bidirectional_dynamic_rnn
    time_major=time_major, scope=fw_scope)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\util\deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 671, in dynamic_rnn
    dtype=dtype)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 879, in _dynamic_rnn_loop
    swap_memory=swap_memory)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3556, in while_loop
    return_same_structure)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3087, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3022, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3525, in <lambda>
    body = lambda i, lv: (i + 1, orig_body(*lv))
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 845, in _time_step
    skip_conditionals=True)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 276, in _rnn_step
    new_output, new_state = call_cell()
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn.py", line 833, in <lambda>
    call_cell = lambda: cell(input_t, state)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1284, in __call__
    output, new_state = self._cell(inputs, state, scope=scope)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 371, in __call__
    *args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py", line 530, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 554, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 565, in call
    array_ops.concat([inputs, state], 1), self._gate_kernel)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\math_ops.py", line 2455, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 5333, in mat_mul
    name=name)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\19843\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(64, 384), b.shape=(384, 512), m=64, n=512, k=384
	 [[node encoder/bidirectional_rnn/fw/fw/while/gru_cell/MatMul (defined at <ipython-input-6-0c2212708a03>:84) ]]
	 [[node dynamic_decode_1/decoder/while/Identity (defined at <ipython-input-6-0c2212708a03>:134) ]]
