In [1]:
import tensorflow as tf
import numpy as np
from model import AttentionSeq2Seq

In [2]:
# Maximum sentence lengths, counted in space-separated tokens
# (special marker tokens included).
maximum_src_str_len, maximum_tgt_str_len = 40, 40

# Vocabulary sizes are filled in once the vocabularies have been built;
# -1 is only a placeholder until then.
source_vocab_size = target_vocab_size = -1

# Special marker tokens: padding, start-of-decoding and end-of-sentence.
_PAD, _GO, _EOS = "_PAD", "_GO", "_EOS"

# The single English -> French sentence pair used throughout this notebook.
input_str = (
    "I declare resumed the session of the European Parliament adjourned on "
    "Friday 17 December 1999, and I would like once again to wish you a happy "
    "new year in the hope that you enjoyed a pleasant festive period."
)
target_str = (
    "Je déclare reprise la session du Parlement européen qui avait été "
    "interrompue le vendredi 17 décembre dernier et je vous renouvelle tous "
    "mes vux en espérant que vous avez passé de bonnes vacances."
)

def input_process(input_str, max_len=None, eos=None, pad=None):
    """Turn a raw source sentence into a fixed-length, space-separated token string.

    The sentence loses its terminal punctuation mark, gets the end-of-sentence
    marker appended, and is right-padded with the padding marker until it
    contains ``max_len`` space-separated tokens. Sentences that are already
    longer than ``max_len`` are returned unpadded (they are not truncated here).

    Args:
        input_str: Raw sentence, tokens separated by single spaces.
        max_len: Target number of tokens; defaults to ``maximum_src_str_len``.
        eos: End-of-sentence marker; defaults to ``_EOS``.
        pad: Padding marker; defaults to ``_PAD``.

    Returns:
        The processed sentence as a single space-separated string.
    """
    max_len = maximum_src_str_len if max_len is None else max_len
    eos = _EOS if eos is None else eos
    pad = _PAD if pad is None else pad

    # Only drop the last character when it is punctuation/whitespace; blindly
    # slicing [:-1] would eat a real letter or digit from sentences that do
    # not end with a punctuation mark.
    if input_str and not input_str[-1].isalnum():
        input_str = input_str[:-1]

    tokens = input_str.split(" ") + [eos]
    # A negative difference multiplies to an empty list, i.e. no padding.
    tokens += [pad] * (max_len - len(tokens))
    return " ".join(tokens)

def target_process(target_str, max_len=None, go=None, eos=None, pad=None):
    """Turn a raw target sentence into a fixed-length decoder token string.

    The sentence loses its terminal punctuation mark, is wrapped between the
    start marker and the end-of-sentence marker, and is right-padded with the
    padding marker until it contains ``max_len`` space-separated tokens.
    Sentences that are already longer than ``max_len`` are returned unpadded
    (they are not truncated here).

    Args:
        target_str: Raw sentence, tokens separated by single spaces.
        max_len: Target number of tokens; defaults to ``maximum_tgt_str_len``.
        go: Start-of-decoding marker; defaults to ``_GO``.
        eos: End-of-sentence marker; defaults to ``_EOS``.
        pad: Padding marker; defaults to ``_PAD``.

    Returns:
        The processed sentence as a single space-separated string.
    """
    max_len = maximum_tgt_str_len if max_len is None else max_len
    go = _GO if go is None else go
    eos = _EOS if eos is None else eos
    pad = _PAD if pad is None else pad

    # Only drop the last character when it is punctuation/whitespace; blindly
    # slicing [:-1] would eat a real letter or digit from sentences that do
    # not end with a punctuation mark.
    if target_str and not target_str[-1].isalnum():
        target_str = target_str[:-1]

    tokens = [go] + target_str.split(" ") + [eos]
    # A negative difference multiplies to an empty list, i.e. no padding.
    tokens += [pad] * (max_len - len(tokens))
    return " ".join(tokens)
    
from tensorflow.contrib import learn

# Each corpus consists of exactly one pre-processed sentence.
input_str_list, target_str_list = [input_process(input_str)], [target_process(target_str)]

# --- Source side: fit a vocabulary on the sentence and encode it as ids ---
source_vocab_processor = learn.preprocessing.VocabularyProcessor(maximum_src_str_len)
input_data = np.array(
    list(source_vocab_processor.fit_transform(input_str_list))
)
source_vocab_size = len(source_vocab_processor.vocabulary_)
print("Source Vocabulary Size: {:d}".format(source_vocab_size))

# --- Target side: same procedure with its own, independent vocabulary ---
target_vocab_processor = learn.preprocessing.VocabularyProcessor(maximum_tgt_str_len)
target_data = np.array(
    list(target_vocab_processor.fit_transform(target_str_list))
)
target_vocab_size = len(target_vocab_processor.vocabulary_)
print("Target Vocabulary Size: {:d}".format(target_vocab_size))

# Token <-> id lookup tables.
# NOTE(review): `_mapping` / `_reverse_mapping` are private attributes of the
# vocabulary object and may change between library versions.
source_vocab_dict, source_vocab_rev_dict = (
    source_vocab_processor.vocabulary_._mapping,
    source_vocab_processor.vocabulary_._reverse_mapping,
)
target_vocab_dict, target_vocab_rev_dict = (
    target_vocab_processor.vocabulary_._mapping,
    target_vocab_processor.vocabulary_._reverse_mapping,
)

Source Vocabulary Size: 36
Target Vocabulary Size: 36


In [3]:
# Pull the single encoded sentence (row 0) out of each id matrix as a plain
# Python list. np.atleast_2d keeps this cell idempotent: on a re-run the
# variables are already flat lists, which the original `.tolist()[0]` could
# not handle (lists have no `.tolist()`), so the cell crashed the second time.
input_data = np.atleast_2d(input_data)[0].tolist()
target_data = np.atleast_2d(target_data)[0].tolist()

# Labels are the decoder inputs shifted left by one position, with a padding
# id appended so that labels and decoder inputs have the same length.
label_data = target_data[1:] + [target_vocab_dict[_PAD]]

In [4]:
# Build a batch by repeating the one training example 100 times.
# Every row refers to the same underlying list object (no copies are made).
input_batch = [input_data for _ in range(100)]
target_batch = [target_data for _ in range(100)]
label_batch = [label_data for _ in range(100)]

In [5]:
def decode(bytes):
    """Convert a sequence of predicted ids into a readable sentence.

    Each element of ``bytes`` is indexed with ``[0]`` to obtain a target
    vocabulary id. Ids equal to 0 or to the ``_PAD`` / ``_EOS`` / ``_GO``
    markers are skipped; the remaining ids are mapped back to words, joined
    with single spaces, and a final "." is appended.

    Returns an empty string when every id is skipped.
    """
    skipped_ids = (
        0,
        target_vocab_dict["_PAD"],
        target_vocab_dict["_EOS"],
        target_vocab_dict["_GO"],
    )
    pieces = [
        " " + target_vocab_rev_dict[step[0]]
        for step in bytes
        if step[0] not in skipped_ids
    ]
    # The slice removes the leading space of the first word; when nothing
    # was kept it removes the lone "." instead.
    return ("".join(pieces) + ".")[1:]

In [6]:
sess = tf.Session()

# ---- Hyper-parameters ----
batch_size = 100
enc_sequence_len, dec_sequence_len = maximum_src_str_len, maximum_tgt_str_len
enc_symbol_size, dec_symbol_size = source_vocab_size, target_vocab_size
embedding_size = hidden_size = 10
layer_size = 1  # NOTE(review): not passed to the model below

# ---- Model ----
model = AttentionSeq2Seq(
    sess,
    batch_size,
    enc_sequence_len,
    dec_sequence_len,
    enc_symbol_size,
    dec_symbol_size,
    embedding_size,
    hidden_size,
)

# Variables must be initialised after the model has been constructed.
sess.run(tf.global_variables_initializer())

# One single-element [_GO id] row per batch entry, passed to prediction.
go_signal = [[target_vocab_dict[_GO]] for _ in range(batch_size)]

# ---- Training: stop early once the decoded sentence equals the target ----
for step in range(500):
    loss, _ = model.train(input_batch, target_batch, label_batch)
    print("Average loss at step ", step, ": ", loss)

    pred = model.prediction(input_batch, go_signal)
    # All batch rows are identical, so only the first prediction is decoded.
    pred_sentence = decode(pred[0])
    print('Predicted sentence : ', pred_sentence)

    if pred_sentence != target_str:
        continue
    print("Matched!!")
    break

decoder/embeddings
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_0_step/rnn/transpose
decoder_0_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_1_step/rnn/transpose
decoder_1_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_2_step/rnn/transpose
decoder_2_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_3_step/rnn/transpose
decoder_3_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_4_step/rnn/transpose
decoder_4_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_5_step/rnn/transpose
decoder_5_step/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_6_step/rnn/transpose
decoder_6_step/rnn/whi

decoder_16_step_1/rnn/transpose
decoder_16_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_17_step_1/rnn/transpose
decoder_17_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_18_step_1/rnn/transpose
decoder_18_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_19_step_1/rnn/transpose
decoder_19_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_20_step_1/rnn/transpose
decoder_20_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_21_step_1/rnn/transpose
decoder_21_step_1/rnn/while/Exit_2
decoder/fully_connected_weight
attention/W_a
attention/U_a
attention/v_a
attention/b
decoder_22_step_1/rnn/transpose
decoder_22_step_1/rnn/while/Exit_2
decoder/fully_connect

최종 실험 결과 (괄호로 표시한 단어는 예측에서 <UNK>로 출력된 단어입니다.)

원래 : Je déclare reprise la(session)(du)Parlement européen qui avait été interrompue le vendredi 17 décembre(dernier)et je vous (renouvelle)tous mes vux en espérant que vous avez passé  (de)  bonnes vacances.
예측 : Je déclare reprise la <UNK> <UNK> Parlement européen qui avait été interrompue le vendredi 17 décembre <UNK> et je  vous     <UNK>   tous mes vux en espérant que vous avez passé <UNK> bonnes vacances _EOS _PAD _PAD _PAD _PAD _PAD _PAD.