In [1]:
from keras.models import load_model
enc_model = load_model('encoder_model', compile=False)
dec_model = load_model('decoder_model', compile=False)

In [2]:
import pickle

inv_vocab = pickle.load(open("vocabulary.pkl", "rb"))
vocab = {w:v for v, w in inv_vocab.items()}
keyword_list = pickle.load(open("keyword.pkl", "rb"))

In [4]:
import numpy as np
from laonlp import word_tokenize
import re
from keras.preprocessing.sequence import pad_sequences

In [6]:
def remove_special_character(text):
    return re.sub(r'[^0-9a-zA-Zກຂຄງຈສຊຍດຕຖທນບປຜຝພຟມຢລຫຼຣວຫອຮໜໝໆຽະາ ິ ີ ຶ ື ໂ ໍເແ ຸ ູຳໄໃ ັ ົ ່ ້ ໌+]', '', text)

def remove_spaces(text):
    
    return text.replace(" ", "")

In [120]:
enc_in = pickle.load(open("question.pkl", "rb"))

In [121]:
enc_in

array([[517,   0,   0, ...,   0,   0,   0],
       [252,   0,   0, ...,   0,   0,   0],
       [243,   0,   0, ...,   0,   0,   0],
       ...,
       [304, 413, 445, ...,   0,   0,   0],
       [304, 413, 445, ...,   0,   0,   0],
       [304, 413, 445, ...,   0,   0,   0]])

In [126]:
def decode_sequence(input_seq):
    for sentence in enc_in:
        if np.array_equal(input_seq, [sentence]):
            states_value = enc_model.predict(input_seq)

            target_seq = np.zeros((1,1))
            target_seq[0,0] = vocab['<SOS>']

            stop_condition = False
            decoded_sentence = ''
            while not stop_condition:
                output_tokens, h, c = dec_model.predict([target_seq] + states_value)
                sampled_token_index = np.argmax(output_tokens[0, -1, :])
                sampled_word = inv_vocab[sampled_token_index]
                decoded_sentence += '' + sampled_word

                if(sampled_word == '<EOS>' or len(word_tokenize(decoded_sentence)) > 100):
                    stop_condition = True

                target_seq = np.zeros((1,1))
                target_seq[0,0] = sampled_token_index
                
                states_value = [h,c]

            return decoded_sentence
    return "ຂໍອະໄພ, ບໍ່ສາມາດຕອບຄຳຖາມນີ້ໄດ້"  

In [125]:
#TEST
input_text = "ເກຣດA ໄດ້ຄະແນນເທົ່າໃດ"
input_seq = remove_special_character(input_text)
input_seq = remove_spaces(input_seq)
txt = []
lst = []
input_seq = word_tokenize(input_seq)
words = []
for word in input_seq:
    if word in keyword_list:
        words.append(word)
    #words = [w for w in input_seq if w in keyword_list]
for x in words:
    try:
        lst.append(vocab[x])
    except:
        lst.append(vocab['<OUT>'])
    
txt.append(lst)

input_padded_seq = pad_sequences(txt, 15, padding='post', truncating='post')
response = decode_sequence(input_padded_seq)
print(f"You: {input_text} / {input_padded_seq}")
print(f"Bot: {response}")

You: ເກຣດA ໄດ້ຄະແນນເທົ່າໃດ / [[107 262   0   0   0   0   0   0   0   0   0   0   0   0   0]]
Bot: ເກຣດ A ຕ້ອງມີຄະແນນຕັ້ງແຕ່ 91 ຄະແນນຂຶ້ນໄປ<EOS>


In [127]:
while True:
    input_text = input("You: ")
    if input_text == 'exit':
        break
    input_seq = remove_special_character(input_text)
    input_seq = remove_spaces(input_seq)
    txt = []
    lst = []
    input_seq = word_tokenize(input_seq)
    words = [word for word in input_seq if word in keyword_list]
            
    for x in words:
        try:
            lst.append(vocab[x])
        except:
            lst.append(vocab['<OUT>'])
    
    txt.append(lst)
    
    input_padded_seq = pad_sequences(txt, 15, padding='post', truncating='post')
    response = decode_sequence(input_padded_seq)
    print(f"You: {input_text} / {input_padded_seq}")
    print(f"Bot: {response}")

You: ສາຂາວິທະຍາສາດຄອມພິວເຕີມີຫຼັກສູດໃດແດ່ / [[523 496 259 554   0   0   0   0   0   0   0   0   0   0   0]]
Bot: ສາຂາວິທະຍາສາດຄອມພິວເຕີປະກອບມີ 3 ຫຼັກສູດຄື: ຫຼັກສູດຕໍ່ເນື່ອງປະລິນຍາຕີ, ຫຼັກສູດປະລິນຍາຕີ ແລະ ຫຼັກສູດປະລິນຍາໂທ<EOS>
You: ຢາກຮຽນຂຽນໂປຣແກຣມຕ້ອງຮຽນສາຂາໃດ / [[586 672 586 523   0   0   0   0   0   0   0   0   0   0   0]]
Bot: ຂໍອະໄພ, ບໍ່ສາມາດຕອບຄຳຖາມນີ້ໄດ້
You: ຢາກຂຽນເວັບຕ້ອງຮຽນສາຂາໃດ / [[586 523   0   0   0   0   0   0   0   0   0   0   0   0   0]]
Bot: ຂໍອະໄພ, ບໍ່ສາມາດຕອບຄຳຖາມນີ້ໄດ້
