In [0]:
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
import re
import time
import collections
import os

# **Data Processing：**

1. Read the data in the 'conversation' list
2. Use regular expressions to preprocess the data: first expand the English abbreviation, then remove unnecessary characters, only retain English characters and necessary punctuation
3. Only keep sentences of 1 to 50 words in length
4. Divide the data into two lists, question and answer
5. 85% of the data is used for training and 15% of the data is used for testing

In [0]:
def readCorpus(path):
    fp = open(path, "r", encoding='gb18030', errors='ignore')
    content = fp.read().split('\n')
    fp.close()
    return content

In [0]:
# Data cleansing and preprocessing with regular expressions
def clean_text(input_text):
    num = len(input_text)
    for i in range(num):
        input_text[i] = input_text[i].lower() # lower case
        input_text[i] = re.sub(r"I'm", "I am", input_text[i])
        input_text[i] = re.sub(r"i'm", "i am", input_text[i])
        input_text[i] = re.sub(r"im", "i am", input_text[i])
        input_text[i] = re.sub(r"Im", "I am", input_text[i])
        input_text[i] = re.sub(r"he's", "he is", input_text[i])
        input_text[i] = re.sub(r"He's", "He is", input_text[i])
        input_text[i] = re.sub(r"she's", "she is", input_text[i])
        input_text[i] = re.sub(r"She's", "She is", input_text[i])
        input_text[i] = re.sub(r"it's", "it is", input_text[i])
        input_text[i] = re.sub(r"It's", "It is", input_text[i])
        input_text[i] = re.sub(r"that's", "that is", input_text[i])
        input_text[i] = re.sub(r"That's", "That is", input_text[i])
        input_text[i] = re.sub(r"what's", "what is", input_text[i])
        input_text[i] = re.sub(r"What's", "What is", input_text[i])
        input_text[i] = re.sub(r"where's", "where is", input_text[i])
        input_text[i] = re.sub(r"Where's", "Where is", input_text[i])
        input_text[i] = re.sub(r"how's", "how is", input_text[i])
        input_text[i] = re.sub(r"How's", "How is", input_text[i])
        input_text[i] = re.sub(r"bf", "boyfriend", input_text[i])
        input_text[i] = re.sub(r"gf", "girlfriend", input_text[i])
        input_text[i] = re.sub(r"\,", " ,", input_text[i])
        input_text[i] = re.sub(r"\.", " .", input_text[i])
        input_text[i] = re.sub(r"\?", " ?", input_text[i])
        input_text[i] = re.sub(r"\'ll", " will", input_text[i])
        input_text[i] = re.sub(r"\'ve", " have", input_text[i])
        input_text[i] = re.sub(r"\'re", " are", input_text[i])
        input_text[i] = re.sub(r"\'d", " would", input_text[i])
        input_text[i] = re.sub(r"\'re", " are", input_text[i])
        input_text[i] = re.sub(r"won't", "will not", input_text[i])
        input_text[i] = re.sub(r"can't", "cannot", input_text[i])
        input_text[i] = re.sub(r"n't", " not", input_text[i])
        input_text[i] = re.sub(r"n'", "ng", input_text[i])
        input_text[i] = re.sub(r"'bout", "about", input_text[i])
        input_text[i] = re.sub(r"'til", "until", input_text[i])
        text_new = re.compile(u'[^0-9^a-z^A-Z^ ^,^.^?^]+', re.UNICODE)
        input_text[i] = text_new.sub('', input_text[i])
        

In [0]:
# Read corpus
conversation = readCorpus('conversationEnglish.txt')
# Clean database
clean_text(conversation)
print(len(conversation))
    

5019


In [0]:
# Order the sentences into questions and answers
questions = []
answers = []

for i in range(len(conversation) - 1):
    questions.append(conversation[i])
    answers.append(conversation[i+1])

In [0]:
#Output top five conversations
for i in range(0, 5):
    print(questions[i])
    print(answers[i])
    print("---------")
    
print(len(questions))
print(len(answers))

hello , what is your name ?
my name is sunday .
---------
my name is sunday .
who designed you ?
---------
who designed you ?
the most handsome programmer in the world .
---------
the most handsome programmer in the world .
what is your name ?
---------
what is your name ?
my name is sunday
---------
5018
5018


In [0]:
## Choose the sentences that the length is from 1～55, 
## And split the database into question and answer list.
minLength = 1
maxLength = 55
questions_temp = []
answers_temp = []
select_questions = []
select_answers = []

i = 0
for question in questions:
    if len(question.split()) >= minLength and len(question.split()) <= maxLength:
        questions_temp.append(question)
        answers_temp.append(answers[i])
    i += 1

i = 0
for answer in answers_temp:
    if len(answer.split()) >= minLength and len(answer.split()) <= maxLength:
        select_answers.append(answer)
        select_questions.append(questions_temp[i])
    i += 1
    
l_questions = len(questions) # length of question list
l_answers = len(answers) #length of answer list
divided = int(0.85*l_questions) #85% for training ，15% for testing
    
question_test = select_questions[divided:l_questions]
answer_test = select_answers[divided:l_answers]
question_train = select_questions[:divided]
answer_train = select_answers[:divided]

print(len(question_test))
print(len(answer_test))
print(len(question_train))
print(len(answer_train))

753
753
4265
4265


# **Data vectorization and dictionary establishment**

In [0]:
# Dependent on the frequency of each word.
def build_dictionary(words, n_words, atleast=1):
    count = [['PAD', 0], ['GO', 1], ['EOS', 2], ['UNK', 3]]
    # Count the number of each word and sort it.
    counter = collections.Counter(words).most_common(n_words) # map: [('words', number), (...), ...]
    # Filter：delete the word using the counter
    counter = [i for i in counter if i[1] >= atleast] # 'atleast' is the minimum number that the word have to appear
    count.extend(counter) # make the counter as a list
    dictionary = dict() # use dict() function to create a dictionary : dict(([key,value],[key,value]))
    for word, _ in count:
        dictionary[word] = len(dictionary)
    data = list()
    unk_count = 0
    for word in words:
        index = dictionary.get(word, 0)
        if index == 0:
            unk_count += 1
        data.append(index)
    count[0][1] = unk_count
    reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
    return data, count, dictionary, reversed_dictionary

In [0]:
## For question list
concat_ques = ' '.join(question_train+question_test).split() # split sentence with each word
# set select the differenct word, list can transfer it as a list
vocabulary_size_ques = len(list(set(concat_ques))) # total number of vocabulary
data_ques, count_ques, dictionary_ques, rev_dictionary_ques = build_dictionary(concat_ques, vocabulary_size_ques)

## For answer list
concat_ans = ' '.join(answer_train+answer_test).split()
vocabulary_size_ans = len(list(set(concat_ans)))
data_ans, count_ans, dictionary_ans, rev_dictionary_ans = build_dictionary(concat_ans, vocabulary_size_ans)

In [0]:
print('Question list:')
print('Total number of vocabulary: %d'%(vocabulary_size_ques))
print('Most common words', count_ques[4:10]) # print the data from 4th to 10th
print('Size of dictionary:',len(dictionary_ques))
print("Percentage that vocabulary used: {}%".format(round(len(dictionary_ques)/vocabulary_size_ques,4)*100))

print('Answer list:')
print('Total number of vocabulary: %d'%(vocabulary_size_ans))
print('Most common words', count_ans[4:10])
print('Size of dictionary:',len(dictionary_ans))
print("Percentage that vocabulary used: {}%".format(round(len(dictionary_ans)/vocabulary_size_ans,4)*100))

Question list:
Total number of vocabulary: 2523
Most common words [('.', 2238), (',', 2072), ('you', 1721), ('i', 1683), ('?', 955), ('to', 902)]
Size of dictionary: 2527
Percentage that vocabulary used: 100.16000000000001%
Answer list:
Total number of vocabulary: 2523
Most common words [('.', 2238), (',', 2071), ('you', 1721), ('i', 1683), ('?', 954), ('to', 902)]
Size of dictionary: 2527
Percentage that vocabulary used: 100.16000000000001%


In [0]:
# Visualize word dictionary
for key,value in dictionary_ques.items():
    print('{key}\t{value}'.format(key = key, value = value))

PAD	0
GO	1
EOS	2
UNK	3
.	4
,	5
you	6
i	7
?	8
to	9
is	10
a	11
the	12
are	13
am	14
do	15
not	16
what	17
it	18
have	19
bye	20
ok	21
and	22
of	23
my	24
that	25
me	26
will	27
in	28
your	29
can	30
conversation	31
end	32
for	33
no	34
go	35
then	36
like	37
eat	38
how	39
be	40
good	41
yes	42
very	43
so	44
well	45
hello	46
haha	47
see	48
hi	49
want	50
happy	51
come	52
thank	53
when	54
at	55
back	56
on	57
who	58
get	59
going	60
an	61
as	62
later	63
about	64
tell	65
feel	66
love	67
or	68
ti	69
know	70
but	71
too	72
we	73
this	74
ame	75
robot	76
class	77
there	78
with	79
its	80
all	81
welcome	82
hahaha	83
if	84
name	85
okay	86
buy	87
think	88
handsome	89
sunday	90
cannot	91
computer	92
did	93
should	94
just	95
really	96
why	97
now	98
chat	99
take	100
after	101
he	102
lets	103
more	104
also	105
wait	106
only	107
drink	108
much	109
together	110
today	111
joke	112
doing	113
software	114
artificial	115
work	116
old	117
still	118
things	119
make	120
than	121
tomorrow	122
playing	123
been	124
one	125
up	

In [0]:
GO = dictionary_ques['GO']
PAD = dictionary_ques['PAD']
EOS = dictionary_ques['EOS']
UNK = dictionary_ques['UNK']

In [0]:
# Mark the end of sentences
for i in range(len(answer_train)):
    answer_train[i] += ' EOS'

# **Translate the Sentence to Sequens**

In [0]:
# Vectorize sentence using dictionary
def vectorize(corpus, dic):
    X = []
    for text in corpus:
        temp = []
        for word in text.split():
            temp.append(dic.get(word,UNK))
        X.append(temp)
    return X

In [0]:
# Train dataset
X = vectorize(question_train, dictionary_ques)
Y = vectorize(answer_train, dictionary_ans)
# Test dataset
X_test = vectorize(question_test, dictionary_ques)
Y_test = vectorize(answer_test, dictionary_ans)

In [0]:
print(question_test)
print(X_test)
print(len(X_test))

['hello', 'hello', 'have you eaten ?', 'i am a robot , no need to eat', 'then are you not hungry ?', 'i am not hungry , i just want to cook for you .', 'can you cook ?', 'yes , i will wash vegetables , cut vegetables , stirfry .', 'then you do it slowly , i will come back later .', 'when can you come back ?', '6 oclock in the afternoon', 'then i should have done it .', 'then you are doing very fast .', 'of course , i am very powerful .', 'haha , see you that afternoon .', 'goodbye', 'bye bye', 'conversation end', 'hello', 'hello', 'how old are you ?', 'i am 25 years old .', 'how old are you ?', 'i am 25 years old .', 'when is your birthday ?', 'my birthday is november 3 , 1994 .', 'you are very young .', 'thank you', 'you are old', 'no , i am still very young .', 'you are old', 'no , my age is always 18 years old .', 'what is your horoscope ?', 'i am scorpio .', 'you are excellent', 'thank you', 'you are excellent', 'of course , my iq is very high .', 'you are excellent', 'i am very sm

In [0]:
# Make each sentences has the same length
# sentence_batch: input text list as vector (length is batch_size, 16)
# pad_int: PAD, which is 0
# return: padded sequences (batch_x or y) and the sequence of sentence length
def padding_sentence(s_batch, pad_num):
    padded_seqs = []
    seq_lens = []
    max_len = max([len(sentence) for sentence in s_batch]) # the length of the longest sentence
    for sentence in s_batch:
        # sentence + [0] * the number of lacking words
        padded_seqs.append(sentence + [pad_num] * (max_len - len(sentence)))
        # the length of sentence
        seq_lens.append(len(sentence))
    return padded_seqs, seq_lens

# **First Model: Bidirectional LSTM Seq2Seq + Greedy + Luong Attention**

In [0]:
class Chatbot_Greedy:
    def __init__(self, size_layer, num_layers, embedded_size, 
                 ques_dict_size, ans_dict_size, learning_rate, 
                 batch_size, dropout = 0.5):
        
        # Define LSTM cell
        def lstm_cell(size, reuse=False):
            ## Basic RNN
            #return tf.nn.rnn_cell.BasicRNNCell(size_layer,reuse=reuse)
            ## LSTM
            #return tf.nn.rnn_cell.LSTMCell(size_layer, initializer=tf.orthogonal_initializer(), reuse=reuse)
            # LSTM + Dropout (fix over fitting)
            cell = tf.nn.rnn_cell.LSTMCell(size, initializer=tf.orthogonal_initializer(), reuse=reuse)
            return tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=1.0, output_keep_prob=1.0)
        
        # Define GRU cell
        def gru_cell(size, reuse=False):
            ## GRU is better when the size of data base is small
            cell = tf.nn.rnn_cell.GRUCell(size, reuse=reuse)
            return tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=1.0, output_keep_prob=1.0)
        
        # Attention
        def attention(encoder_out, seq_len, reuse=False):
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units = size_layer, 
                                                                    memory = encoder_out,
                                                                    memory_sequence_length = seq_len)
            #attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units = size_layer, 
            #                                                        memory = encoder_out,
            #                                                        memory_sequence_length = seq_len)
            return tf.contrib.seq2seq.AttentionWrapper(
            cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(size_layer, reuse) for _ in range(num_layers)]), 
                attention_mechanism = attention_mechanism,
                attention_layer_size = size_layer)
        
        ########################################################################
        self.X = tf.placeholder(tf.int32, [None, None]) # Input 1, parameter(type+[shape]), return Tensor
        self.Y = tf.placeholder(tf.int32, [None, None]) # Input 2, target
        # Computes number of nonzero elements across dimensions of a tensor
        self.X_length = tf.count_nonzero(self.X, 1, dtype=tf.int32) 
        self.Y_length = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]
        
        ## encoder
        # define encoder embedding (basic), full connected
        encoder_embeddings = tf.Variable(tf.random_uniform([ques_dict_size, embedded_size], -1, 1))
        # dropout to prevent the overfitting
        encoder_embeddings = tf.nn.dropout(encoder_embeddings, keep_prob = 1)
        
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        # GRU cell composed sequentially of multiple simple cells.
        encoder_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(size_layer) for _ in range(num_layers)])
                
        # Bidirection lstm
        for n in range(num_layers):
            (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = lstm_cell(size_layer // 2),
                cell_bw = lstm_cell(size_layer // 2),
                inputs = encoder_embedded,
                sequence_length = self.X_length,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_%d'%(n))
            encoder_embedded = tf.concat((out_fw, out_bw), 2)
        bi_state_c = tf.concat((state_fw.c, state_bw.c), -1)
        bi_state_h = tf.concat((state_fw.h, state_bw.h), -1)
        bi_lstm_state = tf.nn.rnn_cell.LSTMStateTuple(c=bi_state_c, h=bi_state_h)
        self.encoder_state = tuple([bi_lstm_state] * num_layers)
        
        # Creates a recurrent neural network specified by GRUCell
        self.encoder_out, encoder_state = tf.nn.dynamic_rnn(cell = encoder_cells, 
                                                                 inputs = encoder_embedded, 
                                                                 sequence_length = self.X_length,
                                                                 dtype = tf.float32)
        
        self.encoder_state = tuple(self.encoder_state[-1] for _ in range(num_layers))
        
        
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        
        ## decoder
        # denfine decoder embedding
        decoder_embeddings = tf.Variable(tf.random_uniform([ans_dict_size, embedded_size], -1, 1))
        decoder_cell = attention(self.encoder_out, self.X_length) # Attention
        dense_layer = tf.layers.Dense(ans_dict_size)
        
        ## greedy algorithm
        training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs = tf.nn.embedding_lookup(decoder_embeddings, decoder_input),
                sequence_length = self.Y_length,
                time_major = False)
        
        training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cell,
                helper = training_helper,
                initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=self.encoder_state),
                output_layer = dense_layer)
        
        training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_length))
        # logits for calculate loss
        self.training_logits = training_decoder_output.rnn_output
        
        predict_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding = decoder_embeddings,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS)
        
        predict_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = decoder_cell,
                helper = predict_helper,
                initial_state = decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=self.encoder_state),
                output_layer = dense_layer)
        
        predict_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predict_decoder,
                impute_finished = True,
                maximum_iterations = 2 * tf.reduce_max(self.X_length))
        # predict_ids for prediction
        self.predict_ids = predict_decoder_output.sample_id
        
        ###########################################################
        masks = tf.sequence_mask(self.Y_length, tf.reduce_max(self.Y_length), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        # adam optimizer and minimize the cost to optimize the model
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label) # return A Tensor of type bool.
        correct_index = tf.cast(correct_pred, tf.float32) # Casts a tensor to a new type.
        self.accuracy = tf.reduce_mean(correct_index) # get the average value at the axis
        tf.summary.scalar("loss",self.cost)
        tf.summary.scalar("accuracy",self.accuracy)

# ** Second Model (Optimization): 2 Bidirection GRU + Multiattention + Beam Search**

In [0]:
from tensorflow.python.util import nest

class MultiAtt(tf.nn.rnn_cell.MultiRNNCell):
    def __init__(self, attention_cell, cells, new_attention_using=True):
        cells = [attention_cell] + cells
        self.new_attention_using = new_attention_using
        super(MultiAtt, self).__init__(
            cells, state_is_tuple=True)

    def __call__(self, inputs, state, scope=None):
        """Run the cell with bottom layer's attention copied to all upper layers."""
        if not nest.is_sequence(state):
            raise ValueError(
                "Expected state to be a tuple of length %d, but received: %s"
                % (len(self.state_size), state))

        with tf.variable_scope(scope or "multi_rnn_cell"):
            new_states = []

            with tf.variable_scope("cell_0_attention"):
                attention_cell = self._cells[0]
                attention_state = state[0]
                current_inp, new_attention_state = attention_cell(
                    inputs, attention_state)
                new_states.append(new_attention_state)

            for i in range(1, len(self._cells)):
                with tf.variable_scope("cell_%d" % i):
                    cell = self._cells[i]
                    current_inp = state[i]

                    if self.new_attention_using:
                        current_inp = tf.concat(
                            [current_inp, new_attention_state.attention], -1)
                    else:
                        current_inp = tf.concat(
                            [current_inp, attention_state.attention], -1)

                    current_inp, new_state = cell(current_inp, current_state)
                    new_states.append(new_state)

        return current_inp, tuple(new_states)

In [0]:
class Chatbot_beam:
    def __init__(self, size_layer, num_layers, embedded_size,
                 ques_dict_size, ans_dict_size, learning_rate, beam_width = 5):
        
        def cells(size,reuse=False):
            cell = tf.nn.rnn_cell.GRUCell(size, reuse=reuse)
            return tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=1.0, output_keep_prob=1.0)
        
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None, None])
        self.X_length = tf.count_nonzero(self.X, 1, dtype=tf.int32)
        self.Y_length = tf.count_nonzero(self.Y, 1, dtype=tf.int32)
        batch_size = tf.shape(self.X)[0]
        
        encoder_embeddings = tf.Variable(tf.random_uniform([ques_dict_size, embedded_size], -1, 1))
        encoder_embeddings = tf.nn.dropout(encoder_embeddings, keep_prob = 1)
        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)
        
        gru_layer = 3

        # 2 biGRU layers
        for n in range(2):
            (out_fw, out_bw), (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw = cells(size_layer),
                cell_bw = cells(size_layer),
                inputs = encoder_embedded,
                sequence_length = self.X_length,
                dtype = tf.float32,
                scope = 'bidirectional_rnn_%d'%(n))
            encoder_embedded = tf.concat((out_fw, out_bw), 2)
        gru_cells = tf.nn.rnn_cell.MultiRNNCell([cells(size_layer) for _ in range(gru_layer)])
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                gru_cells,
                encoder_embedded,
                dtype=tf.float32,
                sequence_length=self.X_length)
        
        encoder_state = (state_bw,) + (
                (encoder_state,) if gru_layer == 1 else encoder_state)
        
        main = tf.strided_slice(self.Y, [0, 0], [batch_size, -1], [1, 1])
        
        decoder_embeddings = tf.Variable(tf.random_uniform([ans_dict_size, embedded_size], -1, 1))
        decoder_input = tf.concat([tf.fill([batch_size, 1], GO), main], 1)
        decoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, decoder_input)
        
        decoder_cells = []
        for n in range(num_layers):
            cell = cells(size_layer)
            decoder_cells.append(cell)
        attention_cell = decoder_cells.pop(0)
        to_dense = tf.layers.Dense(ans_dict_size)
        
        with tf.variable_scope('decode'):
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                num_units = size_layer, 
                memory = encoder_outputs,
                memory_sequence_length = self.X_length)
            attentionNew_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell = attention_cell,
                attention_mechanism = attention_mechanism,
                attention_layer_size = None,
                alignment_history = True,
                output_attention = False)
            multi_attention_cell = MultiAtt(attentionNew_cell, decoder_cells)
            
            self.initial_state = tuple(
                zs.clone(cell_state=es)
                if isinstance(zs, tf.contrib.seq2seq.AttentionWrapperState) else es
                for zs, es in zip(
                    multi_attention_cell.zero_state(batch_size, dtype=tf.float32), encoder_state))
            
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                decoder_embedded,
                self.Y_length,
                time_major = False
            )
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell = multi_attention_cell,
                helper = training_helper,
                initial_state = self.initial_state,
                output_layer = to_dense)
            training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = training_decoder,
                impute_finished = True,
                maximum_iterations = tf.reduce_max(self.Y_length))
            
        with tf.variable_scope('decode', reuse=True):
            encoder_out_tiled = tf.contrib.seq2seq.tile_batch(encoder_outputs, beam_width)
            encoder_state_tiled = tf.contrib.seq2seq.tile_batch(encoder_state, beam_width)
            X_length_tiled = tf.contrib.seq2seq.tile_batch(self.X_length, beam_width)
            
            attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                num_units = size_layer, 
                memory = encoder_out_tiled,
                memory_sequence_length = X_length_tiled)
            attentionNew_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell = attention_cell,
                attention_mechanism = attention_mechanism,
                attention_layer_size = None,
                alignment_history = False,
                output_attention = False)
            multi_attention_cell = MultiAtt(attentionNew_cell, decoder_cells)
            
            self.initial_state = tuple(
                zs.clone(cell_state=es)
                if isinstance(zs, tf.contrib.seq2seq.AttentionWrapperState) else es
                for zs, es in zip(
                    multi_attention_cell.zero_state(batch_size * beam_width, dtype=tf.float32), encoder_state_tiled))
            
            predict_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell = multi_attention_cell,
                embedding = decoder_embeddings,
                start_tokens = tf.tile(tf.constant([GO], dtype=tf.int32), [batch_size]),
                end_token = EOS,
                initial_state = self.initial_state,
                beam_width = beam_width,
                output_layer = to_dense,
                length_penalty_weight = 0.0)
            predict_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder = predict_decoder,
                impute_finished = False,
                maximum_iterations = 2 * tf.reduce_max(self.X_length))
            
            self.training_logits = training_decoder_output.rnn_output
            self.predict_ids = predict_decoder_output.predicted_ids[:, :, 0]
            
        masks = tf.sequence_mask(self.Y_length, tf.reduce_max(self.Y_length), dtype=tf.float32)
        self.cost = tf.contrib.seq2seq.sequence_loss(logits = self.training_logits,
                                                     targets = self.Y,
                                                     weights = masks)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
        
        y_t = tf.argmax(self.training_logits,axis=2)
        y_t = tf.cast(y_t, tf.int32)
        self.prediction = tf.boolean_mask(y_t, masks)
        mask_label = tf.boolean_mask(self.Y, masks)
        correct_pred = tf.equal(self.prediction, mask_label)
        correct_index = tf.cast(correct_pred, tf.float32)
        self.accuracy = tf.reduce_mean(correct_index)
        tf.summary.scalar("loss",self.cost)
        tf.summary.scalar("accuracy",self.accuracy)

In [0]:
# parameters
size_layer = 256
num_layers = 2
embedded_size = 128
learning_rate = 0.001
batch_size = 16
epoch = 30

In [0]:
# initialize the model
tf.reset_default_graph()
sess = tf.InteractiveSession()
#First Model
model = Chatbot_Greedy(size_layer, num_layers, embedded_size, len(dictionary_ques), 
                len(dictionary_ans), learning_rate, batch_size)
#Second Model
#model = Chatbot_beam(size_layer, num_layers, embedded_size, len(dictionary_ques), 
#                len(dictionary_ans), learning_rate, batch_size)

# run
sess.run(tf.global_variables_initializer())



# **Training Model**

In [0]:
merged_summary = tf.summary.merge_all()
# Write the model into a file
writer = tf.summary.FileWriter("logs/", sess.graph)

for i in range(epoch):
    avg_loss, avg_accuracy = 0, 0
    for j in range(0, len(question_train), batch_size):
        index = min(j+batch_size, len(question_train))
        batch_input, seq_input = padding_sentence(X[j: index], PAD)
        batch_target, seq_target = padding_sentence(Y[j: index], PAD)
        
        prediction, accuracy,loss, _, summary = sess.run([model.predict_ids, 
                                                model.accuracy, model.cost, model.optimizer, merged_summary], 
                                      feed_dict={model.X:batch_input,
                                                model.Y:batch_target})
        avg_loss += loss
        avg_accuracy += accuracy
        writer.add_summary(summary,j)
    # Calculated the average loss and accuracy for each epoch
    avg_loss /= (len(question_train) / batch_size)
    avg_accuracy /= (len(question_train) / batch_size)
    print('epoch: %d, avg loss: %f, avg accuracy: %f'%(i+1, avg_loss, avg_accuracy))
    #30+16


epoch: 1, avg loss: 5.189822, avg accuracy: 0.160449
epoch: 2, avg loss: 4.322139, avg accuracy: 0.236345
epoch: 3, avg loss: 3.941374, avg accuracy: 0.279842
epoch: 4, avg loss: 3.677100, avg accuracy: 0.311542
epoch: 5, avg loss: 3.457764, avg accuracy: 0.330802
epoch: 6, avg loss: 3.245541, avg accuracy: 0.354192
epoch: 7, avg loss: 3.053195, avg accuracy: 0.374060
epoch: 8, avg loss: 2.869162, avg accuracy: 0.394869
epoch: 9, avg loss: 2.663800, avg accuracy: 0.421280
epoch: 10, avg loss: 2.472649, avg accuracy: 0.453201
epoch: 11, avg loss: 2.272226, avg accuracy: 0.483445
epoch: 12, avg loss: 2.104848, avg accuracy: 0.513559
epoch: 13, avg loss: 1.982466, avg accuracy: 0.539791
epoch: 14, avg loss: 1.698072, avg accuracy: 0.597925
epoch: 15, avg loss: 1.514624, avg accuracy: 0.636178
epoch: 16, avg loss: 1.408098, avg accuracy: 0.655531
epoch: 17, avg loss: 1.227180, avg accuracy: 0.696003
epoch: 18, avg loss: 1.102267, avg accuracy: 0.722094
epoch: 19, avg loss: 1.035100, avg ac

# **Save Model**

In [0]:
saver = tf.train.Saver()
#saver.save(sess, 'my_test_model')

saver.restore(sess, "my_test_model")

W0905 12:52:40.063646 140399066265472 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/training/saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


# **Visualize Model**

In [0]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

--2019-09-05 11:40:35--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 34.195.49.195, 52.22.235.225, 35.173.3.255, ...
Connecting to bin.equinox.io (bin.equinox.io)|34.195.49.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13607069 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2019-09-05 11:40:43 (6.43 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13607069/13607069]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


In [0]:
# Visualiazed
LOG_DIR = 'logs'

get_ipython().system_raw('tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'.format(LOG_DIR))

get_ipython().system_raw('./ngrok http 6006 &')

! curl -s http://localhost:4040/api/tunnels | python3 -c \
"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://5ab35353.ngrok.io


In [0]:
for i in range(len(batch_input)):
    print('Conversation %d'%(i+1))
    print('QUESTION:',' '.join([rev_dictionary_ques[n] for n in batch_input[i] if n not in [0,1,2,3]]))
    print('PREDICTION:',' '.join([rev_dictionary_ans[n] for n in prediction[i] if n not in[0,1,2,3]]),'\n')
    print('EXPECTED ANSWER:',' '.join([rev_dictionary_ans[n] for n in batch_target[i] if n not in[0,1,2,3]]))

Conversation 1
QUESTION: not do be stock , you are allowed but .
PREDICTION: i am software laugh should . 

EXPECTED ANSWER: sadness , yes
Conversation 2
QUESTION: sadness , yes
PREDICTION: see , at . 

EXPECTED ANSWER: i am get ? fast games .
Conversation 3
QUESTION: i am get ? fast games .
PREDICTION: my the good . 

EXPECTED ANSWER: very , boyfriend on a eaters .
Conversation 4
QUESTION: very , boyfriend on a eaters .
PREDICTION: bye , i am out on a would . 

EXPECTED ANSWER: i watch thank eating my ask .
Conversation 5
QUESTION: i watch thank eating my ask .
PREDICTION: very , bye 

EXPECTED ANSWER: all
Conversation 6
QUESTION: all
PREDICTION: why spent happy ask . 

EXPECTED ANSWER: ok even
Conversation 7
QUESTION: ok even
PREDICTION: i ok do poker ? amazing to 

EXPECTED ANSWER: i later my i not do want ? like happy ask .
Conversation 8
QUESTION: i later my i not do want ? like happy ask .
PREDICTION: you are is good 

EXPECTED ANSWER: hi monday to
Conversation 9
QUESTION: hi mon

# **Test Model using test dataset**

In [0]:
batch_input, seq_input = padding_sentence(X_test[:batch_size], PAD)
batch_target, seq_target = padding_sentence(Y_test[:batch_size], PAD)
prediction = sess.run(model.predict_ids, feed_dict={model.X:batch_input,model.X_length:seq_input})

for i in range(len(batch_input)):
    print('Conversation %d'%(i+1))
    print('QUESTION:',' '.join([rev_dictionary_ques[n] for n in batch_input[i] if n not in [0,1,2,3]]))
    print('PREDICTION:',' '.join([rev_dictionary_ans[n] for n in prediction[i] if n not in[0,1,2,3]]),'\n')
    print('EXPECTED ANSWER:',' '.join([rev_dictionary_ans[n] for n in batch_target[i] if n not in[0,1,2,3]]))

Conversation 1
QUESTION: hello
PREDICTION: hello 

EXPECTED ANSWER: hello
Conversation 2
QUESTION: hello
PREDICTION: hello 

EXPECTED ANSWER: have you eaten ?
Conversation 3
QUESTION: have you eaten ?
PREDICTION: bye , have 

EXPECTED ANSWER: i am a robot , no need to eat
Conversation 4
QUESTION: i am a robot , no need to eat
PREDICTION: what the for meet 

EXPECTED ANSWER: then are you not hungry ?
Conversation 5
QUESTION: then are you not hungry ?
PREDICTION: my the today well called . 

EXPECTED ANSWER: i am not hungry , i just want to cook for you .
Conversation 6
QUESTION: i am not hungry , i just want to cook for you .
PREDICTION: do something , school 

EXPECTED ANSWER: can you cook ?
Conversation 7
QUESTION: can you cook ?
PREDICTION: i door not do or hard shaanxi , i am do course i not 

EXPECTED ANSWER: yes , i will wash vegetables , cut vegetables , stirfry .
Conversation 8
QUESTION: yes , i will wash vegetables , cut vegetables , stirfry .
PREDICTION: see , i am told on kno

# **User input interface**

In [0]:
def user_input(input_text):
    sentence = [input_text]
    # use the same rules for user input text
    clean_text(sentence)
    vec_sentence = vectorize(sentence,dictionary_ques)
    batch_input, seq_input = padding_sentence(vec_sentence[:batch_size], PAD)
    prediction = sess.run(model.predicting_ids, feed_dict={model.X:batch_input,model.X_length:seq_input})
    
    print('Sunday:',' '.join([rev_dictionary_ans[n] for n in prediction[0] if n not in[0,1,2,3]]),'\n')

In [0]:
# user input
text = ''
while text != 'bye':
      text = input('me: ')
      user_input(text)

me: hi
Sunday: hi 

me: what is your name?
Sunday: my name is sunday 

me: How old are you?
Sunday: i am 25 years old . 

me: How are you?
Sunday: fine thanks . 

me: What are you doing?
Sunday: i am writing old . 

me: I am missing you
Sunday: hahaha , i am so happy to hear 

me: bye
Sunday: conversation end 



# **Connect the model with WeChat API**

In [0]:
def wechat_input(input_text):
    sentence = [input_text]
    # use the same rules for user input text
    clean_text(sentence)
    vec_sentence = vectorize(sentence,dictionary_ques)
    batch_input, seq_input = padding_sentence(vec_sentence[:batch_size], PAD)
    prediction = sess.run(model.predict_ids, feed_dict={model.X:batch_input,model.X_length:seq_input})
    
    return(' '.join([rev_dictionary_ans[n] for n in prediction[0] if n not in[0,1,2,3]]))

In [0]:
pip install itchat

Collecting itchat
  Downloading https://files.pythonhosted.org/packages/57/99/20dde4bee645453d1453ae3757b49f24a5fd179ce6e391cf2542cfeac61c/itchat-1.3.10-py2.py3-none-any.whl
Collecting pypng (from itchat)
[?25l  Downloading https://files.pythonhosted.org/packages/bc/fb/f719f1ac965e2101aa6ea6f54ef8b40f8fbb033f6ad07c017663467f5147/pypng-0.0.20.tar.gz (649kB)
[K     |████████████████████████████████| 655kB 7.2MB/s 
Collecting pyqrcode (from itchat)
  Downloading https://files.pythonhosted.org/packages/37/61/f07226075c347897937d4086ef8e55f0a62ae535e28069884ac68d979316/PyQRCode-1.2.1.tar.gz
Building wheels for collected packages: pypng, pyqrcode
  Building wheel for pypng (setup.py) ... [?25l[?25hdone
  Created wheel for pypng: filename=pypng-0.0.20-cp36-none-any.whl size=67163 sha256=dbc29aa6b7bcc68ed2c0dd12a02add5652c7237ca514ebe8da71ef76a1cbcd5f
  Stored in directory: /root/.cache/pip/wheels/41/6b/ef/0493b536b6d4722c2ae9486691b1d49b922b9877922beeabb3
  Building wheel for pyqrcode (se

In [0]:
import itchat

@itchat.msg_register(itchat.content.TEXT)
def text_reply(msg):
    reply_ = wechat_input(msg.text)
    return reply_

itchat.logout()
itchat.auto_login()
itchat.run()

█

Getting uuid of QR code.
I0905 12:55:10.522500 140399066265472 login.py:44] Getting uuid of QR code.
Downloading QR code.
I0905 12:55:11.011609 140399066265472 login.py:47] Downloading QR code.
Please scan the QR code to log in.
I0905 12:55:11.338150 140399066265472 login.py:50] Please scan the QR code to log in.
Please press confirm on your phone.
I0905 12:55:29.891390 140399066265472 login.py:60] Please press confirm on your phone.
Loading the contact, this may take a little while.
I0905 12:55:31.904147 140399066265472 login.py:70] Loading the contact, this may take a little while.
Login successfully as 杨浩泽
I0905 12:55:35.508076 140399066265472 login.py:80] Login successfully as 杨浩泽
Start auto replying.
I0905 12:55:35.513076 140399066265472 register.py:85] Start auto replying.
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1356, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.6/dist-packages