In [0]:
import numpy as np
import pickle

### Loading the data

In [0]:
with open('train_qa.txt', mode = 'rb') as f:
    train_data = pickle.load(f)
    
with open('test_qa.txt', mode = 'rb') as f:
    test_data = pickle.load(f)

In [3]:
print(len(train_data))
print(len(test_data))

print(' '.join(train_data[0][0]))
print(' '.join(train_data[0][1]))
print(train_data[0][2])

10000
1000
Mary moved to the bathroom . Sandra journeyed to the bedroom .
Is Sandra in the hallway ?
no


### Create a vocabulary

In [0]:
vocab = set()

all_data = train_data + test_data

for story, question, answer in all_data:
    vocab = vocab.union(set(story))
    vocab = vocab.union(set(question))

vocab.add('no')
vocab.add('yes')

In [5]:
print(vocab)

{'Sandra', 'football', 'took', 'garden', 'put', 'yes', 'left', 'up', 'got', 'John', '.', 'Is', 'grabbed', 'Mary', 'moved', '?', 'dropped', 'kitchen', 'bathroom', 'the', 'went', 'travelled', 'journeyed', 'bedroom', 'down', 'milk', 'there', 'picked', 'Daniel', 'in', 'hallway', 'office', 'back', 'no', 'to', 'discarded', 'apple'}


In [0]:
# An extra +1 for keras zero padding
vocab_len = len(vocab) + 1

In [0]:
max_len_story = max(len(data[0]) for data in all_data)
max_len_question = max(len(data[1]) for data in all_data)

### Cleaning and tokenizing the data

In [8]:
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer

Using TensorFlow backend.


In [0]:
tokenizer = Tokenizer(filters = [])
tokenizer.fit_on_texts(vocab)

In [0]:
train_story_text = []
train_question_text = []
train_answer_text = []

for story, question, answer in train_data:
    train_story_text.append(story)
    train_question_text.append(question)

In [0]:
train_story_seq = tokenizer.texts_to_sequences(train_story_text)

### Padding the train and test data

In [0]:
def pad_data(data):

    X_story = []
    X_question = []
    y = []

    for story, question, answer in data:
        story_idx = [tokenizer.word_index[word.lower()]for word in story]
        question_idx = [tokenizer.word_index[word.lower()] for word in question]
        answer_idx = np.zeros(len(tokenizer.word_index) + 1)
        answer_idx[tokenizer.word_index[answer]] = 1

        X_story.append(story_idx)
        X_question.append(question_idx)
        y.append(answer_idx)
        
    return (pad_sequences(X_story, maxlen = max_len_story), pad_sequences(X_question, maxlen = max_len_question), np.array(y))

In [0]:
train_story, train_question, train_answer = pad_data(train_data)
test_story, test_question, test_answer = pad_data(test_data)

### Creating the model

In [0]:
from keras.models import Sequential, Model
from keras.layers.embeddings import Embedding
from keras.layers import Input, Activation, Dense, Permute, Dropout
from keras.layers import add, dot, concatenate
from keras.layers import LSTM

In [15]:
# Placeholders for inputs
story_input = Input((max_len_story, ))
question_input = Input((max_len_question, ))





***Input encoder m***

In [16]:
# Input gets embedded into a sequence of vectors
# Output -> (samples, max_len_story, embedding_dim)

input_encoder_m = Sequential()
input_encoder_m.add(Embedding(input_dim = vocab_len, output_dim = 64))
input_encoder_m.add(Dropout(0.3))



Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


***Input encoder c***

In [0]:
# Input gets embedded into a sequence of vectors of size max_len_question
# Output -> (samples, max_len_story, max_len_question)

input_encoder_c = Sequential()
input_encoder_c.add(Embedding(input_dim = vocab_len, output_dim = max_len_question))
input_encoder_c.add(Dropout(0.3))

***Question encoder***

In [0]:
# Question gets embedded into sequence of vectors
# Output -> (samples, max_len_question, embedding_dim)

question_encoder = Sequential()
question_encoder.add(Embedding(input_dim = vocab_len, output_dim = 64, input_length = max_len_question))
question_encoder.add(Dropout(0.3))

### Encode the sequences

In [0]:
# Encode the input and question sequences(which are indices) to sequences of dense vectors

input_encoded_m = input_encoder_m(story_input)
input_encoded_c = input_encoder_c(story_input)
question_encoded = question_encoder(question_input)

***Compute dot product between input_encoded_m and question_encoded***

We choose the encoded axis for dot product.

In [0]:
match = dot([input_encoded_m, question_encoded], axes = (2, 2))
match = Activation('softmax')(match)

***Add this match matrix with input_encoded_c***

In [0]:
response = add([match, input_encoded_c]) # Shape -> (samples, max_len_story, max_len_question)
response = Permute((2, 1))(response) # Shape -> (samples, max_len_question, max_len_story)

***Concatenate results***

In [0]:
answer = concatenate([response, question_encoded])

***Reduce the layers***

In [0]:
# Add LSTM layer
answer = LSTM(32)(answer)

# Dropout layer
answer = Dropout(0.5)(answer)

# Decode the output
answer = Dense(vocab_len)(answer)

In [24]:
# Convert output to either 0 or 1
answer = Activation('softmax')(answer)

# Build the model ([Input placeholder], result)
model = Model([story_input, question_input], answer)

# Compile the model
model.compile(optimizer = 'rmsprop', loss = 'categorical_crossentropy', metrics = ['accuracy'])





In [25]:
print(model.summary())

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 156)          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 6)            0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       multiple             2432        input_1[0][0]                    
__________________________________________________________________________________________________
sequential_3 (Sequential)       (None, 6, 64)        2432        input_2[0][0]                    
____________________________________________________________________________________________

### Setting up ModelCheckPoint, EarlyStopping and ReduceLROnPlateau

In [0]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

checkpoint = ModelCheckpoint('./chatbot.h5',
                             monitor='val_loss',
                             mode="min",
                             save_best_only = True,
                             verbose=1)

earlystop = EarlyStopping(monitor = 'val_loss', 
                          min_delta = 0, 
                          patience = 15,
                          verbose = 1,
                          restore_best_weights = True)

reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                              factor = 0.2,
                              patience = 15,
                              verbose = 1,
                              min_delta = 0.0001)

callbacks = [earlystop, checkpoint, reduce_lr]

### Train the model

In [27]:
history = model.fit([train_story, train_question], train_answer, batch_size = 32, epochs = 50,
                     validation_data = ([test_story, test_question], test_answer), callbacks = callbacks)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 10000 samples, validate on 1000 samples
Epoch 1/50






Epoch 00001: val_loss improved from inf to 0.69463, saving model to ./chatbot.h5
Epoch 2/50

Epoch 00002: val_loss improved from 0.69463 to 0.69324, saving model to ./chatbot.h5
Epoch 3/50

Epoch 00003: val_loss did not improve from 0.69324
Epoch 4/50

Epoch 00004: val_loss improved from 0.69324 to 0.69313, saving model to ./chatbot.h5
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.69313
Epoch 6/50

Epoch 00006: val_loss did not improve from 0.69313
Epoch 7/50

Epoch 00007: val_loss did not improve from 0.69313
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.69313
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.69313
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.69313
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.69313
Epoch 12/50

Epoch 00012: val_loss improved from 0.69313

### Testing the model

In [0]:
predictions = model.predict(([test_story, test_question]))

In [29]:
story =' '.join(word for word in test_data[0][0])
print(story)

Mary got the milk there . John moved to the bedroom .


In [30]:
query = ' '.join(word for word in test_data[0][1])
print(query)

Is John in the kitchen ?


In [31]:
print('Actual answer: ',test_data[0][2])

Actual answer:  no


In [32]:
val_max = np.argmax(predictions[0])

for key, val in tokenizer.word_index.items():
    if val == val_max:
        k = key

print("Predicted answer: ", k)
print("Confidence in prediction: ", predictions[0][val_max])

Predicted answer:  no
Confidence in prediction:  0.9694801
