## Memory Networks

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import re

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate, Activation, dot, Lambda, Reshape, Add
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import RMSprop
import tensorflow.keras.backend as K

## Single Supporting Fact

In [2]:
data_dir = "C:\\IMP\\datasets\\bAbI_datasets\\tasks_1-20_v1-2\\en"

def load_data(given_dir):
    with open("{}\\{}".format(data_dir, given_dir), encoding="utf8") as f:
        story = []
        question = []
        answer = []
        data = []
        for line in f:
                number, sentence = line.split(" ", 1)

                # New story
                if int(number) == 1:
                    story = []

                tokenized_story = re.findall(r"[A-Za-z]+|[,.?]", sentence.strip())

                # Answer and the supporting number is in the line both seperated by a tab.
                if "\t" in sentence:
                    question, answer, supporting_number = sentence.split("\t")
                    tokenized_question = re.findall(r"[A-Za-z]+|[,.?]", question.strip())
                    tokenized_answer = re.findall(r"[A-Za-z]+|[,.?]", answer.strip())
                    story_so_far = [[str(i)] + s for i, s in enumerate(story)]
                    data.append((story_so_far, tokenized_question, tokenized_answer))
                else:
                    story.append(tokenized_story)
        
        return data

df_train = load_data("qa1_single-supporting-fact_train.txt")
df_test = load_data("qa1_single-supporting-fact_test.txt")

In [3]:
def get_mappings(data):
    # Get the mappings
    word2idx = {"<PAD>": 0}

    count = 1
    for stories, question, answer in data:
        for story in stories:
            for word in story:
                if word not in word2idx:
                    word2idx[word] = count 
                    count += 1
        for word in question:
            if word not in word2idx:
                word2idx[word] = count 
                count += 1
        for word in answer:
            if word not in word2idx:
                word2idx[word] = count 
                count += 1

    idx2word = {v: k for k, v in word2idx.items()}
    
    return word2idx, idx2word

In [4]:
all_data = df_train + df_test
word2idx, idx2word = get_mappings(all_data)

max_input_len = max([len(story) for s, q, a in all_data for story in s])
max_query_len = max([len(story) for s, q, a in all_data for story in s])
max_no_of_sentences_in_story = max([len(s) for s, q, a in all_data])

In [5]:
def encode_mappings(data, max_input_len, max_query_len):
    # Encode the mappings into the data
    inputs, queries, outputs = [], [], []
    for stories, question, answer in data:
        inputs.append([[word2idx[word] for word in story] for story in stories])
        queries.append([word2idx[word] for word in question])
        outputs.append([word2idx[word] for word in answer])

    # Pad sequences
    inputs = [pad_sequences(x, maxlen=max_input_len) for x in inputs]
    queries = pad_sequences(queries, maxlen=max_query_len)
    
    return inputs, queries, np.array(outputs)

inputs_train, queries_train, outputs_train = encode_mappings(df_train, max_input_len, max_query_len)
inputs_test, queries_test, outputs_test = encode_mappings(df_test, max_input_len, max_query_len)

In [6]:
def stack_inputs(inputs, max_input_len, max_no_of_sentences_in_story):
    """
    this is like 'pad_sequences' but for entire stories
    we are padding each story with zeros so every story
    has the same number of sentences
    append an array of zeros of size:
    (max_sentences - num sentences in story, max words in sentence)
    """
    for i, story in enumerate(inputs):
        inputs[i] = np.concatenate(
          [
            story, 
            np.zeros((max_no_of_sentences_in_story - story.shape[0], max_input_len), 'int')
          ]
        )
    return np.stack(inputs)

inputs_train = stack_inputs(inputs_train, max_input_len, max_no_of_sentences_in_story)
inputs_test = stack_inputs(inputs_test, max_input_len, max_no_of_sentences_in_story)

In [7]:
# Model parameters
EMBEDDING_DIM = 15
EPOCHS = 10
BATCH_SIZE = 32

vocab_size = len(word2idx) + 1

In [21]:
# Shapes
# embedded_story -> (vocab_size, max_no_of_sentences_in_story, EMBEDDING_DIM)
# embedded_question -> (vocab_size, 1, EMBEDDING_DIM)

# Model inputs
input_story = Input((max_no_of_sentences_in_story, max_input_len))
input_question = Input((max_query_len, ))

# Model
embedded_story = Embedding(vocab_size, EMBEDDING_DIM)(input_story)
embedded_story = Lambda(lambda x: K.sum(x, axis=2))(embedded_story) # Take sum of word vectors in each sentence

embedded_question = Embedding(vocab_size, EMBEDDING_DIM)(input_question)
embedded_question = Lambda(lambda x: K.sum(x, axis=1))(embedded_question)

embedded_question = Reshape((1, EMBEDDING_DIM))(embedded_question) # So that we can dot it with stories later

# x = Dot(axes=2)([embedded_story, embedded_question])            
x = dot([embedded_story, embedded_question], 2)
x = Reshape((max_no_of_sentences_in_story, ))(x)              # Flatten
x = Activation("softmax")(x)

story_weights = Reshape((max_no_of_sentences_in_story, 1))(x)         # Unflatten it again for dot product

# x = Dot(axes=1)([story_weights, embedded_story])
x = dot([story_weights, embedded_story], 1)
x = Reshape((EMBEDDING_DIM, ))(x)
x = Dense(vocab_size, activation="softmax")(x)

model = Model([input_story, input_question], x)

model.compile(optimizer=RMSprop(lr=1e-2), loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [22]:
model.summary()

Model: "model_15"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 8)]          0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            [(None, 10, 8)]      0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 8, 15)        495         input_6[0][0]                    
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 10, 8, 15)    495         input_5[0][0]                    
___________________________________________________________________________________________

In [23]:
result = model.fit([inputs_train, queries_train], outputs_train, epochs=EPOCHS, batch_size=BATCH_SIZE, 
                   validation_data=([inputs_test, queries_test], outputs_test))

Train on 1000 samples, validate on 1000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# Check how we weight each input sentence given a story and question
weights_model = Model([input_story, input_question], story_weights)
result_model = Model([input_story, input_question], x)

# choose a random story
story_idx = np.random.choice(len(inputs_train))

# get weights from debug model
i = inputs_train[story_idx:story_idx+1]
q = queries_train[story_idx:story_idx+1]
weights = weights_model.predict([i, q]).flatten()
idx = result_model.predict([i, q])

story, question, ans = df_train[story_idx]

print("Story:\n")
for i, line in enumerate(story):
    print("{:1.5f}".format(weights[i]), "\t", " ".join(line))

print("Question:", " ".join(question))
print("Answer:", ans[0])
print("Prediction: ", idx2word[np.argmax(idx)])

Story:

0.00005 	 0 Sandra travelled to the office .
0.00031 	 1 Sandra went to the garden .
0.99698 	 2 Mary went back to the garden .
0.00266 	 3 Daniel went to the kitchen .
Question: Where is Mary ?
Answer: garden
Prediction:  garden
