In [1]:
from __future__ import print_function

import keras
from keras.models import Sequential, Model
from keras.layers.embeddings import Embedding
from keras.layers import Input, Activation, Dense, Permute, Dropout, add, dot, concatenate, Dot, Merge, Lambda, multiply
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
from sklearn import cross_validation, metrics
from functools import reduce
from itertools import chain

import tarfile
import numpy as np
import re
import os


def tokenize(sent):
    '''Return the tokens of a sentence including punctuation.
    >>> tokenize('Bob dropped the apple. Where is the apple?')
    ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
    '''
    return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()]


def load_task(data_dir, task_id, only_supporting=False):
    '''Load the nth task. There are 20 tasks in total.

    Returns a tuple containing the training and testing data for the task.
    '''
    assert task_id > 0 and task_id < 21

    files = os.listdir(data_dir)
    files = [os.path.join(data_dir, f) for f in files]
    s = 'qa{}_'.format(task_id)
    train_file = [f for f in files if s in f and 'train' in f][0]
    test_file = [f for f in files if s in f and 'test' in f][0]
    train_data = get_stories(train_file, only_supporting)
    test_data = get_stories(test_file, only_supporting)
    return train_data, test_data

def parse_stories(lines, only_supporting=False):
    '''Parse stories provided in the bAbI tasks format
    If only_supporting is true, only the sentences that support the answer are kept.
    '''
    data = []
    story = []
    for line in lines:
        line = str.lower(line)
        nid, line = line.split(' ', 1)
        nid = int(nid)
        if nid == 1:
            story = []
        if '\t' in line: # question
            q, a, supporting = line.split('\t')
            q = tokenize(q)
            #a = tokenize(a)
            # answer is one vocab word even if it's actually multiple words
            a = [a]
            substory = None

            # remove question marks
            if q[-1] == "?":
                q = q[:-1]

            if only_supporting:
                # Only select the related substory
                supporting = map(int, supporting.split())
                substory = [story[i - 1] for i in supporting]
            else:
                # Provide all the substories
                substory = [x for x in story if x]

            data.append((substory, q, a))
            story.append('')
        else: # regular sentence
            # remove periods
            sent = tokenize(line)
            if sent[-1] == ".":
                sent = sent[:-1]
            story.append(sent)
    return data


def get_stories(f, only_supporting=False):
    '''Given a file name, read the file, retrieve the stories, and then convert the sentences into a single story.
    If max_length is supplied, any stories longer than max_length tokens will be discarded.
    '''
    with open(f) as f:
        return parse_stories(f.readlines(), only_supporting=only_supporting)
    
def vectorize_data(data, word_idx, sentence_size, memory_size):
    """
    Vectorize stories and queries.

    If a sentence length < sentence_size, the sentence will be padded with 0's.

    If a story length < memory_size, the story will be padded with empty memories.
    Empty memories are 1-D arrays of length sentence_size filled with 0's.

    The answer array is returned as a one-hot encoding.
    """
    S = []
    Q = []
    A = []
    for story, query, answer in data:
        ss = []
        for i, sentence in enumerate(story, 1):
            ls = max(0, sentence_size - len(sentence))
            ss.append([word_idx[w] for w in sentence] + [0] * ls)

        # take only the most recent sentences that fit in memory
        ss = ss[::-1][:memory_size][::-1]

        # pad to memory_size
        lm = max(0, memory_size - len(ss))
        for _ in range(lm):
            ss.append([0] * sentence_size)

        lq = max(0, sentence_size - len(query))
        q = [word_idx[w] for w in query] + [0] * lq

#        y = np.zeros(len(word_idx) + 1) # 0 is reserved for nil word
#        for a in answer:
#            y[word_idx[a]] = 1

        S.append(ss)
        Q.append(q)
        A.append(word_idx[answer[0]])
    return np.array(S), np.array(Q), np.array(A)


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
path_1k = '/home/ro/dataset/tasks_1-20_v1-2/en/'
path_10k = '/home/ro/dataset/tasks_1-20_v1-2/en-10k/'

In [3]:
# task data
train, test = load_task(path_1k, 1)
data = train + test

vocab = sorted(reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _ in data)))
mean_story_size = int(np.mean([ len(s) for s, _, _ in data ]))
sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
query_size = max(map(len, (q for _, q, _ in data)))
memory_size = min(320, max_story_size)

vocab_size = len(word_idx) + 1 # +1 for nil word
sentence_size = max(query_size, sentence_size) # for the position

print("Vocab size", vocab_size)
print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)
print("Query size", query_size)

# train/validation/test sets
trainS, trainQ, trainA = vectorize_data(train, word_idx, sentence_size, memory_size)
testS, testQ, testA = vectorize_data(test, word_idx, sentence_size, memory_size)

print('trainS example', trainS[0])
print('trainQ example', trainQ[0])
print('trainA example', trainA[0])

print('-')
print('trainS shape:', trainS.shape)
print('testS shape:', testS.shape)
print('-')
print('trainQ shape:', trainQ.shape)
print('testQ shape:', testQ.shape)
print('-')
print('trainA shape:', trainA.shape)
print('testA shape:', testA.shape)
print('-')

Vocab size 20
Longest sentence length 6
Longest story length 10
Average story length 6
Query size 3
trainS example [[11 12 16 15  2  0]
 [ 8 18 16 15  6  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
trainQ example [19  7 11  0  0  0]
trainA example 2
-
trainS shape: (1000, 10, 6)
testS shape: (1000, 10, 6)
-
trainQ shape: (1000, 6)
testQ shape: (1000, 6)
-
trainA shape: (1000,)
testA shape: (1000,)
-


  return _compile(pattern, flags).split(string, maxsplit)


In [41]:
from keras import backend as K

d = 20
k_hop = 3

# placeholders
story = Input(shape=(max_story_size, sentence_size,))
question = Input(shape=(sentence_size,))

# encoders
emb_A = []
for i in range(k_hop + 1) :
    emb_A.append(Embedding(input_dim=vocab_size, output_dim=d, embeddings_initializer='random_normal'))

m_emb = []
c_emb = []
u_emb = []

m_emb.append(emb_A[0](story))
m_emb[0] = Lambda(lambda x: K.sum(x, axis=2))(m_emb[0])

u_emb.append(emb_A[0](question))
u_emb[0] = Lambda(lambda x: K.sum(x, axis=1))(u_emb[0])

for i in range(k_hop) :
    u_temp = Lambda(lambda x: K.expand_dims(x, 1))(u_emb[i])
    u_temp = Lambda(lambda x: K.tile(x, (1, max_story_size, 1)))(u_temp)

    probs = multiply([m_emb[i], u_temp])
    probs = Lambda(lambda x: K.sum(x, axis=2))(probs)
    probs = Activation('softmax')(probs)
    probs_temp = Lambda(lambda x: K.expand_dims(x, 2))(probs)
    probs_temp = Lambda(lambda x: K.tile(x, (1, 1, d)))(probs_temp)
    
    c_emb.append(emb_A[i + 1](story))
    c_emb[i] = Lambda(lambda x: K.sum(x, axis=2))(c_emb[i])
    
    o_weight = multiply([c_emb[i], probs_temp])
    o_weight = Lambda(lambda x: K.sum(x, axis=1))(o_weight)

    u_emb.append(add([o_weight, u_emb[i]]))

    m_emb.append(emb_A[i + 1](story))
    m_emb[i + 1] = Lambda(lambda x: K.sum(x, axis=2))(m_emb[i + 1])

answer = Dense(vocab_size, activation='softmax', kernel_initializer='random_normal')(u_emb[-1])
#answer = Activation('softmax')(answer)

from keras import optimizers

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model = Model([story, question], answer)
model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_31 (InputLayer)           (None, 10, 6)        0                                            
__________________________________________________________________________________________________
input_32 (InputLayer)           (None, 6)            0                                            
__________________________________________________________________________________________________
embedding_61 (Embedding)        multiple             400         input_31[0][0]                   
                                                                 input_32[0][0]                   
__________________________________________________________________________________________________
lambda_392 (Lambda)             (None, 20)           0           embedding_61[1][0]               
__________

In [42]:
# train
model.fit([trainS, trainQ], trainA,
          batch_size=16,
          epochs=100,
          validation_data=([testS, testQ], testA))

Train on 1000 samples, validate on 1000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

<keras.callbacks.History at 0x7f7c05f7f2e8>

In [44]:
model.evaluate([testS, testQ], testA, batch_size=32)



[1.0210048589706422, 0.626]

## -------------------

In [None]:
from keras import backend as K

d = 3
vocab_size = 4
max_story_size = 2
sentence_size = 5

In [79]:
# placeholders
story = Input(shape=(max_story_size, sentence_size,))
question = Input(shape=(sentence_size,))

# encoders
shared_memory_a = Embedding(input_dim=vocab_size, output_dim=d, embeddings_initializer='ones')
shared_memory_b = Embedding(input_dim=vocab_size, output_dim=d, embeddings_initializer='ones')
shared_memory_c = Embedding(input_dim=vocab_size, output_dim=d)

memory_m1 = shared_memory_a(story)
memory_m1 = Lambda(lambda x: K.sum(x, axis=2))(memory_m1)
memory_c1 = shared_memory_c(story)
memory_c1 = Lambda(lambda x: K.sum(x, axis=2))(memory_c1)
u1 = shared_memory_b(question)
u1 = Lambda(lambda x: K.sum(x, axis=1))(u1)

model_1 = Model(question, u1)
model_1.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model_1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_45 (InputLayer)        (None, 5)                 0         
_________________________________________________________________
embedding_72 (Embedding)     (None, 5, 3)              12        
_________________________________________________________________
lambda_158 (Lambda)          (None, 3)                 0         
Total params: 12
Trainable params: 12
Non-trainable params: 0
_________________________________________________________________


In [80]:
s = np.array([[[1,1,2,0,3],[3,2,2,1,3]]])
q = np.array([[1,1,2,0,3]])
a = np.array([1])
print(q.shape)
res = model_1.predict(q)
res

(1, 2, 5)
(1, 5)
(1,)


array([[5., 5., 5.]], dtype=float32)

In [98]:
u1 = Input(shape=(d,))
memory_m1 = Input(shape=(max_story_size, d,))

u1_temp = Lambda(lambda x: K.expand_dims(x, 1))(u1)
u1_temp = Lambda(lambda x: K.tile(x, (1, max_story_size, 1)))(u1_temp)
probs = multiply([memory_m1, u1_temp])
probs = Lambda(lambda x: K.sum(x, axis=2))(probs)
probs = Activation('softmax')(probs)
model_2 = Model([memory_m1, u1], probs)
model_2.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model_2.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_65 (InputLayer)           (None, 3)            0                                            
__________________________________________________________________________________________________
lambda_173 (Lambda)             (None, 1, 3)         0           input_65[0][0]                   
__________________________________________________________________________________________________
input_66 (InputLayer)           (None, 2, 3)         0                                            
__________________________________________________________________________________________________
lambda_174 (Lambda)             (None, 2, 3)         0           lambda_173[0][0]                 
__________________________________________________________________________________________________
multiply_4

In [99]:
m1 = np.array([[[1,2,3],[4,5,6]]])
u1 = np.array([[3,2,4]])
a = np.array([1])
print(u1.shape)
res = model_2.predict([m1, u1])
res

(1, 3)


array([[1.8795287e-12, 1.0000000e+00]], dtype=float32)

In [104]:
probs = Input(shape=(max_story_size,))
memory_c1 = Input(shape=(max_story_size, d,))

probs_temp = Lambda(lambda x: K.expand_dims(x, 2))(probs)
probs_temp = Lambda(lambda x: K.tile(x, (1, 1, d)))(probs_temp)

o_weight = multiply([memory_c1, probs_temp])
o_weight = Lambda(lambda x: K.sum(x, axis=1))(o_weight)

model_2 = Model([memory_c1, probs], o_weight)
model_2.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model_2.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_72 (InputLayer)           (None, 2)            0                                            
__________________________________________________________________________________________________
lambda_182 (Lambda)             (None, 2, 1)         0           input_72[0][0]                   
__________________________________________________________________________________________________
input_73 (InputLayer)           (None, 2, 3)         0                                            
__________________________________________________________________________________________________
lambda_183 (Lambda)             (None, 2, 3)         0           lambda_182[0][0]                 
__________________________________________________________________________________________________
multiply_4

In [107]:
c1 = np.array([[[1,2,3],[4,5,6]]])
p = np.array([[0.2,0.8]])
a = np.array([1])
print(u1.shape)
res = model_2.predict([c1, p])
res

(1, 3)


array([[3.4, 4.4, 5.4]], dtype=float32)

In [151]:
u = Input(shape=(d,))

u_temp = Lambda(lambda x: K.expand_dims(x, 1))(u)
u_temp = Lambda(lambda x: K.tile(x, (1, vocab_size, 1)))(u_temp)
answer = Lambda(lambda x: np.array(shared_memory_a.get_weights()[0]) * x)(u_temp)
answer = Lambda(lambda x: K.sum(x, axis=2))(answer)
#answer = Activation('softmax')(answer)

model = Model(u, answer)
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_86 (InputLayer)        (None, 3)                 0         
_________________________________________________________________
lambda_193 (Lambda)          (None, 1, 3)              0         
_________________________________________________________________
lambda_194 (Lambda)          (None, 4, 3)              0         
_________________________________________________________________
lambda_195 (Lambda)          (None, 4, 3)              0         
_________________________________________________________________
lambda_196 (Lambda)          (None, 4)                 0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


In [164]:
o1 = np.array([[4,5,6]])
u1 = np.array([[3,2,4]])
a = np.array([1])
print(u1.shape)
res = model.fit(u1, a,epochs = 10)
res = model.predict(u1)
res

(1, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


array([[36., 13., 45.,  9.]], dtype=float32)

In [165]:
w = shared_memory_a.get_weights()[0]
w[2] = 5
w[0] = 3
w[0][0] = 8
w[1][2] = 3
w

array([[8., 3., 3.],
       [1., 1., 3.],
       [5., 5., 5.],
       [1., 1., 1.]], dtype=float32)

In [163]:
shared_memory_a.set_weights([w])

In [5]:
from keras.utils import plot_model
plot_model(model, to_file='model_single.png', show_shapes = True)
