In [33]:
from tensorflow import keras
import numpy as np
import tensorflow as tf

In [6]:
# Run configuration shared by the preprocessing, model-building and training cells.
# NOTE(review): absolute, machine-specific path — adjust (or derive from a data
# directory setting) before running on another machine.
dataset_location = "E:/datasets/babi/tasks_1-20_v1-2/en/qa1_single-supporting-fact_train.txt"
# Length every tokenised sentence/question is padded or truncated to (pad_sequences maxlen).
maxlen = 10
# Number of sentence rows kept per story; shorter stories are zero-padded at the front.
num_sentences = 60
# Dimensionality of the word embeddings passed to get_model.
embedding_size=200
# Number of passes of the episode loop in get_model.
num_episodes = 1
# GRU units per direction; the bidirectional layers therefore emit 2 * num_cells features.
num_cells = 128

In [19]:
# --- 1. Read the raw file ----------------------------------------------------
# Every non-blank line has the form "<line_no> <text>". Iterating the file keeps
# the final line even when the file does not end with a newline, and blank lines
# are dropped so they cannot turn into empty "sentences".
with open(dataset_location) as f:
    data = [line.rstrip("\n") for line in f]
data = [line for line in data if line.strip()]

# --- 2. Group lines into stories ---------------------------------------------
# A line numbered "1" starts a new story; the line number itself is discarded.
sep_data = []
story_lines = []
for line in data:
    line_no, _, text = line.partition(" ")
    if line_no == "1" and story_lines:
        sep_data.append(story_lines)
        story_lines = []
    story_lines.append(text)
if story_lines:
    sep_data.append(story_lines)

# --- 3. Split each story into (context, question, answer) samples -------------
# Lines with exactly three tab-separated fields are handled as
# "question<TAB>answer<TAB>third field"; the third field is ignored. Every other
# line is a context sentence. Each question is paired with a snapshot of the
# sentences seen so far in its story.
input_sentences = []
questions = []
answers = []
for story in sep_data:
    context = []
    for text in story:
        fields = text.split("\t")
        if len(fields) == 3:
            input_sentences.append(list(context))
            questions.append(fields[0])
            answers.append(fields[1])
        else:
            context.append(text)

# --- 4. Fit the vocabulary ----------------------------------------------------
# Fitting order is kept (contexts first, then questions) so word_index is stable.
tokenizer = keras.preprocessing.text.Tokenizer()
for context in input_sentences:
    tokenizer.fit_on_texts(context)
tokenizer.fit_on_texts(questions)

# --- 5. Encode the questions: (num_samples, maxlen) ---------------------------
questions = tokenizer.texts_to_sequences(questions)
questions = keras.preprocessing.sequence.pad_sequences(questions, maxlen=maxlen)

# --- 6. Encode the contexts: (num_samples, num_sentences, maxlen) -------------
# Each story keeps its last `num_sentences` sentences, right-aligned; unused
# leading rows stay zero. The array is integer-typed because it holds token ids.
encoded_contexts = np.zeros((len(input_sentences), num_sentences, maxlen), dtype="int32")
for sample_idx, context in enumerate(input_sentences):
    sentence_ids = tokenizer.texts_to_sequences(context)
    sentence_ids = keras.preprocessing.sequence.pad_sequences(sentence_ids, maxlen=maxlen)
    sentence_ids = sentence_ids[-num_sentences:]
    if len(sentence_ids):  # guard: a slice starting at -0 would address every row
        encoded_contexts[sample_idx, -len(sentence_ids):] = sentence_ids
input_sentences = encoded_contexts

# --- 7. Encode the answers ----------------------------------------------------
# word_index starts at 1, so subtracting 1 gives 0-based class ids that match a
# softmax over len(tokenizer.word_index) classes.
answers = np.array([tokenizer.word_index[word] - 1 for word in answers])

In [45]:
class PositionalEncoder(keras.layers.Layer):
    """Collapse the word axis of embedded sentences into one vector per sentence.

    Every word embedding is scaled by a weight that depends only on the word's
    position ``i`` within the sentence::

        (1 - i / input_length) + (1 - 2 * i / input_length) / embedding_size

    and the scaled embeddings are then summed over axis 2 (the word axis).

    NOTE(review): this looks like a simplification of the position encoding in
    Sukhbaatar et al. (2015), which also varies with the embedding index; the
    original weights are kept unchanged here — confirm the intent.

    Args:
        input_length: number of word positions per sentence (size of axis 2).
        embedding_size: embedding dimensionality used in the weight formula.
        **kwargs: forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, input_length, embedding_size, **kwargs):
        super(PositionalEncoder, self).__init__(**kwargs)
        self.input_length = input_length
        self.embedding_size = embedding_size

    def get_config(self):
        """Return the layer config including both constructor arguments."""
        config = super(PositionalEncoder, self).get_config()
        config["input_length"] = self.input_length
        config["embedding_size"] = self.embedding_size
        return config

    def call(self, x):
        """Weight ``x`` by word position and sum over the word axis.

        Args:
            x: tensor whose axis 2 has ``input_length`` entries; in this file it
                is (batch, sentences, input_length, embedding_size).

        Returns:
            Tensor with axis 2 summed out, e.g. (batch, sentences, embedding_size).
        """
        relative_pos = np.arange(self.input_length) / self.input_length
        weights = (1 - relative_pos) + (1 / self.embedding_size) * (1 - 2 * relative_pos)
        # Shape (input_length, 1) broadcasts against the trailing
        # (input_length, embedding_size) axes, so the weights never have to be
        # tiled per sentence and the sentence count may be dynamic.
        weights = tf.constant(weights.reshape((self.input_length, 1)), dtype=x.dtype)
        return keras.backend.sum(x * weights, axis=2)

    def compute_output_shape(self, input_shape):
        """Return the input shape with the word axis (axis 2) removed."""
        dims = tf.TensorShape(input_shape).as_list()
        return (dims[0], dims[1], dims[3])
    

In [46]:
def get_model(input_length, vocab_size, embedding_size, episodes, num_sentences=10, num_cells=128):
    """Build a story/question answering model with an attention-gated memory.

    The story sentences are embedded, collapsed to one vector per sentence by
    ``PositionalEncoder`` and run through a bidirectional GRU; the question is
    embedded and summarised by a second bidirectional GRU. For each episode an
    attention score per sentence is computed from the sentence features, the
    question and the current memory; those scores gate a GRU-style update that
    walks over the sentences, and the final state is combined with the old
    memory and the question to form the new memory.

    Args:
        input_length: number of tokens per sentence and per question.
        vocab_size: number of output classes; the embedding table has
            ``vocab_size + 1`` rows.
        embedding_size: dimensionality of the word embeddings.
        episodes: number of memory-update passes.
        num_sentences: number of sentences per story.
        num_cells: GRU units per direction (features are ``2 * num_cells`` wide).

    Returns:
        An uncompiled ``keras.models.Model`` taking
        ``[stories (num_sentences, input_length), questions (input_length,)]``
        and producing a softmax over ``vocab_size`` classes.
    """
    input_sentence = keras.layers.Input(shape=(num_sentences, input_length,))
    question_sentence = keras.layers.Input(shape=(input_length,))
    embeddings = keras.layers.Embedding(vocab_size + 1, embedding_size)
    input_sentence_emb = embeddings(input_sentence)
    question_sentence_emb = embeddings(question_sentence)

    pe_features = PositionalEncoder(input_length, embedding_size)(input_sentence_emb)
    gru_features_input = keras.layers.Bidirectional(keras.layers.GRU(num_cells, return_sequences=True))(pe_features)
    gru_features_question = keras.layers.Bidirectional(keras.layers.GRU(num_cells))(question_sentence_emb)
    # The memory starts out as the question representation.
    m = gru_features_question

    # Layers shared by every episode and every sentence step.
    attention_dense1_layer = keras.layers.TimeDistributed(keras.layers.Dense(num_cells * 2, activation="tanh"))
    attention_dense2_layer = keras.layers.TimeDistributed(keras.layers.Dense(1))
    rt_kernel_layer = keras.layers.Dense(num_cells * 2, activation=None, use_bias=False)
    rt_recurrent_layer = keras.layers.Dense(num_cells * 2, activation=None)
    h_bar_kernel_layer = keras.layers.Dense(num_cells * 2, activation=None)
    h_bar_recurrent_layer = keras.layers.Dense(num_cells * 2, activation=None, use_bias=False)
    absolute_layer = keras.layers.Lambda(lambda x: keras.backend.abs(x))

    # The sentence features do not change between episodes, so split them into
    # per-sentence tensors once.
    gru_feature_input_timestep = keras.layers.Lambda(lambda x: tf.unstack(x, axis=1))(gru_features_input)
    assert len(gru_feature_input_timestep) == num_sentences

    for _ in range(episodes):
        # Interaction features between each sentence and the question / memory.
        ct_mul_q = keras.layers.Multiply()([gru_features_input, gru_features_question])
        ct_mul_m = keras.layers.Multiply()([gru_features_input, m])
        ct_sub_q = keras.layers.Subtract()([gru_features_input, gru_features_question])
        ct_sub_m = keras.layers.Subtract()([gru_features_input, m])
        attention_features = keras.layers.Concatenate()([ct_mul_q, ct_mul_m, absolute_layer(ct_sub_q), absolute_layer(ct_sub_m)])

        # One scalar score per sentence, normalised with a softmax over sentences.
        attention_dense1 = attention_dense1_layer(attention_features)
        attention_dense2 = attention_dense2_layer(attention_dense1)
        attention_dense2 = keras.layers.Flatten()(attention_dense2)
        attention_score = keras.layers.Activation("softmax")(attention_dense2)
        attention_score = keras.layers.Reshape((num_sentences, 1))(attention_score)
        attention_timestep = keras.layers.Lambda(lambda x: tf.unstack(x, axis=1))(attention_score)
        assert len(attention_timestep) == num_sentences

        # The recurrent state is initialised ONCE per episode from the current
        # memory and then carried across sentences. Re-initialising it inside
        # the sentence loop would discard every step except the last one.
        h = m
        for i in range(num_sentences):
            rt_kernel = rt_kernel_layer(gru_feature_input_timestep[i])
            rt_recurrent = rt_recurrent_layer(h)
            reset_gate = keras.layers.Add()([rt_kernel, rt_recurrent])
            reset_gate = keras.layers.Activation("sigmoid")(reset_gate)

            h_bar_kernel = h_bar_kernel_layer(gru_feature_input_timestep[i])
            h_bar_recurrent = h_bar_recurrent_layer(h)
            h_bar_recurrent = keras.layers.Multiply()([h_bar_recurrent, reset_gate])
            h_bar = keras.layers.Add()([h_bar_recurrent, h_bar_kernel])
            h_bar = keras.layers.Activation("tanh")(h_bar)

            # Broadcast the scalar attention score to the state width; it takes
            # the place of the GRU update gate.
            update_attention_gate = keras.layers.RepeatVector(num_cells * 2)(attention_timestep[i])
            update_attention_gate = keras.layers.Flatten()(update_attention_gate)
            one_minus_attention_gate = keras.layers.Lambda(lambda x: 1.0 - x)(update_attention_gate)
            h = keras.layers.Add()([keras.layers.Multiply()([update_attention_gate, h_bar]), keras.layers.Multiply()([one_minus_attention_gate, h])])

        # New memory from the old memory, the episode's final state and the question.
        m = keras.layers.Dense(num_cells * 2, activation="relu")(keras.layers.Concatenate()([m, h, gru_features_question]))

    dense_output = keras.layers.Dense(vocab_size, activation="softmax")(keras.layers.Concatenate()([m, gru_features_question]))
    model = keras.models.Model(inputs=[input_sentence, question_sentence], outputs=dense_output)
    return model
        
            

In [50]:
# Build and compile the model; the answers are integer class ids, hence the
# sparse categorical loss.
model = get_model(maxlen, len(tokenizer.word_index), embedding_size, num_episodes, num_sentences, num_cells)
model.compile("adam", "sparse_categorical_crossentropy", metrics=["accuracy"])

# Checkpoint files are named after the epoch and validation loss, and only
# written when the monitored metric improves.
model_checkpoint = keras.callbacks.ModelCheckpoint("model.{epoch:02d}-{val_loss:.2f}.hdf5", save_best_only=True)

# The checkpoint callback has to be handed to fit() to take effect; previously
# it was created but never used, so nothing was saved. The last 10% of the
# samples are held out for validation, which provides the val_loss above.
history = model.fit(
    [input_sentences, questions],
    answers,
    epochs=100,
    batch_size=50,
    validation_split=0.1,
    callbacks=[model_checkpoint],
)

Train on 900 samples, validate on 100 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0xc582cfbcc0>