In [0]:
from __future__ import absolute_import
from __future__ import print_function

from sklearn import metrics
from itertools import chain
from six.moves import range, reduce
import re

import tensorflow as tf
from tensorflow import keras

import numpy as np

from google.colab import files

import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib import cm

In [0]:
import pickle

def save_obj(obj, name ):
    """Pickle `obj` into the file `<name>.pkl`.

    Args:
        obj: Any picklable object.
        name: Target path without the `.pkl` suffix (the suffix is appended here).
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        # Highest protocol gives the most compact encoding this interpreter supports.
        pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    """Return the object stored in the pickle file `<name>.pkl`.

    Args:
        name: Source path without the `.pkl` suffix (the suffix is appended here).

    NOTE: unpickling can execute arbitrary code, so only load files from a
    trusted source.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [0]:
# ############################ data utils

def load_task3(train_file, only_supporting=False, test_count=10):
    """Split the usable lines of a fact file into parsed train and test sets.

    A line is usable when the text after its `##` separator consists of exactly
    two space-separated tokens. The last `test_count` usable lines become the
    test set, everything before them the training set.

    Args:
        train_file: Iterable of text lines of the form `<fact>##<token> <token>`.
        only_supporting: Forwarded unchanged to `parse_stories3`.
        test_count: Number of trailing usable lines held out for testing.

    Returns:
        (train_data, test_data), each the list produced by `parse_stories3`.

    Raises:
        ValueError: if a non-blank line does not contain exactly one `##`.
    """
    usable_lines = list()
    for line in train_file:
        line = line.replace('\n', '')
        if not line.strip():
            # Blank lines have no '##' separator and would break the unpacking below.
            continue
        _, qna = line.split('##')
        if (len(qna.split(' ')) == 2):
            usable_lines.append(line)

    # Split at an explicit index: the negative-slice form `[:-test_count]` is
    # empty for test_count == 0, which silently threw away all training data.
    if test_count >= 0:
        split_at = max(0, len(usable_lines) - test_count)
    else:
        # Keep the historical meaning of a negative count (first -test_count lines train).
        split_at = -test_count

    train_data = parse_stories3(usable_lines[:split_at], -1, only_supporting)
    test_data = parse_stories3(usable_lines[split_at:], -1, only_supporting)
    return train_data, test_data

def tokenize(sent):
    '''Return the tokens of a sentence including punctuation.
    >>> tokenize('Bob dropped the apple. Where is the apple?')
    ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?']
    '''
    # The separator pattern must not be able to match the empty string: since
    # Python 3.7 re.split also splits on empty matches, and the old optional
    # group `(\W+)?` then yields None entries that crash on `.strip()`.
    # A raw string avoids the invalid-escape warning for `\W`.
    return [x.strip() for x in re.split(r'(\W+)', sent) if x.strip()]


def parse_stories3(lines, q_position=-1, only_supporting=False):
    '''Parse `<fact>##<token> <token>` lines into story tuples.

    Each accepted line contributes one tuple
    `(story_tokens, [last_token], tokenized_first_token, q_position)`.
    A line is dropped when its last token contains neither an upper-case word
    nor a single lower-case letter, or when the position has to be searched
    (q_position == -1) and `get_q_index` reports a distance of 0.

    When more than one line is passed, the position is searched afresh for every
    line; the `q_position` argument is only honoured for a single line.
    `only_supporting` is accepted but not used by this function.
    '''
    # Example line:
    # In circle O , diameter AB is perpendicular to chord CD at E .##perpendicular AB CD

    parsed = []
    for raw_line in lines:
        if len(lines) > 1:
            q_position = -1
        raw_line = raw_line.replace('\n', '')
        fact_text, qna_text = raw_line.split('##')

        fact_tokens = tokenize(fact_text)
        # Drop a sentence-final '?' or '.' so it never becomes a memory slot.
        if fact_tokens[-1] in ("?", "."):
            fact_tokens = fact_tokens[:-1]
        story = list(fact_tokens)

        qna_parts = qna_text.split(' ')
        if len(qna_parts) != 2:
            continue

        head_part, tail_part = qna_parts[:-1], qna_parts[-1:]
        if re.search(r'\b(([A-Z]+)|([a-z]))\b', tail_part[0]) is None:
            continue
        head_tokens = tokenize(' '.join(head_part))

        if q_position == -1:
            q_position, _, distance = get_q_index(fact_tokens, tail_part, head_tokens)
            if distance == 0:
                continue
        parsed.append((story, tail_part, head_tokens, q_position))
    return parsed


def vectorize_datas(data, word_idx, sentence_size, memory_size, q_position=-1):
    """
    Vectorize stories, queries and answer positions for a whole data set.

    Every story word becomes one memory slot `[word_id, time_marker]`, where
    `time_marker` is `len(word_idx) - memory_size`. Only the last `memory_size`
    words of a story are kept; shorter stories are padded with `[0, 0]` slots.
    Queries are padded to `sentence_size` with the same `time_marker`.

    The answer is a one-hot vector of length `memory_size` marking the memory
    slot that holds the answer word.

    Args:
        data: Iterable of `(story, query, answer, q_position)` tuples; `story`
            and `query` are token lists, `answer` is a list whose first token
            is looked up in `story`, and a `q_position` of -1 means "use the
            first occurrence of `query[0]` in the story".
        word_idx: Mapping from token to integer id; must contain every story
            and query token.
        sentence_size: Length queries are padded to.
        memory_size: Number of memory slots per story.
        q_position: Unused; the per-sample value from `data` takes precedence.

    Returns:
        (S, Q, A): `S` is a list of `memory_size` x 2 nested lists, `Q` and `A`
        are numpy arrays. Samples whose answer word is not in the story, or
        whose answer/question word falls outside the kept memory window, are
        skipped.
    """
    S = []
    Q = []
    A = []
    time_marker = len(word_idx) - memory_size
    for story, query, answer, q_position in data:
        if (q_position == -1):
            q_position = story.index(query[0])

        if (answer[0] not in story):
            continue

        # Only the last `memory_size` words fit into memory. Positions computed
        # on the full story must be shifted by the number of dropped leading
        # words, otherwise the label points at the wrong slot (or past the end).
        dropped = max(0, len(story) - memory_size)
        answer_slot = story.index(answer[0]) - dropped
        if dropped:
            q_position -= dropped
            if answer_slot < 0 or q_position < 0:
                # The answer or the question word was truncated away.
                continue

        ss = [[word_idx[word], 0] for word in story]
        ss = ss[::-1][:memory_size][::-1]

        # Indexing with q_position keeps the IndexError for an out-of-range
        # position; every slot then receives the same marker value.
        ss[q_position][-1] = time_marker
        for slot in ss:
            slot[-1] = time_marker

        # Pad short stories with empty memories: [nil word, no marker].
        for _ in range(max(0, memory_size - len(ss))):
            ss.append([0, 0])

        lq = max(0, sentence_size - len(query))
        q = [word_idx[w] for w in query] + [time_marker] * lq

        y = np.zeros(memory_size)
        y[answer_slot] = 1

        S.append(ss)
        Q.append(q)
        A.append(y)
    return S, np.array(Q), np.array(A)


def vectorize_data(story, query, answer, word_idx,
                   sentence_size, memory_size, q_position=-1, S=[], Q=[], A=[]):
    """Vectorize a single story/query pair for prediction.

    Every story word becomes a memory slot `[word_id, marker]` with
    `marker = len(word_idx) - memory_size`; the story is cut to its last
    `memory_size` words and padded with `[0, 0]` slots. The query is padded to
    `sentence_size` with the same marker. The label is a placeholder: a vector
    of length `memory_size` with a 1 in slot 0 (or an all-zero vector of length
    `len(word_idx) + 1` when `answer` is empty).

    The `S`, `Q` and `A` arguments are ignored; fresh lists are always returned.

    Returns:
        ([memory], [query_ids], [label]) as one-element lists.
    """
    if q_position == -1:
        q_position = story.index(query[0])

    marker = len(word_idx) - memory_size
    memory = [[word_idx[token], 0] for token in story]
    memory = memory[::-1][:memory_size][::-1]

    # Question slot first (raises IndexError for an out-of-range position),
    # then everything before it, then everything after it.
    memory[q_position][-1] = marker
    for before in range(q_position - 1, -1, -1):
        memory[before][-1] = marker
    for after in range(q_position + 1, len(memory)):
        memory[after][-1] = marker

    missing_slots = max(0, memory_size - len(memory))
    memory.extend([0, 0] for _ in range(missing_slots))

    query_padding = [marker] * max(0, sentence_size - len(query))
    query_ids = [word_idx[token] for token in query] + query_padding

    label = np.zeros(len(word_idx) + 1)  # only kept when `answer` is empty
    for _ in answer:
        label = np.zeros(memory_size)
        label[0] = 1

    return [memory], [query_ids], [label]


def vectorize_question(story, query, answer, word_idx, sentence_size, memory_size, S=[], Q=[], A=[]):
    """Map the query tokens to ids and right-pad them with 0 up to `sentence_size`.

    Only `query`, `word_idx` and `sentence_size` are used; the remaining
    arguments are ignored.

    Returns:
        A one-element list holding the padded id list.
    """
    zero_padding = [0] * max(0, sentence_size - len(query))
    return [[word_idx[token] for token in query] + zero_padding]


def position_encoding(sentence_size, embedding_size):
    """
    Position Encoding described in section 4.1 [1]

    Returns a float32 array of shape (sentence_size, embedding_size). The row
    belonging to the last sentence position is set to 1.0 throughout.
    """
    # Centered offsets of every embedding index (rows) and word position (columns).
    embedding_offsets = np.arange(1, embedding_size + 1) - (embedding_size + 1) / 2
    position_offsets = np.arange(1, sentence_size + 1) - (sentence_size + 1) / 2

    encoding = np.outer(embedding_offsets, position_offsets).astype(np.float32)
    encoding = 1 + 4 * encoding / embedding_size / sentence_size
    # The last position is left unweighted.
    encoding[:, -1] = 1.0
    return np.transpose(encoding)


def zero_nil_slot(t, name=None):
    """
    Overwrites the nil_slot (first row) of the input Tensor with zeros.

    The nil_slot is a dummy slot and should not be trained and influence
    the training algorithm.

    Args:
        t: 2-D tensor (or value convertible to one) whose first row is replaced.
        name: Optional name for the resulting op.

    Returns:
        A tensor equal to `t` except that row 0 is all zeros.
    """
    # tf.name_scope(name, default_name, values) is the replacement for the
    # deprecated tf.op_scope(values, name, default_name).
    with tf.name_scope(name, "zero_nil_slot", [t]) as name:
        t = tf.convert_to_tensor(t, name="t")
        s = tf.shape(t)[1]
        z = tf.zeros(tf.stack([1, s]))
        # Zero row followed by every original row except the first.
        return tf.concat(axis=0, values=[z, tf.slice(t, [1, 0], [-1, -1])], name=name)


def add_gradient_noise(t, stddev=1e-3, name=None):
    """
    Adds gradient noise as described in http://arxiv.org/abs/1511.06807 [2].

    The input Tensor `t` should be a gradient.

    The output will be `t` + gaussian noise.

    0.001 was said to be a good fixed value for memory networks [2].

    Args:
        t: Gradient tensor (or value convertible to one).
        stddev: Standard deviation of the added zero-mean normal noise.
        name: Optional name for the resulting op.
    """
    # tf.name_scope(name, default_name, values) is the replacement for the
    # deprecated tf.op_scope(values, name, default_name).
    with tf.name_scope(name, "add_gradient_noise", [t, stddev]) as name:
        t = tf.convert_to_tensor(t, name="t")
        gn = tf.random_normal(tf.shape(t), stddev=stddev)
        return tf.add(t, gn, name=name)


def get_q_index(fact1, q1, a1):
    """Pair an occurrence of `q1[0]` with an occurrence of `a1[0]` inside `fact1`.

    Pass 1 looks, for every `q1[0]` position, for the first `a1[0]` position
    strictly after it; the last such pair found wins. If no pair exists, pass 2
    looks, for every `a1[0]` position, for the first `q1[0]` position at or
    after it; as soon as the words between the two contain 'is', the function
    returns with the distance found so far (0 if this is the first pair).

    Returns:
        (q_index, a_index, distance); all zeros when nothing was paired.
    """
    a_positions = np.sort([pos for pos, token in enumerate(fact1) if token == a1[0]])
    q_positions = np.sort([pos for pos, token in enumerate(fact1) if token == q1[0]])

    a_index = 0
    q_index = 0
    min_dis = [0]

    # Pass 1: a1[0] occurring after q1[0].
    for q_pos in q_positions:
        first_after = next((a_pos for a_pos in a_positions if a_pos > q_pos), None)
        if first_after is None:
            continue
        a_index, q_index = first_after, q_pos
        min_dis = np.absolute([q_pos - first_after])

    if min_dis[0] != 0:
        return q_index, a_index, min_dis[0]

    # Pass 2: a1[0] occurring before (or at) q1[0].
    for a_pos in a_positions:
        first_at_or_after = next((q_pos for q_pos in q_positions if q_pos >= a_pos), None)
        if first_at_or_after is None:
            continue
        a_index, q_index = a_pos, first_at_or_after
        if 'is' in fact1[a_pos:first_at_or_after]:
            return q_index, a_index, min_dis[0]
        min_dis = np.absolute([first_at_or_after - a_pos])

    return q_index, a_index, min_dis[0]


In [0]:
# Keyword table: every line is read as `<keyword>##<integer>`.
with open('keywords.txt','r') as keywords_file:
    keywords = keywords_file.readlines()
keyword_dict = dict(zip([i.split('##')[0] for i in keywords], [int(i.split('##')[1]) for i in keywords]))

# Evaluation file: every line is read as `<key>##<sentence>$$<rule>$<rule>$...`.
with open("evaluate_rule_file_v1.txt", "r") as rules_file:
    evaluate_rule_list_file = rules_file.readlines()
evaluate_data = list()   # `<key>##<sentence>` part of every line
evaluate_rules = list()  # all rules of all lines, flattened
rule_dict = dict()       # key -> list of rule strings
for qline in evaluate_rule_list_file:
  qkey = qline.split('##')[0]
  qLinewrules = qline.split('##')[1]
  evaluate_data.append(qline.split('$$')[0])
  rule_list = qLinewrules.split('$$')[1].split('$')
  # The former filter compared a string with 0 (`... .strip() > 0`). That is
  # always true on Python 2 and a TypeError on Python 3, so it never removed
  # anything; it is dropped here to keep the Python 2 result on both versions.
  rule_dict[qkey] = [rule.replace('\n','').strip() for rule in rule_list]
  # The last '$'-separated field is discarded.
  rule_dict[qkey] = rule_dict[qkey][:-1]
  evaluate_rules.extend(rule_dict[qkey])
 
  

In [0]:
class MemN2N(object):
    """End-To-End Memory Network.

    As implemented here the network scores every memory slot with the dot
    product between the embedded slot and the embedded query, so a prediction
    is an index into the memory (shape `(batch, memory_size)`), not a word of
    the vocabulary.
    """

    def __init__(self, batch_size, vocab_size, sentence_size, memory_size, embedding_size,
                 hops=3,
                 max_grad_norm=40.0,
                 nonlin=None,
                 initializer=tf.random_normal_initializer(stddev=0.1),
                 encoding=position_encoding,
                 session=None,
                 name='MemN2N'):
        """Creates an End-To-End Memory Network

        Args:
            batch_size: The size of the batch.

            vocab_size: The size of the vocabulary (should include the nil word). The nil word
            one-hot encoding should be 0.

            sentence_size: The max size of a sentence in the data. All sentences should be padded
            to this length.

            memory_size: The max size of the memory. Since Tensorflow currently does not support jagged arrays
            all memories must be padded to this length. If padding is required, the extra memories should be
            empty memories; memories filled with the nil word ([0, 0, 0, ......, 0]).

            embedding_size: The size of the word embedding.

            hops: Stored on the instance; `hops - 1` extra embedding matrices `C` are created,
            but `_inference` performs a single attention step and does not read them.
            Defaults to `3`.

            max_grad_norm: Maximum L2 norm clipping value. Defaults to `40.0`.

            nonlin: Non-linearity. Stored but not used by `_inference`. Defaults to `None`.

            initializer: Weight initializer. Defaults to `tf.random_normal_initializer(stddev=0.1)`.

            encoding: A function returning a 2D Tensor (sentence_size, embedding_size). Defaults to `position_encoding`.

            session: Tensorflow Session the model is run with. When `None` (the default) a new
            `tf.Session()` is created at construction time, so it belongs to the graph that is
            current when the model is built rather than to the one current when the class was defined.

            name: Name of the End-To-End Memory Network. Defaults to `MemN2N`.

        The optimizer is always `tf.train.AdamOptimizer`; its learning rate is fed
        per call through `batch_fit`.
        """

        self._batch_size = batch_size
        self._vocab_size = vocab_size
        self._sentence_size = sentence_size
        self._memory_size = memory_size
        self._embedding_size = embedding_size
        self._hops = hops
        self._max_grad_norm = max_grad_norm
        self._nonlin = nonlin
        self._init = initializer
        self._name = name

        self._build_inputs()
        self._build_vars()

        self._opt = tf.train.AdamOptimizer(learning_rate=self._lr)

        self._encoding = tf.constant(encoding(self._sentence_size, self._embedding_size), name="encoding")

        # Unnormalized score of every memory slot: (batch_size, memory_size).
        logits = self._inference(self._stories, self._queries)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                                labels=tf.cast(self._answers, tf.float32),
                                                                name="cross_entropy")
        cross_entropy_sum = tf.reduce_sum(cross_entropy, name="cross_entropy_sum")
        loss_op = cross_entropy_sum

        # Variables without a gradient (e.g. the unused C matrices) get a zero
        # gradient so the clipping / noise steps below can treat all alike.
        grads_and_vars = []
        for grad, var in self._opt.compute_gradients(loss_op):
            if grad is None:
                grad = tf.zeros_like(var)
            grads_and_vars.append((grad, var))

        grads_and_vars = [(tf.clip_by_norm(g, self._max_grad_norm), v) for g, v in grads_and_vars]
        grads_and_vars = [(add_gradient_noise(g), v) for g, v in grads_and_vars]
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self._nil_vars:
                # Keep the nil-word row of these embeddings untrained.
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))
        train_op = self._opt.apply_gradients(nil_grads_and_vars, name="train_op")

        predict_op = tf.argmax(logits, 1, name="predict_op")
        predict_proba_op = tf.nn.softmax(logits, name="predict_proba_op")
        predict_log_proba_op = tf.log(predict_proba_op, name="predict_log_proba_op")
        # The attention over the memory is the softmax of the slot scores. The
        # previous code referenced an undefined name `att` here.
        predict_att_op = tf.nn.softmax(logits, name="predict_att")

        # Tensors carry an `_op` suffix so they do not shadow the methods
        # `predict_out` / `predict_att` defined below.
        self.loss_op = loss_op
        self.predict_op = predict_op
        self.predict_out_op = logits
        self.predict_att_op = predict_att_op

        self.predict_proba_op = predict_proba_op
        self.predict_log_proba_op = predict_log_proba_op
        self.train_op = train_op

        init_op = tf.global_variables_initializer()
        self._sess = session if session is not None else tf.Session()
        self._sess.run(init_op)

    def _build_inputs(self):
        """Create the placeholders for stories, queries, answers and the learning rate."""
        self._stories = tf.placeholder(tf.int32, [None, self._memory_size, self._sentence_size], name="stories")
        self._queries = tf.placeholder(tf.int32, [None, self._sentence_size], name="queries")
        self._answers = tf.placeholder(tf.int32, [None, self._memory_size], name="answers")
        self._lr = tf.placeholder(tf.float32, [], name="learning_rate")

    def _build_vars(self):
        """Create the embedding matrices; row 0 of each is the all-zero nil word."""
        with tf.variable_scope(self._name):
            nil_word_slot = tf.zeros([1, self._embedding_size])
            A = tf.concat(axis=0, values=[nil_word_slot, self._init([self._vocab_size - 1, self._embedding_size])])
            C = tf.concat(axis=0, values=[nil_word_slot, self._init([self._vocab_size - 1, self._embedding_size])])

            Q_emb = tf.concat(axis=0, values=[nil_word_slot, self._init([self._vocab_size - 1, self._embedding_size])])

            self.A_1 = tf.Variable(A, name="A")
            self.Q_1 = tf.Variable(Q_emb, name="Q_emb")

            self.C = []

            for hopn in range(self._hops-1):
                with tf.variable_scope('hop_{}'.format(hopn)):
                    self.C.append(tf.Variable(C, name="C"))
        # Variables whose nil row gets a zero gradient (note: Q_emb is not among them).
        self._nil_vars = set([self.A_1.name] + [x.name for x in self.C])

    def _inference(self, stories, queries):
        """Return the unnormalized score of every memory slot, shape (batch, memory_size).

        The scores are returned without a softmax because
        `tf.nn.softmax_cross_entropy_with_logits` applies the softmax itself;
        feeding it probabilities would apply the softmax twice.
        """
        with tf.variable_scope(self._name):
            q_emb = tf.nn.embedding_lookup(self.Q_1, queries)
            u_0 = tf.reduce_sum(q_emb * self._encoding, 1)
            u = [u_0]
            m_emb_A = tf.nn.embedding_lookup(self.A_1, stories)
            m_A = tf.reduce_sum(m_emb_A * self._encoding, 2)
            # (batch, embedding) -> (batch, 1, embedding) so it broadcasts over the memory axis.
            u_temp = tf.transpose(tf.expand_dims(u[-1], -1), [0, 2, 1])
            dotted = tf.reduce_sum(m_A * u_temp, 2)
            return dotted

    def batch_fit(self, stories, queries, answers, learning_rate):
        """Runs the training algorithm over the passed batch

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)
            answers: Tensor (None, memory_size), one-hot over the memory slots
            learning_rate: Learning rate used by the optimizer for this step

        Returns:
            loss: floating-point number, the loss computed for the batch
        """
        feed_dict = {self._stories: stories, self._queries: queries, self._answers: answers, self._lr: learning_rate}
        loss, _ = self._sess.run([self.loss_op, self.train_op], feed_dict=feed_dict)
        return loss

    def predict(self, stories, queries):
        """Predicts the index of the highest-scoring memory slot.

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)

        Returns:
            answers: Tensor (None,) of memory-slot indices
        """

        feed_dict = {self._stories: stories, self._queries: queries}

        return self._sess.run(self.predict_op, feed_dict=feed_dict)

    def predict_out(self, stories, queries):
        """Returns the unnormalized score of every memory slot.

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)

        Returns:
            scores: Tensor (None, memory_size)
        """
        feed_dict = {self._stories: stories, self._queries: queries}
        return self._sess.run(self.predict_out_op, feed_dict=feed_dict)

    def predict_proba(self, stories, queries):
        """Predicts probabilities of the memory slots.

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)

        Returns:
            probabilities: Tensor (None, memory_size)
        """
        feed_dict = {self._stories: stories, self._queries: queries}
        return self._sess.run(self.predict_proba_op, feed_dict=feed_dict)

    def predict_log_proba(self, stories, queries):
        """Predicts log probabilities of the memory slots.

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)
        Returns:
            log probabilities: Tensor (None, memory_size)
        """
        feed_dict = {self._stories: stories, self._queries: queries}
        return self._sess.run(self.predict_log_proba_op, feed_dict=feed_dict)

    def predict_att(self, stories, queries):
        """Returns the attention distribution over the memory slots.

        Args:
            stories: Tensor (None, memory_size, sentence_size)
            queries: Tensor (None, sentence_size)
        Returns:
            attention: Tensor (None, memory_size)
        """
        feed_dict = {self._stories: stories, self._queries: queries}
        return self._sess.run(self.predict_att_op, feed_dict=feed_dict)


In [0]:
print("Started Task:", 1)

# Hyper-parameters of the run.
learning_rate = 0.001
anneal_rate = 10
anneal_stop_epoch = 100
max_grad_norm=40.0
evaluation_interval=10
batch_size=32
hops=3
epochs=100
embedding_size=200
memory_size=40
task_id=1
random_state=10

with open('unary_fact_ext','r') as train_file2:
    train_data2 = train_file2.readlines()

train2, test2 = load_task3(train_data2)

train_data = None
data2 = train2 + test2

# Vocabulary = training/test tokens + keywords + tokens of the evaluation
# sentences and rules, so that evaluation never meets an unknown word.
vocab = sorted(reduce(lambda x, y: x | y, (set(list(s) + q + a) for s, q, a, _ in data2)))
vocab.extend(keyword_dict.keys())

edata = [tokenize(p.replace('\n','')) for p in evaluate_data]
edata = [item for sublist in edata for item in sublist]
vocab.extend(edata)

edata = [tokenize(p.replace('\n','')) for p in evaluate_rules]
edata = [item for sublist in edata for item in sublist]
vocab.extend(edata)

# Sorted instead of plain `list(set(...))`: the iteration order of a set of
# strings is not guaranteed to repeat between interpreter runs, which would
# give every run different word ids.
vocab = sorted(set(vocab))
edata= None
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

max_story_size = max(map(len, (s for s, _, _, _ in data2)))
mean_story_size = int(np.mean([ len(s) for s, _, _, _ in data2 ]))
sentence_size = 1
query_size = max(map(len, (q for _, q, _, _ in data2)))
memory_size = min(memory_size, max_story_size)

print('word_ids', len(word_idx))
# Reserve 2 * memory_size extra vocabulary entries. The stored values are the
# key strings themselves, not integer ids; the entries only enlarge
# len(word_idx), from which the vectorizers derive their marker id.
for i in range(memory_size*2):
    word_idx['time{}'.format(i+1)] = 'time{}'.format(i+1)
print('word_ids2', len(word_idx))


vocab_size = len(word_idx) + 1 # +1 for nil word
sentence_size = max(query_size, sentence_size) # for the position
sentence_size += 1  # +1 for time words

print("Longest sentence length", sentence_size)
print("Longest story length", max_story_size)
print("Average story length", mean_story_size)

S2, Q2, A2 = vectorize_datas(train2, word_idx, sentence_size, memory_size)

trainS2 = S2
trainQ2 = Q2
trainA2 = A2
S2 = Q2 = A2 = None
testS2, testQ2, testA2 = vectorize_datas(test2, word_idx, sentence_size, memory_size)

trainS2shape = np.array(trainS2).shape
testS2shape = np.array(testS2).shape

print("Training set shape", trainS2shape)

n_train = trainS2shape[0]
n_test = testS2shape[0]

print("Training Size", n_train)
print("Testing Size", n_test)

print("Training labels shape", trainA2.shape)

train_labels2 = np.argmax(trainA2.reshape(-1,memory_size),axis=1)
print('train_labels',train_labels2.shape,'\n',train_labels2[0])

trainA2 = trainA2.reshape(-1,memory_size)
test_labels2 = np.array(np.argmax(testA2.reshape(-1,memory_size),axis=1))

# Seed NumPy so the per-epoch batch shuffling below is repeatable.
np.random.seed(random_state)

# Full batches only; a trailing partial batch is not used for training.
batches = zip(range(0, n_train-batch_size, batch_size), range(batch_size, n_train, batch_size))
batches = [(start, end) for start, end in batches]

batches2 = list(batches)


model = None
val_acc_list = list()
train_acc_list = list()
ep_list= list()

val_acc_list2 = list()
train_acc_list2 = list()
ep_list2 = list()
lr_list = list()

tf.reset_default_graph()
# The graph-level seed belongs to the default graph, so it has to be set after
# the reset; setting it before (as was done previously) had no effect.
tf.set_random_seed(random_state)
sess2 = tf.Session()

print ('batch_size, vocab_size, sentence_size, memory_size, embedding_size, hops, max_grad_norm\n',batch_size, vocab_size, sentence_size, memory_size, embedding_size, hops, max_grad_norm)
model2 = MemN2N(batch_size, vocab_size, sentence_size, memory_size, embedding_size, session=sess2,
               hops=hops, max_grad_norm=max_grad_norm)


train_acc2 = 0.0
for t in range(1, epochs+1):
    # Constant learning rate (no annealing).
    anneal = 1.0
    lr = learning_rate / anneal

    np.random.shuffle(batches)
    total_cost2 = 0.0

    for start, end in batches:
        s2 = trainS2[start:end]
        q2 = trainQ2[start:end]
        a2 = trainA2[start:end]
        cost_t2 = model2.batch_fit(s2, q2, a2, lr)
        total_cost2 += cost_t2


    if t % evaluation_interval == 0:
        train_preds2 = []
        for start in range(0, n_train, batch_size):
            end = start + batch_size
            s2 = trainS2[start:end]
            q2 = trainQ2[start:end]
            # Skip the trailing partial batch, mirroring training.
            if(len(s2)!=batch_size):
                continue

            pred2 = model2.predict(s2, q2)
            train_preds2 += list(pred2)

        train_acc2 = metrics.accuracy_score(np.array(train_preds2), train_labels2[:len(train_preds2)])
        train_acc_list2.append(train_acc2)
        test_preds2=  model2.predict(testS2, testQ2)
        val_acc2 = metrics.accuracy_score(test_preds2, test_labels2)
        val_acc_list2.append(val_acc2)
        ep_list2.append(t)



test_preds2 = model2.predict(testS2, testQ2)
test_acc2 = metrics.accuracy_score(test_preds2, test_labels2)
print("Testing Accuracy:", train_acc2, test_acc2)

mpl.pyplot.scatter(ep_list2,train_acc_list2)
mpl.pyplot.show()

mpl.pyplot.scatter(ep_list2,val_acc_list2)
mpl.pyplot.show()

  


In [0]:
# Accuracy recorded by the training cell at every evaluation epoch (ep_list2).
# Titles and axis labels are set so the two figures can be told apart.
plt.scatter(ep_list2,train_acc_list2)
plt.title('Training accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()

plt.scatter(ep_list2,val_acc_list2)
plt.title('Validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()


In [0]:
# Visualize the stepped learning-rate schedule: the rate is divided by 1.2 after
# every `anneal_rate` epochs until `anneal_stop_epoch`, then stays constant.
anneal_stop_epoch = 100
anneal_rate = 10
anneal_rates = [(i+1)*5 for i in range(200) if (i+1)*5<=200]
unknowns = [i+1 for i in range(10)]
anneal_stops = [(i+1)*100 for i in range(17) if (i+1)*100<=1600]
for an in unknowns:
    # anneal_rate = an   (disabled: every iteration currently plots the same schedule)
    lr_list = list()
    for t in range(1, epochs+1):
        # Stepped learning rate
        if t - 1 <= anneal_stop_epoch:
            anneal = 1.2 ** ((t - 1) // anneal_rate)
        else:
            anneal = 1.2 ** (anneal_stop_epoch // anneal_rate)
        lr = learning_rate / anneal
        lr_list.append(lr)

    # One x value per computed rate. The former hard-coded range(1600) did not
    # match the `epochs` entries of lr_list and made scatter() fail.
    mpl.pyplot.scatter(list(range(1, epochs + 1)),lr_list)
    mpl.pyplot.show()
    

In [0]:
# Verbosity switch: when truthy, predictLong() and evaluate() print extra diagnostic lines.
log_info = 1
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

def plot_attention(in_seq, out_seq, attentions):
    """Show an attention matrix as a heat map labelled with both sequences.

    From http://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html

    Args:
        in_seq: Items used as x tick labels (rotated by 90 degrees).
        out_seq: Items used as y tick labels.
        attentions: 2-D matrix passed to `matshow`.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.matshow(attentions, cmap='bone')

    # A leading blank label precedes the labels of the sequence items.
    x_labels = [' '] + [str(item) for item in in_seq]
    axes.set_xticklabels(x_labels, rotation=90)
    y_labels = [' '] + [str(item) for item in out_seq]
    axes.set_yticklabels(y_labels)

    # One tick per matrix cell on both axes.
    axes.xaxis.set_major_locator(ticker.MultipleLocator(1))
    axes.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()

In [0]:
import re


def predictLong(se, q_position=-1):
    """Ask the trained model which sentence word belongs to a query word.

    Args:
        se: String `<sentence>##<query>`; at most the first two space-separated
            tokens of the query are used.
        q_position: Position of the query word in the sentence, forwarded to
            `parse_stories3` / `vectorize_data` (-1 lets them search for it).

    Returns:
        The prediction of `model2.predict` (memory-slot indices), or `[0]` when
        the predicted index lies beyond the whitespace-split sentence.

    Relies on the globals `word_idx`, `sentence_size`, `memory_size`, `model2`
    and `log_info`.
    """
    sentence_part, query_part = se.split('##')
    sentence_words = sentence_part.split()
    short_query = ' '.join(query_part.split(' ')[:2])
    # Rebuild the line in the `<fact>##<token> <token>` format parse_stories3 expects.
    story_line = sentence_part + '##' + 'point '+ short_query

    story, query, answer, q_position = parse_stories3([story_line], q_position)[0]
    memories, queries, _ = vectorize_data(story, query, answer, word_idx, sentence_size, memory_size, q_position)
    prediction = model2.predict(memories, queries)

    if prediction[0] + 1 > len(sentence_words):
        return [0]
    if log_info:
        print('gen_rule', short_query, ' ', sentence_words[int(prediction[0])])
    return prediction


def evaluate(lines, keyword_dict, rule_dict=rule_dict):
    """Predict a rule for every key word of every line and count hits and misses.

    For each `<key>##<sentence>` line the sentence is cut to `memory_size`
    words; every upper-case word and every single letter b-z is treated as a
    key word, and `predictLong` picks the sentence word that belongs to it.
    The pair (predicted word, key word) is compared with the first two words
    of each expected rule in `rule_dict[<key>]`.

    All unique predictions are written to `predict_single_rule_ext_file_mm.txt`
    as `<key>##<key word> <predicted word>` lines.

    Args:
        lines: Iterable of `<key>##<sentence>` strings.
        keyword_dict: Ignored; the key-word counts are rebuilt from each line.
        rule_dict: Mapping from key to its list of expected rule strings.

    Returns:
        (tp, fp, tn, fn); `tn` is never incremented and is always 0.
    """
    single_rule_list = list()
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    # NOTE(review): this list is shared by all lines, so a rule predicted for an
    # earlier line also counts as "found" for later lines — confirm intended.
    selected_rules = list()
    for line in lines:
        qKey = line.split('##')[0]

        pos_rules = list()
        if qKey in rule_dict.keys():
            pos_rules = rule_dict[qKey]

        # Rules are encoded as id tuples. Concatenating the ids as strings (the
        # previous encoding) is ambiguous: ids 1,23 and 12,3 both gave "123".
        pos_rule_encs = list()
        for rule in pos_rules:
            rule_words = rule.split(' ')
            pos_rule_encs.append((word_idx[rule_words[0].replace('@', '')],
                                  word_idx[rule_words[1].replace('@', '')]))
        if (log_info):
            print('pos_rules', pos_rules)

        line = line.split('##')[1]
        if (log_info):
            print('line', line)
        words = line.split(' ')

        #   pruning
        if (len(words) > memory_size):
            words = words[:memory_size]
            line = ' '.join(words)

        keysIn = list(set([x for x in re.findall(r'\b([A-Z]+)\b', line) if x.strip()]))
        keysIn +=list(set([x for x in re.findall(r'\b([b-z])\b', line) if x.strip()]))
        print('keysIn', keysIn)
        # Per-line occurrence count of every key word (replaces the argument).
        keyword_dict = {key: 0 for key in keysIn}

        for word in words:
            if (word in keysIn):
                keyword_dict[word] += 1

        for wordId in range(len(words)):

            word = words[wordId]
            if word in keyword_dict.keys() and keyword_dict[word] != 0:
                a1 = predictLong(line + ' ##' + word, wordId)
                print('wordId', wordId)
                pred_word = str(words[a1[0]])
                word_id = word_idx[word]
                predicted_enc = (word_idx[words[a1[0]].replace('@', '')], word_id)
                single_rule_list.append(qKey + '##' + word + ' ' + pred_word)
                selected_rules.append(predicted_enc)
                if containList(predicted_enc, pos_rule_encs, 'tp'):
                    tp = tp + 1
                else:
                    fp = fp + 1
                    print('inc fp ',qKey, ' ' ,  fp)

                if (log_info):
                    print('inc_over', tp, fp)

        for expected_enc in pos_rule_encs:
            if not containList(expected_enc, selected_rules, 'fp'):
                fn = fn + 1
                print('inc fn', fn)

    single_rule_list = list(set(single_rule_list))
    # `with` closes the file even if a write fails.
    with open("predict_single_rule_ext_file_mm.txt", "w+") as predict_single_rule_file:
        for v in single_rule_list:
            predict_single_rule_file.write(v)
            predict_single_rule_file.write('\n')

    return tp, fp, tn, fn


def containList(list1, lists, a):
    """Return True when `list1` is an element of `lists`, otherwise False.

    The third argument `a` is accepted but not used.
    """
    return list1 in lists

tp, fp, tn, fn = evaluate(evaluate_data, keyword_dict)
# Each ratio falls back to 0.0 when its denominator is 0 (no predictions, no
# expected rules, or no true positives) instead of raising ZeroDivisionError.
# The historical spelling `precison` is kept as the variable name.
precison = 1.0 * tp / (tp + fp) if (tp + fp) else 0.0
recall = 1.0 * tp / (tp + fn) if (tp + fn) else 0.0
f1Score = 2.0 * precison * recall / (precison + recall) if (precison + recall) else 0.0

print(tp, fp, tn, fn, precison, recall, f1Score)
