In [6]:
from __future__ import print_function

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import  DataLoader


from utils import prepare_vocab_continous as vocab_master
from utils import query_graph_to_sparql as sparql_constructor
from utils import embeddings_interface
from configs import config_loader as cl
import network_rdftype as net_rdftype
import network_intent as net_intent
import data_loader as dl
import auxiliary as aux
import network as net

from pprint import pprint
import ConfigParser
import numpy as np
import time
import pickle
import json
import os

device = torch.device("cuda")
sparql_constructor.init(embeddings_interface)

Using TensorFlow backend.


embeddings_interface: Loading Word Vector to Memory.


In [7]:
#Reading and setting up config parser
config = ConfigParser.ConfigParser()
config.readfp(open('configs/macros.cfg'))

#setting up device,model name and loss types.
training_model = 'bilstm_dot'
_dataset = 'lcquad'
pointwise = False
_debug = False


#Loading relations file.
COMMON_DATA_DIR = 'data/data/common'
INTENTS = ['count', 'ask', 'list']
RDFTYPES = ['x', 'uri', 'none']

_dataset_specific_data_dir = 'data/data/%(dataset)s/' % {'dataset': _dataset}
_relations = aux.load_relation(COMMON_DATA_DIR)
_word_to_id = aux.load_word_list(COMMON_DATA_DIR)

In [8]:
# Model specific paramters
    # #Model specific paramters
if pointwise:
    training_config = 'pointwise'
else:
    training_config = 'pairwise'

parameter_dict = cl.runtime_parameters(dataset=_dataset,training_model=training_model,
                                         training_config=training_config,config_file='configs/macros.cfg')

if training_model == 'cnn_dot':
    parameter_dict['output_dim'] = int(config.get(training_model, 'output_dim'))

# Update parameters
parameter_dict['_dataset_specific_data_dir'] = _dataset_specific_data_dir
parameter_dict['_model_dir'] = './data/models/'

parameter_dict['corechainmodel'] = 'bilstm_dot'
parameter_dict['corechainmodelnumber'] = '7'

parameter_dict['intentmodel'] = 'bilstm_dense'
parameter_dict['intentmodelnumber'] = '0'

parameter_dict['rdftypemodel'] = 'bilstm_dense'
parameter_dict['rdftypemodelnumber'] = '0'

parameter_dict['rdfclassmodel'] = 'bilstm_dot'
parameter_dict['rdfclassmodelnumber'] = '0'


In [9]:
TEMP = aux.data_loading_parameters(_dataset,parameter_dict,runtime=True)

_dataset_specific_data_dir,_model_specific_data_dir,_file,\
           _max_sequence_length,_neg_paths_per_epoch_train,\
            _neg_paths_per_epoch_validation,_training_split,_validation_split,_index= TEMP

_data, _gloveid_to_embeddingid, _vectors = dl.create_dataset_runtime(file=_file,_dataset=_dataset,
                                  _dataset_specific_data_dir=_dataset_specific_data_dir,
                                  split_point=.80)

parameter_dict['vectors'] = _vectors

# For interpretability's sake
gloveid_to_embeddingid , embeddingid_to_gloveid, word_to_gloveid, \
    gloveid_to_word = aux.load_embeddingid_gloveid()

In [10]:
class QuestionAnswering:
    """
        Usage:

            qa = QuestionAnswering(parameter_dict, False, _word_to_id, device, True)
            q = np.random.randint(0, 1233, (542))
            p = np.random.randint(0, 123, (10, 55))
            print(qa._predict_corechain(q,p))
            print("intent: ", qa._predict_intent(q))
            print("rdftype: ", qa._predict_rdftype(q))
            print("rdfclass: ", qa._predict_rdfclass(q, p))
    """

    def __init__(self, parameters, pointwise, word_to_id, device, debug):

        self.parameters = parameters
        self.pointwise = pointwise
        self.debug = debug
        self.device = device
        self._word_to_id = word_to_id

        # Load models
        self._load_corechain_model()
        self._load_rdftype_model()
        self._load_rdfclass_model()
        self._load_intentmodel()

    def _load_corechain_model(self):

        # Initialize the model
        if self.parameters['corechainmodel'] == 'bilstm_dot':
            self.corechain_model = net.BiLstmDot(_parameter_dict=self.parameters, _word_to_id=self._word_to_id,
                                                 _device=self.device, _pointwise=self.pointwise, _debug=self.debug)

        # Make the model path
        model_path = os.path.join(self.parameters['_model_dir'], 'core_chain')
        model_path = os.path.join(model_path, self.parameters['corechainmodel'])
        model_path = os.path.join(model_path, self.parameters['dataset'])
        model_path = os.path.join(model_path, self.parameters['corechainmodelnumber'])
        model_path = os.path.join(model_path, 'model.torch')

        # Pull the data from disk
        model_dump = torch.load(model_path)

        # Load parameters
        for key in self.corechain_model.prepare_save():
            key[1].load_state_dict(model_dump[key[0]])

    def _load_rdfclass_model(self):

        # Initialize the model
        if self.parameters['rdfclassmodel'] == 'bilstm_dot':
            self.corechain_model = net.BiLstmDot(_parameter_dict=self.parameters, _word_to_id=self._word_to_id,
                                                 _device=self.device, _pointwise=self.pointwise, _debug=self.debug)

        # Make the model path
        model_path = os.path.join(self.parameters['_model_dir'], 'rdf_class')
        model_path = os.path.join(model_path, self.parameters['rdfclassmodel'])
        model_path = os.path.join(model_path, self.parameters['dataset'])
        model_path = os.path.join(model_path, self.parameters['rdfclassmodelnumber'])
        model_path = os.path.join(model_path, 'model.torch')

        # Pull the data from disk
        model_dump = torch.load(model_path)

        # Load parameters
        for key in self.corechain_model.prepare_save():
            key[1].load_state_dict(model_dump[key[0]])

    def _load_rdftype_model(self):
        # Initialize the model
        self.rdftype_model = net_rdftype.RdfTypeClassifier(_parameter_dict=self.parameters,
                                                           _word_to_id=self._word_to_id,
                                                           _device=self.device)

        # Make model path like:
        # ('model with accuracy ', 0.998, 'stored at', 'data/models/intent/bilstm_dense/lcquad/2/model.torch')
        model_path = os.path.join(self.parameters['_model_dir'], 'rdf_type')
        model_path = os.path.join(model_path, self.parameters['rdftypemodel'])
        model_path = os.path.join(model_path, self.parameters['dataset'])
        model_path = os.path.join(model_path, self.parameters['rdftypemodelnumber'])
        model_path = os.path.join(model_path, 'model.torch')

        # Pull data from disk
        model_dump = torch.load(model_path)

        # Load parameters
        for key in self.rdftype_model.prepare_save():
            key[1].load_state_dict(model_dump[key[0]])

    def _load_intentmodel(self):

        # Initialize the model
        self.intent_model = net_intent.IntentClassifier(_parameter_dict=self.parameters,
                                                        _word_to_id=self._word_to_id,
                                                        _device=self.device)

        # Make model path like:
        # ('model with accuracy ', 0.998, 'stored at', 'data/models/intent/bilstm_dense/lcquad/2/model.torch')
        model_path = os.path.join(self.parameters['_model_dir'], 'intent')
        model_path = os.path.join(model_path, self.parameters['intentmodel'])
        model_path = os.path.join(model_path, self.parameters['dataset'])
        model_path = os.path.join(model_path, self.parameters['intentmodelnumber'])
        model_path = os.path.join(model_path, 'model.torch')

        # Pull data from disk
        model_dump = torch.load(model_path)

        # Load parameters
        for key in self.intent_model.prepare_save():
            key[1].load_state_dict(model_dump[key[0]])

    def _predict_corechain(self, _q, _p):
        """
            Given a datapoint (question, paths) encoded in  embedding_vocab,
                run the model's predict and find the best corechain.

            _q: (<var len>)
            _p: (100/500, <var len>)

            returns score: (100/500)
        """

        # Pad questions
        Q = np.zeros((len(_p), self.parameters['max_length']))
        Q[:, :min(len(_q), self.parameters['max_length'])] = \
            np.repeat(_q[np.newaxis, :min(len(_q), self.parameters['max_length'])], repeats=len(_p), axis=0)

        # Pad paths
        P = np.zeros((len(_p), self.parameters['max_length']))
        for i in range(len(_p)):
            P[i, :min(len(_p[i]), self.parameters['max_length'])] = \
                _p[i][:min(len(_p[i]), self.parameters['max_length'])]

        # Convert np to torch stuff
        Q = torch.tensor(Q, dtype=torch.long, device=self.device)
        P = torch.tensor(P, dtype=torch.long, device=self.device)
        
        if self.debug:
            print("Q: ", Q.shape, " P: ", P.shape)

            # We then pass them through a predict function and get a score array.
        score = self.corechain_model.predict(ques=Q, paths=P, device=self.device)


        return score.detach().cpu().numpy()


    def _predict_rdfclass(self, _q, _p):
        """
            Given a datapoint (question, paths) encoded in  embedding_vocab,
                run the model's predict and find the best corechain.

            _q: (<var len>)
            _p: (100/500, <var len>)

            returns score: (100/500)
        """

        # Pad questions
        Q = np.zeros((len(_p), self.parameters['max_length']))
        Q[:, :min(len(_q), self.parameters['max_length'])] = \
            np.repeat(_q[np.newaxis, :min(len(_q), self.parameters['max_length'])], repeats=len(_p), axis=0)

        # Pad paths
        P = np.zeros((len(_p), self.parameters['max_length']))
        for i in range(len(_p)):
            P[i, :min(len(_p[i]), self.parameters['max_length'])] = \
                _p[i][:min(len(_p[i]), self.parameters['max_length'])]

        # Convert np to torch stuff
        Q = torch.tensor(Q, dtype=torch.long, device=self.device)
        P = torch.tensor(P, dtype=torch.long, device=self.device)

        # We then pass them through a predict function and get a score array.
 
        score = self.corechain_model.predict(ques=Q, paths=P, device=self.device)

        return score.detach().cpu().numpy()

    def _predict_intent(self, _q):
        """
            Given a question, it runs a distribution over possible intents (ask/count/list)

            _q: (<var len>)

            returns: np.arr shape (3)
        """

        # Pad the question
        Q = np.zeros(self.parameters['max_length'])
        Q[:min(_q.shape[0], self.parameters['max_length'])] = _q[:min(_q.shape[0], self.parameters['max_length'])]

        data = {'ques_batch': Q.reshape(1, Q.shape[0])}

        # Get prediction
        score = self.intent_model.predict(data, self.device)

        return score.detach().cpu().numpy()

    def _predict_rdftype(self, _q):
        """
            Given a question, it runs a distribution over possible places where we attach an rdftype constraint
                (x/uri/none)

            _q: (<var len>)

            returns: np.arr shape (3)
        """

        # Pad the question
        Q = np.zeros(self.parameters['max_length'])
        Q[:min(_q.shape[0], self.parameters['max_length'])] = _q[:min(_q.shape[0], self.parameters['max_length'])]

        data = {'ques_batch': Q.reshape(1, Q.shape[0])}

        # Get prediction
        score = self.rdftype_model.predict(data, self.device)

        return score.detach().cpu().numpy()

In [11]:
def construct_paths(data, relations, gloveid_to_embeddingid, qald=False):
    """
    :param data: a data node of id_big_data
    relations : a dictionary which maps relation id to meta inforamtion like surface form, embedding id
    of surface form etc.
    :return: unpadded , continous id spaced question, positive path, negative paths

    @TODO: remove from here, and use dataloader version

    """

    question = np.asarray(data['uri']['question-id'])
    # questions = pad_sequences([question], maxlen=max_length, padding='post')

    # inverse id version of positive path and creating a numpy version
    positive_path_id = data['parsed-data']['path_id']
    no_positive_path = False
    if positive_path_id == [-1]:
        positive_path = np.asarray([-1])
        no_positive_path = True
    else:
        positive_path = []
        for path in positive_path_id:
            positive_path += [embeddings_interface.SPECIAL_CHARACTERS.index(path[0])]
            positive_path += relations[int(path[1:])][3].tolist()
        positive_path = np.asarray(positive_path)
    # padded_positive_path = pad_sequences([positive_path], maxlen=max_length, padding='post')

    # negative paths from id to surface form id
    negative_paths_id = data['uri']['hop-2-properties'] + data['uri']['hop-1-properties']
    negative_paths = []
    for neg_path in negative_paths_id:
        negative_path = []
        for path in neg_path:
            try:
                negative_path += [embeddings_interface.SPECIAL_CHARACTERS.index(path)]
            except ValueError:
                negative_path += relations[int(path)][3].tolist()
        negative_paths.append(np.asarray(negative_path))
    negative_paths = np.asarray(negative_paths)
    # negative paths padding
    # padded_negative_paths = pad_sequences(negative_paths, maxlen=max_length, padding='post')

    # explicitly remove any positive path from negative path
    negative_paths = dl.remove_positive_path(positive_path, negative_paths)

    # remap all the id's to the continous id space.

    # passing all the elements through vocab
    question = np.asarray([gloveid_to_embeddingid[key] for key in question])
    if not no_positive_path:
        positive_path = np.asarray([gloveid_to_embeddingid[key] for key in positive_path])
    for i in range(0, len(negative_paths)):
        # temp = []
        for j in xrange(0, len(negative_paths[i])):
            try:
                negative_paths[i][j] = gloveid_to_embeddingid[negative_paths[i][j]]
            except:
                negative_paths[i][j] = gloveid_to_embeddingid[0]
                # negative_paths[i] = np.asarray(temp)
                # negative_paths[i] = np.asarray([vocab[key] for key in negative_paths[i] if key in vocab.keys()])
    if qald:
        return question, positive_path, negative_paths, no_positive_path
    return question, positive_path, negative_paths

In [12]:
def prune_candidate_space(question, paths, k=None):
    """
        Boilerplate to reduce the number of valid paths.
        Note: path[0] is the correct path.
            Should we remove it? Should we not?

        As of now it returns an index
    """

    return np.arange(len(paths))

def create_sparql(log, data, embeddings_interface, embeddingid_to_gloveid, relations):
    """
        Creates a query graph from logs and sends it to sparql_constructor
            for getting a valid SPARQL query (or results) back.


        Query graph is a dict containing:
            best_path,
            intent,
            rdf_constraint,
            rdf_constraint_type,
            rdf_best_path

    :param log: dict made using answer_question function
    :param embeddings_interface: the file
    :param embeddingid_to_gloveid: reverse vocab dict
    :param relations: the relations dict
    :return: sparql query as string
    """
    query_graph = {}
    query_graph['intent'] = log['pred_intent']
    query_graph['best_path'] = log['pred_path']
    query_graph['rdf_constraint_type'] = log['pred_rdf_type']
    query_graph['rdf_best_path'] = log['pred_rdf_class']
    query_graph['entities'] = data['parsed-data']['entity']
    query_graph['rdf_constraint'] = False if log['pred_rdf_type'] == 'none' else True

    return sparql_constructor.convert(_graph=query_graph, relations=relations,
                                        embeddings_interface=embeddings_interface,
                                        embeddingid_to_gloveid=embeddingid_to_gloveid)

In [13]:
def corechain_prediction(question, paths, positive_path, negative_paths, no_positive_path):
    '''
        Why is path needed ?
    '''

    # Remove if adding to class
    global qa

    mrr = 0
    best_path = ''
    path_predicted_correct = False
    
    if no_positive_path and len(negative_paths) == 0:
        '''
            There exists no positive path and also no negative paths
                Why does this quest exists ? 
                    > Probably in qald
        '''
        print("The code should not have been herr. There is no warning. RUN!!!!!!!!")
        raise ValueError

    elif not no_positive_path and len(negative_paths) == 0:
        '''
            There exists a positive path and there exists no negative path
        '''
        best_path = positive_path
        mrr = 1
        path_predicted_correct = True
    
    elif no_positive_path and len(negative_paths) != 0:
        '''
            There exists no correct/true path and there are few negative paths.
        '''
        output = qa._predict_corechain(question, paths)
        best_path_index = np.argmax(output)
        best_path = paths[best_path_index]
    
    elif not no_positive_path and len(negative_paths) != 0:
        '''
            There exists positive path and also negative paths
            path = positive_path + negative_paths    
        '''
        output = qa._predict_corechain(question, paths)
        best_path_index = np.argmax(output)
        best_path = paths[best_path_index]

        # Calculate mrr here
        mrr = 0
        if best_path_index == 0:
            path_predicted_correct = True

        mrr_output = np.argsort(output)[::-1]
        mrr_output = mrr_output.tolist()
        mrr = mrr_output.index(0) + 1.0

        if mrr != 0:
            mrr = 1.0 / mrr
    
    else:
        print("The code should not have been herr. There is no warning. RUN!!!!!!!!")
        raise ValueError
    

    return mrr, best_path, path_predicted_correct

In [14]:
def answer_question(qa, index, data, gloveid_to_embeddingid, embeddingid_to_gloveid, relations, parameter_dict):
    """
        Uses everything to do everyhing for one data instance (one question, subgraph etc).
    """

    log = {}
    log['question'] = None
    log['true_path'] = None
    log['true_intent'] = None
    log['true_rdf_type'] = None
    log['true_rdf_class'] = None
    log['pred_path'] = None
    log['pred_intent'] = None
    log['pred_rdf_type'] = None
    log['pred_rdf_class'] = None

    metrics = {}

    question, positive_path, negative_paths, no_positive_path = dl.construct_paths(data, qald=True,
                                                                                   relations=relations,
                                                                                   gloveid_to_embeddingid=gloveid_to_embeddingid)
    log['question'] = question

    '''
        @some hack
        if the dataset is LC-QUAD and data['pop'] 
            is false then the positive path has been forcefully inserted and needs to be removed.
    '''
    if parameter_dict['dataset'] == 'lcquad':
        try:
            if data['pop'] == False:
                no_positive_path = True
        except KeyError:
            pass

    # ##############################################
    """
        Core chain prediction
    """
    # ##############################################
    if no_positive_path:
        '''
            There is no positive path, maybe we do something intelligent
        '''
        log['true_path'] = [-1]
        nps = [n.tolist() for n in negative_paths]
        paths = nps
        index_selected_paths = prune_candidate_space(question, paths, parameter_dict['prune_corechain_candidates'])

    else:     
        
        pp = [positive_path.tolist()]
        nps = [n.tolist() for n in negative_paths]
        paths = pp + nps
        if parameter_dict['prune_corechain_candidates']:
            index_selected_paths = prune_candidate_space(question, paths, parameter_dict['prune_corechain_candidates'])

            if index_selected_paths[-1] == 0:
                #  Counts the number of times just using  word2vec similarity, the best path came the most similar.
                # This will only work if CANDIDATE_SPACE is not none.
                metrics['word_vector_accuracy_counter'] = 1
        else:
            index_selected_paths = prune_candidate_space(question, paths, len(paths))

        log['true_path'] = pp[0]

    # Put the pruning index over the paths
    paths = [paths[i] for i in index_selected_paths]
    '''
        Converting paths to numpy array
    '''
    for i in range(len(paths)):
        paths[i] = np.asarray(paths[i])
    paths = np.asarray(paths)

    cc_mrr, best_path, cc_acc = corechain_prediction(question,
                                                     paths, positive_path,
                                                     negative_paths, no_positive_path)
    
    log['pred_path'] = best_path
    metrics['core_chain_accuracy_counter'] = cc_acc
    metrics['core_chain_mrr_counter'] = cc_mrr
    metrics['num_paths'] = len(paths)

    # ##############################################
    """
        Intent, rdftype prediction

        Straightforward.

        Metrics: accuracy
    """
    # ##############################################
    # Get intent
    intent_pred = np.argmax(qa._predict_intent(question))
    intent_true = np.argmax(net_intent.get_y(data))
    intent_acc = 1 if intent_pred == intent_true else 0
    metrics['intent_accuracy_counter'] = intent_acc
    intent = INTENTS[intent_pred]

    log['true_intent'] = INTENTS[intent_true]
    log['pred_intent'] = INTENTS[intent_pred]

    # Get rdftype
    rdftype_pred = np.argmax(qa._predict_rdftype(question))
    rdftype_true = np.argmax(net_rdftype.get_y(data))
    rdftype_acc = 1 if rdftype_pred == rdftype_true else 0
    metrics['rdftype_accuracy_counter'] = rdftype_acc
    rdftype = RDFTYPES[rdftype_pred]

    log['true_rdf_type'] = RDFTYPES[rdftype_true]
    log['pred_rdf_type'] = RDFTYPES[rdftype_pred]

    # ##############################################
    """
        RDF class prediction.

            do this only if we need to, based on the prediction of rdftype model.
    """
    # ##############################################

    # Add dummy rdfclass logs and metrics
    log['true_rdf_class'] = None
    log['pred_rdf_class'] = None
    metrics['rdfclass_accuracy_counter'] = None

    if rdftype == "none":

        pass

    else:
        """
            We do need an rdf constraint.
            We let the rdf class model (ranker) choose between both x and uri paths, 
                and the rdf type model is just used to see if we need paths at all.
        """


        rdf_candidates = sparql_constructor.rdf_type_candidates(data, best_path, gloveid_to_embeddingid, relations, embeddingid_to_gloveid)

        if rdf_candidates:

            rdf_candidate_pred = qa._predict_rdfclass(_q=question, _p=rdf_candidates)

            best_rdf_path = rdf_candidates[np.argmax(rdf_candidate_pred)]

                   

        else:

            # No candidates found
            best_rdf_path = []
            
        # @TODO: as of now we don't have ground truth so we add a 0 in metrics and 0 in log      
        log['true_rdf_class'] = 0
        log['pred_rdf_class'] = best_rdf_path
        metrics['rdfclass_accuracy_counter'] = 0
            
    return log, metrics

In [15]:
"""
    Different counters and metrics to store accuracy of diff modules

        Core chain accuracy counter counts the number of time the core chain predicated is same as 
        positive path. This also includes for ask query.
        The counter might confuse the property and the ontology. 

        Similar functionality with rdf_type and intent

        **word vector accuracy counter**: 
            Counts the number of times just using  word2vec similarity, 
            the best path came the most similar. 
            This will only work if CANDIDATE_SPACE is not none.

"""

'''
    c_flag  is true if the core_chain was correctly predicted. 
    same is the case for i_flag and r_flag, rt_flag (correct candidate for rdf type)
'''
c_flag, i_flag, r_flag, rt_flag = False, False, False, False

'''
    Stores tuple of (fmeasure,precision,recall)
'''
results = []

Logging = parameter_dict.copy()
Logging['runtime'] = []

qa = QuestionAnswering(parameter_dict, pointwise, _word_to_id, device, _debug)

PartiallyPretrainedWordEmb: vectors loaded in 0.021 second
PartiallyPretrainedWordEmb: words loaded in 0.049 second
PartiallyPretrainedWordEmb: dictionary created in 0.030 second
PartiallyPretrainedWordEmb: vectors loaded in 0.000 second
PartiallyPretrainedWordEmb: words loaded in 0.000 second
PartiallyPretrainedWordEmb: dictionary created in 0.033 second
PartiallyPretrainedWordEmb: vectors loaded in 0.000 second
PartiallyPretrainedWordEmb: words loaded in 0.000 second
PartiallyPretrainedWordEmb: dictionary created in 0.030 second
PartiallyPretrainedWordEmb: vectors loaded in 0.000 second
PartiallyPretrainedWordEmb: words loaded in 0.000 second
PartiallyPretrainedWordEmb: dictionary created in 0.031 second


In [16]:
core_chain_accuracy_counter = 0
core_chain_mrr_counter = 0
intent_accuracy_counter = 0
rdftype_accuracy_counter = 0
query_graph_accuracy_counter = 0
word_vector_accuracy_counter = 0  # @TODO: note this down on every occassion!
core_chain_acc_log = []
core_chain_mrr_log = []

In [None]:
startindex = 0
for index, data in enumerate(_data[startindex:]):

    index += startindex
    
    log, metrics = answer_question(qa=qa,
                                   index=index,
                                   data=data,
                                   gloveid_to_embeddingid=_gloveid_to_embeddingid,
                                   embeddingid_to_gloveid=embeddingid_to_gloveid,
                                   relations=_relations,
                                   parameter_dict=parameter_dict)
    
#     log, metrics = answer_question(qa=None,
#                                    index=None,
#                                    data=None,
#                                    gloveid_to_embeddingid=None,
#                                    embeddingid_to_gloveid=None,
#                                    relations=None,
#                                    parameter_dict=None)

    sparql = create_sparql(log=log,
                           data=data,
                           embeddings_interface=embeddings_interface,
                           embeddingid_to_gloveid=embeddingid_to_gloveid,
                           relations=_relations)

    # metrics = eval(data, log, metrics)

    # Update logs
    Logging['runtime'].append({'log': log, 'metrics': metrics})

    # Update metrics
    intent_accuracy_counter += metrics['intent_accuracy_counter']
    rdftype_accuracy_counter += metrics['rdftype_accuracy_counter']
    core_chain_accuracy_counter += metrics['core_chain_accuracy_counter']
    core_chain_mrr_counter += metrics['core_chain_mrr_counter']
    core_chain_acc_log.append(metrics['core_chain_accuracy_counter'])
    core_chain_mrr_log.append(metrics['core_chain_mrr_counter'])

    # Make shit interpretable
    question = aux.id_to_word(log['question'], gloveid_to_word, embeddingid_to_gloveid, remove_pad=True)
    true_path = aux.id_to_word(log['true_path'], gloveid_to_word, embeddingid_to_gloveid, remove_pad=True)
    pred_path = aux.id_to_word(log['pred_path'], gloveid_to_word, embeddingid_to_gloveid, remove_pad=True)

    print("#%s" % index, "\t\bAcc: ", np.mean(core_chain_acc_log))

    print("\t\bQues: ", question)
    print("\t\bTPath: ", true_path, "\n\t\bPPath: ", pred_path)
#     print("\t\bTIntent: ", log['true_intent'])
#     print("\t\bPIntent: ", log['pred_intent'])
#     print("\t\bPRdftype: ", log['true_rdf_type'])
#     print("\t\bTRdftype: ", log['pred_rdf_type'])
#     print("\t\bPRdfclass: ", log['true_rdf_class'])
#     print("\t\bTRdfclass: ", log['pred_rdf_class'])

#     print("")
#     pprint(log)
#     print("")
#     pprint(metrics)
#     print("\n",sparql)
    print("\n################################\n")




('DEBUG:  ', '+')
#0 Acc:  0.0
Ques:  how many movies did stanley kubrick direct
TPath:  - director 
PPath:  + quote

################################

#1 Acc:  0.0
Ques:  which UNK UNK is john forbes
TPath:  - founder 
PPath:  + branch - is cited by

################################

#2 Acc:  0.0
Ques:  what is the river whose mouth is in deadsea
TPath:  - river mouth 
PPath:  + inflow + river mouth

################################

#3 Acc:  0.0
Ques:  what is the allegiance of john kotelawala
TPath:  + allegiance 
PPath:  - government head + opposition party

################################

#4 Acc:  0.0
Ques:  how many races have the horses bred by jacques UNK hart participated in
TPath:  - breeder + race 
PPath:  - breeder + mmfm

################################

#5 Acc:  0.0
Ques:  what is the incumbent of the al gore presidential campaign 2000 and also the president of the ann lewis
TPath:  + incumbent - president 
PPath:  + affiliation - party

###############################

#44 Acc:  0.13333333333333333
Ques:  name the f1 racer with relative as ralf schumacher and has child named mick schumacher
TPath:  - relatives + child 
PPath:  - relative + child

################################

#45 Acc:  0.15217391304347827
Ques:  does the toyota UNK have the front engine design platform
TPath:  + automobile platform 
PPath:  + automobile platform

################################

#46 Acc:  0.14893617021276595
Ques:  what are some other products of the banks which makes UNK
TPath:  - product + products 
PPath:  - products + founding year

################################

here at no negative paths
#47 Acc:  0.16666666666666666
Ques:  what is the occupation of the irving chernev and karen grigorian
TPath:  + occupation - occupation 
PPath:  + occupation - occupation

################################

#48 Acc:  0.16326530612244897
Ques:  who produces the trains operated by the mtr
TPath:  - operator + manufacturer 
PPath:  - system + bridge

########################

#85 Acc:  0.2441860465116279
Ques:  in how many different places does canal and river trust own its assets
TPath:  - owner + location 
PPath:  + headquarters + static image name

################################

#86 Acc:  0.2413793103448276
Ques:  count the total number of launch site of the rockets which have been launched form cape canaveral air force station
TPath:  - launch site + launch site 
PPath:  + location + long s

################################

#87 Acc:  0.23863636363636365
Ques:  list some leaders of regions in the indian standard time zone
TPath:  - time zone + leader name 
PPath:  - time zone + time zone

################################

#88 Acc:  0.23595505617977527
Ques:  name some comic characters created by bruce timm
TPath:  - creator 
PPath:  - writer + main char team

################################

#89 Acc:  0.23333333333333334
Ques:  which university attended by franklin w. olin was also the alma mater of patty lin
TPath:  + alma mater - education 
PPath:

#128 Acc:  0.20155038759689922
Ques:  which UNK mouth is located in gulf of mexico and has source location as itasca state park
TPath:  - river mouth + source location 
PPath:  + countries - country

################################

#129 Acc:  0.2076923076923077
Ques:  is my truly truly fair the band of mitch miller
TPath:  + musical band 
PPath:  + musical band

################################

here
#130 Acc:  0.20610687022900764
Ques:  which religion is prevalent in the schools of the ashanti region
TPath:  <unk> 
PPath:  - place of birth + region

################################

#131 Acc:  0.20454545454545456
Ques:  which politician was succeeded by someone who graduated from the instituts UNK politiques
TPath:  - alma mater - successor 
PPath:  - title + motto

################################

#132 Acc:  0.20300751879699247
Ques:  how many different mascots are there of the ncaa teams
TPath:  - athletics + mascot 
PPath:  + headquarter + national motto

#######################

#170 Acc:  0.19883040935672514
Ques:  what are some short story kind of books
TPath:  - literary genre 
PPath:  - literary genre - books

################################

#171 Acc:  0.19767441860465115
Ques:  what are the nicknames of the schools whose color is royal blue
TPath:  - colour + nickname 
PPath:  - colors + labelstyle

################################

#172 Acc:  0.19653179190751446
Ques:  to what political party do the politicians graduated from somerville college oxford belong to
TPath:  - alma mater + party 
PPath:  - education + birth place

################################

#173 Acc:  0.19540229885057472
Ques:  what awards did jos rivera UNK win
TPath:  + awards 
PPath:  - title + title

################################

#174 Acc:  0.19428571428571428
Ques:  which political figures were awarded screen actors guild life achievement award
TPath:  - award 
PPath:  - title - is cited by

################################

here
#175 Acc:  0.19318181818181818
Ques:  what are

#211 Acc:  0.1792452830188679
Ques:  list the awards won by the spouse of harriet andersson
TPath:  + partner + awards 
PPath:  - starring + imdb id

################################

#212 Acc:  0.18309859154929578
Ques:  who was engaged in wars of fort gadsden and fort barrancas
TPath:  - battle + battle 
PPath:  - battle + battle

################################

here at no negative paths
#213 Acc:  0.18691588785046728
Ques:  where are solidus and aureus used
TPath:  - currency + currency 
PPath:  - currency + currency

################################

here
#214 Acc:  0.18604651162790697
Ques:  which company owns the airlines whose hub is in dubai
TPath:  <unk> 
PPath:  - west + official name

################################

#215 Acc:  0.18518518518518517
Ques:  which appointer of william clark is the successor of levi lincoln sr
TPath:  + successor - appointer 
PPath:  + office - title

################################

#216 Acc:  0.18433179723502305
Ques:  which UNK division is

#253 Acc:  0.1968503937007874
Ques:  was marie curie a doctoral student of henri becquerel
TPath:  + doctoral student 
PPath:  + doctoral student

################################

#254 Acc:  0.19607843137254902
Ques:  what are the baseball teams who belongs to guggenheim partners
TPath:  - owner 
PPath:  + location + leader title

################################

#255 Acc:  0.1953125
Ques:  count all the things licensed as software as a service.
TPath:  - license 
PPath:  - products and services + location country

################################

#256 Acc:  0.19455252918287938
Ques:  who is the headcoach of 2014-15 virginia tech hokies UNK basketball team
TPath:  + headcoach 
PPath:  - name + coach team

################################

#257 Acc:  0.1937984496124031
Ques:  what is the resting place of the politician who succeeded samuel hayes
TPath:  + successor + resting place 
PPath:  - alongside + birth year

################################

#258 Acc:  0.19305019305019305
Ques

#294 Acc:  0.19322033898305085
Ques:  list all the artist of the tv shows which has mcclain as one of the artist
TPath:  - artist + artist 
PPath:  - writer + artist

################################

#295 Acc:  0.19256756756756757
Ques:  who is the person whose child performed with tony bennett
TPath:  - associated acts - child 
PPath:  - starring + show name

################################

here at no negative paths
#296 Acc:  0.19528619528619529
Ques:  does the north carolina highway 280 end in arden
TPath:  + route end 
PPath:  + route end

################################

here at no negative paths
#297 Acc:  0.19798657718120805
Ques:  is angola avante the anthem of angola
TPath:  + anthem 
PPath:  + anthem

################################

#298 Acc:  0.20066889632107024
Ques:  name the river with mouth as thames estuary and passes through reading berkshire
TPath:  - river mouth + city 
PPath:  - river mouth + city

################################

#299 Acc:  0.2
Ques:  who al

#336 Acc:  0.20474777448071216
Ques:  which purpose of the maharashtra chess association is abhijit kunte is also know for
TPath:  + purpose - known for 
PPath:  + purpose - known for

################################

#337 Acc:  0.20414201183431951
Ques:  what does the famous relative of levon UNK grigorian do for a living
TPath:  - relatives + occupation 
PPath:  - relative + death date

################################

#338 Acc:  0.20353982300884957
Ques:  where is the debut team of UNK barrett located
TPath:  + debutteam + location city 
PPath:  + debut team + formation year

################################

('DEBUG:  ', '-')
#339 Acc:  0.20294117647058824
Ques:  who owns ivanpah solar power facility
TPath:  + owner 
PPath:  - is cited by

################################

#340 Acc:  0.20527859237536658
Ques:  what artist of charing cross bridge influenced dorothea sharp in her career
TPath:  + artist - influenced by 
PPath:  + artist - influenced by

############################

#377 Acc:  0.20105820105820105
Ques:  who are the politicians whose death place is ontario
TPath:  - death place 
PPath:  - city + man of the match2a

################################

#378 Acc:  0.20052770448548812
Ques:  which statesman is married to dolley madison
TPath:  - spouse 
PPath:  + birth place + status text

################################

#379 Acc:  0.2
Ques:  how many theme musics have been composed by julian gingell
TPath:  - theme music composer 
PPath:  - theme music composer + runtime

################################

#380 Acc:  0.1994750656167979
Ques:  name the monarch of l buwei
TPath:  + monarch 
PPath:  + monarch + picsize

################################

#381 Acc:  0.19895287958115182
Ques:  name the musician who was given label by celluloid records and has been associated with UNK
TPath:  - label + associated musical artist 
PPath:  - label + associated band

################################

#382 Acc:  0.19843342036553524
Ques:  what is the stylistic ori

#419 Acc:  0.20238095238095238
Ques:  what is the death location of the scientist who is the known authority of UNK
TPath:  + binomial authority + death place 
PPath:  + family + label

################################

here at no negative paths
#420 Acc:  0.2042755344418052
Ques:  cn UNK has a stadium in barcelona
TPath:  + stadium 
PPath:  + stadium

################################

#421 Acc:  0.2037914691943128
Ques:  what are some non fiction subjects dealt with in fantasy novels
TPath:  - literary genre + non fiction subject 
PPath:  - genre + gi3ds

################################

#422 Acc:  0.2033096926713948
Ques:  which companies have launched a rocket from cape canaveral air force station
TPath:  - launch site + manufacturer 
PPath:  - launch site + launch rocket

################################

#423 Acc:  0.20518867924528303
Ques:  is the frank r lillie house has the chicago architecture
TPath:  + architectural style 
PPath:  + architectural style

#####################

#461 Acc:  0.19913419913419914
Ques:  on how many subjects has random house publishers published books
TPath:  - publisher + subject 
PPath:  + parent + keypeople

################################

#462 Acc:  0.19870410367170627
Ques:  list the associated musical artist of the current members of al bano and romina power
TPath:  + current members + associated musical artist 
PPath:  - artist + country

################################

#463 Acc:  0.20043103448275862
Ques:  which UNK neighboring municipalities are cologny and UNK
TPath:  - neighboring municipalities + neighboring municipality 
PPath:  - neighboring municipalities + neighboring municipality

################################

#464 Acc:  0.2
Ques:  who is the writer of the sandman UNK
TPath:  + writers 
PPath:  - notable work + birth date

################################

#465 Acc:  0.19957081545064378
Ques:  who has a child named lori black and is resting place as palo alto california
TPath:  - children + resting place 
P

KeyboardInterrupt: 

In [None]:
# np.mean(core_chain_acc_log)

aux.id_to_word([0,1,2,3,4,5,6,7], gloveid_to_word, embeddingid_to_gloveid, remove_pad=True)