# Step 3 - Predict Relation and Finish

Our goal during this step is to predict the relation and compute the end-to-end accuracy.

In [2]:
import sys
# Prepend '../../' to the module search path. Later cells import `scripts.*` and
# `lib.*`; presumably those packages live two directories up — confirm.
sys.path.insert(0, '../../')

In [3]:
from scripts.utils.connect import get_connection 
from scripts.utils.data import FB2M_NAME_TABLE
from scripts.utils.data import FB2M_KG_TABLE

# Open a connection through the project helper and create the cursor that
# `generate_facts` executes its query on.
# NOTE(review): neither the cursor nor the connection is closed in the cells shown here.
connection = get_connection()
cursor = connection.cursor()

In [4]:
import pandas as pd
from tqdm import tqdm_notebook

# Register tqdm's pandas integration; `df.progress_apply` below relies on it.
tqdm_notebook().pandas()

# Load the candidate DataFrame (presumably written by the step-2 notebook — confirm).
# NOTE(review): unpickling can execute arbitrary code; only load files you produced yourself.
df = pd.read_pickle('step_2_generate_candidates.pkl')
df[:5]




Unnamed: 0,end_index,object,predicted_question_tokens,predicted_subject_names,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids,predicted_start_index,predicted_end_index,predicted_subject_name
6219,,0bs56bp,"[name, an, american, thoroughbread, racehorse]","[{'name': 'american thoroughbread', 'score': 1...",Name an American Thoroughbread racehorse,,biology/organism_classification/organisms_of_t...,,03k3r,,,"[01z1jf2, 04q7gbh]",2.0,3.0,american
3364,9.0,01sjng,"[what, kind, of, game, is, vision, racing, dri...","[{'name': 'vision racing driving simulator', '...",what kind of game is vision racing driving sim...,"[what, kind, of, game, is, vision, racing, dri...",cvg/computer_videogame/cvg_genre,5.0,02qlppc,vision racing driving simulator,"(vision, racing, driving, simulator)",[02qlppc],5.0,9.0,vision racing driving simulator
9374,6.0,0dlmm88,"[what, tv, program, is, romance, film]","[{'name': 'romance film', 'score': 28.02931404...",what tv program is romance film,"[what, tv, program, is, romance, film]",tv/tv_genre/programs,4.0,02l7c8,romance film,"(romance, film)",[02l7c8],4.0,6.0,romance film
10142,4.0,04rrx,"[what, state, is, polaski, located, in]","[{'name': 'polaski', 'score': 32.1325416564941...",what state is polaski located in,"[what, state, is, polaski, located, in]",location/location/containedby,3.0,049_zj3,polaski,"(polaski,)",[049_zj3],3.0,4.0,polaski
97,8.0,0qcr0,"[what, disease, claimed, the, life, of, fern, ...","[{'name': 'fern emmett', 'score': 23.679399490...",what disease claimed the life of fern emmett,"[what, disease, claimed, the, life, of, fern, ...",people/deceased_person/cause_of_death,6.0,02w9ycr,fern emmett,"(fern, emmett)",[02w9ycr],6.0,8.0,fern emmett


## Generate Facts

Given the candidate mids, we generate the candidate facts.

In [5]:
from collections import defaultdict

def generate_facts(row):
    """Fetch every knowledge-graph fact whose subject is one of the row's candidate mids.

    Args:
        row: Mapping with a 'candidate_mids' entry; that value is passed to the
            database driver as the single bound parameter of ``ANY(%s)``.

    Returns:
        A nested ``defaultdict``: relation -> subject mid -> set of object mids.
    """
    query = """SELECT subject_mid, relation, object_mid
                      FROM {kg}
                      WHERE subject_mid = ANY(%s)""".format(kg=FB2M_KG_TABLE)
    cursor.execute(query, (row['candidate_mids'],))

    # Group the flat (subject, relation, object) triples by relation, then by subject.
    facts_by_relation = defaultdict(lambda: defaultdict(set))
    for subject_mid, relation, object_mid in cursor.fetchall():
        facts_by_relation[relation][subject_mid].add(object_mid)

    return facts_by_relation

In [6]:
# Attach the candidate facts to every row. `generate_facts` issues one query per call,
# so this runs one database query per row; the column is added to `df` in place.
df['candidate_facts'] = df.progress_apply(generate_facts, axis=1)




## Upper Bounds

Check the upper-bound accuracy of the candidates: how often the gold relation, subject, and object appear among the candidate facts.

In [7]:
from tqdm import tqdm_notebook

# Count how often the gold relation / subject / object are reachable through the
# candidate facts. The checks are nested: a subject only counts when its relation was
# found, and an object only counts when both relation and subject were found.
correct_object = 0
correct_relation = 0
correct_subject = 0
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    facts = row['candidate_facts']

    if row['relation'] not in facts:
        continue
    correct_relation += 1

    subjects_for_relation = facts[row['relation']]
    if row['subject'] not in subjects_for_relation:
        continue
    correct_subject += 1

    if row['object'] in subjects_for_relation[row['subject']]:
        correct_object += 1

print('Object Canditate Accuracy:', correct_object / df.shape[0])
print('Relation Canditate Accuracy:', correct_relation / df.shape[0])
print('Subject Canditate Accuracy:', correct_subject / df.shape[0])


Object Canditate Accuracy: 0.9544490548639927
Relation Canditate Accuracy: 0.9673582295988935
Subject Canditate Accuracy: 0.9544490548639927


## Baseline Relation Model

Load the baseline relation model.

TODO: Replace with <e> model and spacy_tokenize

TODO: Think about the probabilistic <e> model stuff
    
TODO: Test a ranking model based on candidate aliases

TODO: Check the best accuracy for relation classification with <e> if you guess the top relation
    
TODO: Clarify that each <e> has a bias toward some relations; we can compute that bias by looking at the distribution of FB2M

In [8]:
from functools import lru_cache
from lib.checkpoint import Checkpoint
from torch.autograd import Variable
import math
import torch


# Path to the saved baseline relation-classifier checkpoint, relative to this notebook.
BASELINE_RELATION_CLASSIFIER = '../../results/0756.12-22_15:25:19.relation_classifier/12m_22d_15h_37m_20s.pt'

# Load the checkpoint through the project's `Checkpoint` helper with device=0, then
# take the model out of training mode before scoring.
baseline_relation_classifer = Checkpoint(checkpoint_path=BASELINE_RELATION_CLASSIFIER, device=0)
baseline_relation_classifer.model.train(mode=False)

def cuda(v):
    """Return ``v`` moved to the GPU when CUDA is available, otherwise ``v`` unchanged.

    The CPU branch previously returned the undefined name ``t``, which raised
    ``NameError`` on any machine without CUDA.
    """
    return v.cuda() if torch.cuda.is_available() else v


def to_variable(e):
    """Wrap a flat sequence of token ids as a ``Variable`` of shape ``(len(e), 1)``.

    Args:
        e: Sequence of integer ids accepted by ``torch.LongTensor``.

    Returns:
        The ids as a contiguous column, placed on the GPU when one is available.
    """
    return cuda(Variable(torch.LongTensor(e).unsqueeze(1).contiguous()))

@lru_cache(maxsize=16384)
def model(question):
    """Run the baseline relation classifier on one question and return its raw output.

    Results are memoized per question string.

    NOTE(review): the output saved with this notebook shows
    ``AttributeError: 'Checkpoint' object has no attribute 'input_text_encoder'``,
    so the encoder attribute name below needs to be confirmed against ``Checkpoint``.

    Args:
        question: Question text to encode and score.

    Returns:
        The ``.data`` of the first model output, with its leading dimension squeezed.
    """
    token_ids = to_variable(baseline_relation_classifer.input_text_encoder.encode(question))
    # The model is also given the sequence length, as a one-element LongTensor.
    length = torch.LongTensor([token_ids.size()[0]])
    first_output = baseline_relation_classifer.model(token_ids, length)[0]
    return first_output.squeeze(0).data

@lru_cache(maxsize=16384)
def get_baseline_relation_score(question, relation):
    """Score one (question, relation) pair with the baseline relation classifier.

    Results are memoized per (question, relation) pair.

    Args:
        question: Question text; it is lower-cased before being passed to ``model``.
        relation: Relation path; 'www.freebase.com' is prepended before it is encoded,
            so the callers in this notebook pass it with a leading '/'.

    Returns:
        ``math.exp`` of the model output at the encoded relation's index.
    """
    full_relation = 'www.freebase.com' + relation
    output = model(question.lower())
    # The output encoder maps the relation label to its index in the model output.
    label_index = baseline_relation_classifer.output_text_encoder.encode(full_relation)[0]
    return math.exp(output[label_index])
    
# Smoke test: score one question against two candidate relations.
# NOTE(review): the output saved with this notebook stops after 'Scores:' with
# AttributeError: 'Checkpoint' object has no attribute 'input_text_encoder',
# i.e. the first scoring call fails as written.
question = 'Where was Obama born?'
print('Question:', question)
print('Scores:')
print(get_baseline_relation_score(question, '/people/person/place_of_birth'))
print(get_baseline_relation_score(question, '/location/location/people_born_here'))

Question: Where was Obama born?
Scores:


AttributeError: 'Checkpoint' object has no attribute 'input_text_encoder'

## Relation Model (Yu et al.)

In [10]:
import re
import torch
import pprint
from torch.autograd import Variable
from functools import lru_cache
from lib.checkpoint import Checkpoint
from lib.utils import pad_batch

# Path to the saved Yu et al. relation-model checkpoint, relative to this notebook.
RELATION_CLASSIFIER = '../../scripts/Simple QA Models/logs/5611.01-22_13:34:57.yu_relation_model/01m_22d_14h_53m_40s.pt'

# NOTE(review): `pretty_printer` is not used in any cell shown in this notebook.
pretty_printer = pprint.PrettyPrinter(indent=2)

# Load the checkpoint through the project's `Checkpoint` helper with device=0.
relation_classifier = Checkpoint(checkpoint_path=RELATION_CLASSIFIER, device=0)
# Call `flatten_parameters()` on each of the model's four RNN-named submodules
# (presumably to re-compact their weights after loading — confirm).
relation_classifier.model.relation_word_rnn.flatten_parameters()
relation_classifier.model.text_rnn_layer_one.flatten_parameters()
relation_classifier.model.text_rnn_layer_two.flatten_parameters()
relation_classifier.model.relation_rnn.flatten_parameters()
# Take the model out of training mode before scoring.
relation_classifier.model.train(mode=False)

# NOTE: these two definitions rebind the `cuda` / `to_variable` names that the
# baseline-model cell also defines; after this cell runs, `to_variable` expects a
# list of tensors rather than a flat list of ids.
def cuda(v):
    """Return ``v`` moved to the GPU when CUDA is available, otherwise ``v`` unchanged.

    The CPU branch previously returned the undefined name ``t``, which raised
    ``NameError`` on any machine without CUDA.
    """
    return v.cuda() if torch.cuda.is_available() else v


def to_variable(b):
    """Stack a batch of equal-length tensors and wrap the transposed result as a Variable.

    Args:
        b: List of tensors accepted by ``torch.stack``.

    Returns:
        The stacked batch, transposed in place and made contiguous, on the GPU when
        one is available.
    """
    # NOTE(review): `volatile=True` is the pre-0.4 PyTorch way to disable autograd for
    # inference; confirm the installed PyTorch version still honours it.
    return cuda(Variable(torch.stack(b).t_().contiguous(), volatile=True))
    
def get_relation_scores(questions, relations):
    """Score each (question, relation) pair with the Yu et al. relation model.

    Args:
        questions: Iterable of question strings.
        relations: Iterable of relation strings. It is iterated twice (once per
            relation encoder), so it must be re-iterable.

    Returns:
        The ``.data`` of the model output for the batch.
    """
    question_list = list(questions)

    # Encode and pad each of the three model inputs; the second value returned by
    # `pad_batch` is not needed here.
    padded_questions, _ = pad_batch(
        [relation_classifier.text_encoder.encode(text) for text in question_list])
    padded_relations, _ = pad_batch(
        [relation_classifier.relation_encoder.encode(name) for name in relations])
    padded_relation_words, _ = pad_batch(
        [relation_classifier.relation_word_encoder.encode(name) for name in relations])

    model_inputs = [to_variable(padded)
                    for padded in (padded_questions, padded_relations, padded_relation_words)]
    return relation_classifier.model(*model_inputs).data

# Smoke test: score one `<e>`-masked question against two candidate relations.
question = 'where was <e> born ?'
print('Question:', question)
# print('Scores:')
print(get_relation_scores([question], ['people/person/place_of_birth']))
print(get_relation_scores([question], ['location/location/people_born_here']))

## TODO: Check the model against the dev.txt data
# NOTE(review): the two figures below are not explained anywhere in this notebook —
# presumably a dev-set accuracy and an expected correct count; confirm.
# 0.882065468
# 9566 should be correct! not 9500

Question: where was <e> born ?

 0.6998
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6154
[torch.cuda.FloatTensor of size 1 (GPU 0)]



## Final End-To-End Metric

Given candidate facts compute the end-to-end metric.

In [None]:
def format_question(row):
    """Rebuild the question text with the predicted subject span replaced by '<e>'.

    Args:
        row: Mapping with 'predicted_question_tokens' (a sequence of strings) and the
            numeric 'predicted_start_index' / 'predicted_end_index' of the subject
            span; the end index is exclusive.

    Returns:
        The lower-cased, stripped tokens joined by single spaces. The token at the
        start index becomes '<e>' and the remaining span tokens are dropped. When no
        index matches (e.g. NaN indices), every token is kept.
    """
    span_start = row['predicted_start_index']
    span_end = row['predicted_end_index']

    pieces = []
    for position, token in enumerate(row['predicted_question_tokens']):
        if position == span_start:
            pieces.append('<e>')
        elif span_start < position < span_end:
            # Rest of the subject span: already represented by '<e>'.
            continue
        else:
            pieces.append(token.lower().strip())

    return ' '.join(pieces).strip()

In [16]:
from tqdm import tqdm_notebook
from numpy import nan
import pandas as pd
from collections import defaultdict
import random

# End-to-end evaluation: for every row, pick the best-scoring candidate relation, then
# pick one subject for that relation, and compare both with the gold annotations.
subject_and_relation_correct = 0
object_correct = 0
relation_correct = 0
subject_mid_correct = 0
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    candidate_facts = row['candidate_facts']
    # Rows without candidate facts are skipped; they still count in the denominators.
    if len(candidate_facts) == 0:
        continue

    question = ' '.join(row['predicted_question_tokens']) # format_question(row)
    candidate_relations = list(candidate_facts.keys())
    # The same question is paired with every candidate relation.
    questions = (question,) * len(candidate_relations)
    relations = tuple(candidate_relations)

    scores = get_relation_scores(questions, relations)
    # On ties, the first relation in `candidate_relations` order wins.
    best_position = max(range(len(candidate_relations)),
                        key=lambda position: scores[position])
    max_relation = candidate_relations[best_position]

    # We use the `Better than random guessing` from notebook
    # `HYPOTHESIS - Question Refers to Multiple Subjects`:
    # take the subject with the most objects for the chosen relation (first one on
    # ties) instead of sampling a subject at random.
    subject_mid, object_mids = max(candidate_facts[max_relation].items(),
                                   key=lambda item: len(item[1]))

    relation_matches = max_relation == row['relation']
    subject_matches = subject_mid == row['subject']

    if relation_matches and subject_matches:
        subject_and_relation_correct += 1
    if row['object'] in object_mids:
        object_correct += 1
    if relation_matches:
        relation_correct += 1
    if subject_matches:
        subject_mid_correct += 1

total = df.shape[0]
for template, count in (
        ('Subject & Relation Accuracy (SOTA 78.7%%): %f [%d of %d]', subject_and_relation_correct),
        ('Object Accuracy: %f [%d of %d]', object_correct),
        ('Relation Accuracy (SOTA 89%%): %f [%d of %d]', relation_correct),
        ('Subject Accuracy (SOTA 79%%): %f [%d of %d]', subject_mid_correct)):
    print(template % (count / total, count, total))


Subject & Relation Accuracy (SOTA 78.7%): 0.784417 [8507 of 10845]
Object Accuracy: 0.837621 [9084 of 10845]
Relation Accuracy (SOTA 89%): 0.878930 [9532 of 10845]
Subject Accuracy (SOTA 79%): 0.835039 [9056 of 10845]
