# Step 3 - Predict Relation and Finish

Our goal during this step is to predict the relation and compute the end-to-end accuracy.

In [2]:
import sys
sys.path.insert(0, '../../')

In [5]:
from scripts.utils.connect import get_connection 
from scripts.utils.data import FB2M_NAME_TABLE

connection = get_connection()
cursor = connection.cursor()

In [6]:
import pandas as pd
from tqdm import tqdm_notebook

tqdm_notebook().pandas()

df = pd.read_pickle('step_2_generate_candidates.pkl')
df[:5]




Unnamed: 0,end_index,object,predicted_subject_names,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids,predicted_start_index,predicted_end_index,predicted_subject_name
6219,,0bs56bp,"[{'name': 'american thoroughbread', 'score': 1...",Name an American Thoroughbread racehorse,"[name, an, american, thoroughbread, racehorse]",biology/organism_classification/organisms_of_t...,,03k3r,,,"[01z1jf2, 04q7gbh]",2.0,3.0,american
3364,9.0,01sjng,"[{'name': 'vision racing driving simulator', '...",what kind of game is vision racing driving sim...,"[what, kind, of, game, is, vision, racing, dri...",cvg/computer_videogame/cvg_genre,5.0,02qlppc,vision racing driving simulator,"(vision, racing, driving, simulator)",[02qlppc],5.0,9.0,vision racing driving simulator
9374,6.0,0dlmm88,"[{'name': 'romance film', 'score': 28.02931404...",what tv program is romance film,"[what, tv, program, is, romance, film]",tv/tv_genre/programs,4.0,02l7c8,romance film,"(romance, film)",[02l7c8],4.0,6.0,romance film
10142,4.0,04rrx,"[{'name': 'polaski', 'score': 32.1325416564941...",what state is polaski located in,"[what, state, is, polaski, located, in]",location/location/containedby,3.0,049_zj3,polaski,"(polaski,)",[049_zj3],3.0,4.0,polaski
97,8.0,0qcr0,"[{'name': 'fern emmett', 'score': 23.679399490...",what disease claimed the life of fern emmett,"[what, disease, claimed, the, life, of, fern, ...",people/deceased_person/cause_of_death,6.0,02w9ycr,fern emmett,"(fern, emmett)",[02w9ycr],6.0,8.0,fern emmett


## Generate Facts

Given the candidate mids, the we generate candidate facts.

In [27]:
from collections import defaultdict

def generate_facts(row):
    cursor.execute("""SELECT object_mid, relation, subject_mid
                      FROM fb_two_kg
                      WHERE subject_mid = ANY(%s)""", (row['candidate_mids'],))
    rows = cursor.fetchall()
    candidate_facts = defaultdict(lambda: defaultdict(set))
    
    for object_mid, relation, subject_mid in rows:
        candidate_facts[relation][subject_mid].add(object_mid)
        
    return candidate_facts

In [28]:
df['candidate_facts'] = df.progress_apply(generate_facts, axis=1)




## Upperbounds

Check the accuracy of the candidates.

In [21]:
from tqdm import tqdm_notebook

correct_object = 0
correct_relation = 0
correct_subject = 0
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    facts = row['candidate_facts']

    if row['relation'] in facts:
        correct_relation += 1
        if row['subject'] in facts[row['relation']]:
            correct_subject += 1
            if row['object'] in facts[row['relation']][row['subject']]:
                correct_object += 1    
        

print('Object Canditate Accuracy:', correct_object / df.shape[0])
print('Relation Canditate Accuracy:', correct_relation / df.shape[0])
print('Subject Canditate Accuracy:', correct_subject / df.shape[0])


Object Canditate Accuracy: 0.9550023052097741
Relation Canditate Accuracy: 0.9675426463808207
Subject Canditate Accuracy: 0.9550023052097741


## Baseline Relation Model

Load the baseline relation model.

TODO: Replace with <e> model and spacy_tokenize

TODO: Think about the probablistics <e> model stuff
    
TODO: Test a ranking model based on candidate aliases

TODO: Check the best accuracy for relation classification with <e> if you guess the top relation
    
TODO: Clarify that <e> each has a bias toward some relation, we can compute that by looking at the distribution of FB2M

In [29]:
from functools import lru_cache
from lib.checkpoint import Checkpoint
from torch.autograd import Variable
import math
import torch

BASELINE_RELATION_CLASSIFIER = '../../results/0756.12-22_15:25:19.relation_classifier/12m_22d_15h_37m_20s.pt'

baseline_relation_classifer = Checkpoint(checkpoint_path=BASELINE_RELATION_CLASSIFIER, device=0)
baseline_relation_classifer.model.train(mode=False)

cuda = lambda v: v.cuda() if torch.cuda.is_available() else t
to_variable = lambda e: cuda(Variable(torch.LongTensor(e).unsqueeze(1).contiguous()))

@lru_cache(maxsize=16384)
def model(question):
    encoded = baseline_relation_classifer.input_text_encoder.encode(question)
    encoded = to_variable(encoded)
    encoded_length = torch.LongTensor([encoded.size()[0]])
    output_batch = baseline_relation_classifer.model(encoded, encoded_length)[0]
    output_batch = output_batch.squeeze(0)
    return output_batch.data

@lru_cache(maxsize=16384)
def get_baseline_relation_score(question, relation):
    relation = 'www.freebase.com' + relation
    question = question.lower()
    output_batch = model(question)
    relation_index = baseline_relation_classifer.output_text_encoder.encode(relation)[0]
    return math.exp(output_batch[relation_index])
    
## Test ##
question = 'Where was Obama born?'
print('Question:', question)
print('Scores:')
print(get_baseline_relation_score(question, '/people/person/place_of_birth'))
print(get_baseline_relation_score(question, '/location/location/people_born_here'))



Question: Where was Obama born?
Scores:
0.8835899947877679
0.11019587285726916


  scores = F.log_softmax(output)


## Relation Model Yu et Al.

In [16]:
import re
import torch
from torch.autograd import Variable

RELATION_CLASSIFIER = '../../logs/2785.01-01_17:32:06.yu_relation_model/01m_01d_17h_54m_53s.pt'

import pprint

from nltk.tokenize.treebank import TreebankWordTokenizer

from lib.checkpoint import Checkpoint

pretty_printer = pprint.PrettyPrinter(indent=2)
tokenizer = TreebankWordTokenizer() # Same tokenizer used during training

relation_classifier = Checkpoint(checkpoint_path=RELATION_CLASSIFIER, device=0)
relation_classifier.model.relation_word_rnn.flatten_parameters()
relation_classifier.model.relation_rnn.flatten_parameters()
relation_classifier.model.text_rnn.flatten_parameters()
relation_classifier.model.train(mode=False)

cuda = lambda v: v.cuda() if torch.cuda.is_available() else t
to_variable = lambda e: cuda(Variable(torch.LongTensor(e).unsqueeze(1).contiguous()))
    
def get_relation_score(question, relation):
    question = question.lower()
    question_encoded = relation_classifier.text_encoder.encode(question)
    relation_encoded = relation_classifier.relation_encoder.encode(relation)
    relation_word_encoded = relation_classifier.relation_word_encoder.encode(relation)

    question_encoded = to_variable(question_encoded)
    relation_encoded = to_variable(relation_encoded)
    relation_word_encoded = to_variable(relation_word_encoded)

    return relation_classifier.model(question_encoded, relation_encoded, relation_word_encoded).data[0]

# To test this cell
question = 'where was #head_entity# born ?'
print('Question:', question)
print('Scores:')
print(get_relation_score(question, '/people/person/place_of_birth'))
print(get_relation_score(question, '/location/location/people_born_here'))

Question: where was #head_entity# born ?
Scores:
0.7156175971031189
0.6886991858482361


## Final End-To-End Metric

Given candidate facts compute the end-to-end metric.

In [39]:
from tqdm import tqdm_notebook
from numpy import nan
import pandas as pd
from collections import defaultdict
import random

subject_and_relation_correct = 0
object_correct = 0
relation_correct = 0
subject_mid_correct = 0
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    if len(row['candidate_facts']) != 0:
        # TODO: Multiple relations are correct given any question; therefore, its important for us to compute
        # the probabilty that one of the correct relations. 
        # Model approximates the P(r | q)
        # Should we approximate P(r) instead
        # We should not approximate P(r | q, e) because P(r | e) is exactly known
        # QUESTION: Is it important to consider multiple relations, incase for a particular E, one relation makes
        # much more sense? -- Multiple relations -> P(r | q) * P(r | e) 
        # = P(r and q) / P(q) * P(r and e) / P(e)
        # = P(r and q and e) / P(q and e)
        # = P(r | q, e) 
        # Multiple them together?
        # TODO: Compute the upperbound for relation accuracy given half the question templates overlap
#         max_relation = max([r for r in row['candidate_facts']],
#                            key=lambda r: get_baseline_relation_score(row['question'], '/' + r))
        
        # We use the `Better than random guessing` from notebook 
        # `HYPOTHESIS - Question Refers to Multiple Subjects`.
        # subject_mid, object_mids = random.sample(row['candidate_facts'][max_relation].items(), 1)[0]
#         subject_mid, object_mids = sorted(row['candidate_facts'][max_relation].items(),
#                                           key=lambda i: len(i[1]), reverse=True)[0]
        
        max_score = 0
        max_subject_mid = None
        max_relation = None
        max_object_mids = None
        
        for r in row['candidate_facts']:
            probability_relation_given_question = get_baseline_relation_score(row['question'], '/' + r)
            for subject_mid in row['candidate_facts'][r]:
                cursor.execute("""
                    SELECT count(*)
                    FROM fb_two_kg
                    WHERE subject_mid = %s
                """, (subject_mid,))
                n_facts = cursor.fetchall()[0][0]
                probability_relation_given_entity = len(row['candidate_facts'][r][subject_mid]) / n_facts
                score = probability_relation_given_entity * probability_relation_given_question
                if score > max_score:
                    max_score = score
                    max_object_mids = row['candidate_facts'][r][subject_mid]
                    max_subject_mid = subject_mid
                    max_relation = r

        if max_relation == row['relation'] and subject_mid == row['subject']:
            subject_and_relation_correct += 1
        if row['object'] in object_mids:
            object_correct += 1
        if max_relation == row['relation']:
            relation_correct += 1
        if subject_mid == row['subject']:
            subject_mid_correct += 1

# TODO: Look into why the relation accuracy is worse than before
# Baseline Relation Model
# Subject & Relation Accuracy: 0.776763 [8424 of 10845]
# Object Accuracy: 0.833195 [9036 of 10845]
# Relation Accuracy: 0.873490 [9473 of 10845]
# Subject MID Accuracy: 0.834025 [9045 of 10845]
            
print('Subject & Relation Accuracy (SOTA 78.7%%): %f [%d of %d]' %
          (subject_and_relation_correct / df.shape[0], subject_and_relation_correct, df.shape[0]))
print('Object Accuracy: %f [%d of %d]' %
          (object_correct / df.shape[0], object_correct, df.shape[0]))
print('Relation Accuracy (SOTA 89%%): %f [%d of %d]' %
          (relation_correct / df.shape[0], relation_correct, df.shape[0]))
print('Subject Accuracy (SOTA 79%%): %f [%d of %d]' %
          (subject_mid_correct / df.shape[0], subject_mid_correct, df.shape[0]))


Subject & Relation Accuracy (SOTA 78.7%): 0.668326 [7248 of 10845]
Object Accuracy: 0.000000 [0 of 10845]
Relation Accuracy (SOTA 89%): 0.872937 [9467 of 10845]
Subject Accuracy (SOTA 79%): 0.724297 [7855 of 10845]
