# End-to-End Freebase QA

Use the below line to import modules from the root of the repository.

Reference: https://stackoverflow.com/questions/34976803/sys-path-different-in-jupyter-and-python-how-to-import-own-modules-in-jupyter 

In [1]:
import sys
sys.path.insert(0, '../../../allennlp')
sys.path.insert(0, '../../')
import pandas as pd
from tqdm import tqdm_notebook
import random
import numpy as np
random_seed = 123
random.seed(random_seed)
np.random.seed(random_seed)
tqdm_notebook().pandas()




We use the cursor from psycopg2 to connect to PostgreSQL with our knowledge graph.

In [21]:
from scripts.utils.connect import get_connection 

connection = get_connection()
cursor = connection.cursor()

Load the Simple Questions V2 validation dataset.

In [3]:
from scripts.utils.simple_qa import load_simple_qa 
from sklearn.utils import shuffle


df_dev, = load_simple_qa(dev=True)
df_dev = shuffle(df_dev, random_state=random_seed)
df_dev[:5]

Unnamed: 0,subject,relation,object,question
6219,03k3r,biology/organism_classification/organisms_of_t...,0bs56bp,Name an American Thoroughbread racehorse
3364,02qlppc,cvg/computer_videogame/cvg_genre,01sjng,what kind of game is vision racing driving sim...
9374,02l7c8,tv/tv_genre/programs,0dlmm88,what tv program is romance film
10142,049_zj3,location/location/containedby,04rrx,what state is polaski located in
97,02w9ycr,people/deceased_person/cause_of_death,0qcr0,what disease claimed the life of fern emmett


For every example, there is not a clear subject name but we can do a fair job a linking questions with subject names only missing 2.7% of examples.

## Predict Subject Name

Using a tagging model, we are going to predict the subject name for every row.

In [10]:
from allennlp.models.archival import load_archive
from allennlp.service.predictors import Predictor
from scripts.utils.link_subject_name import tokenize
import math

SUBJECT_RECOGNITION = '../../../allennlp/experiments/crf_tagger_lstm_no_reg_3/model.tar.gz'
archive = load_archive(SUBJECT_RECOGNITION, cuda_device=0)
predictor = Predictor.from_archive(archive, 'sentence-tagger')

# TODO: Better handle multiple subject_names picking the one with the highest probability

def predict_subject_name(s):
    question_tokens = tokenize(s)
    predicted = predictor.predict_json({'sentence': ' '.join(question_tokens)}, 0)
    logit_to_probability = lambda x: 1 / (1 + math.exp(-x))
    # Confidence that a token is a entity
    confidence = [logit_to_probability(logit[1]) for logit in predicted['logits']] 
    
    predicted_subject_name = ''
    inside = False
    start_index = 0
    end_index = len(predicted['tags'])
    for i, tag in enumerate(predicted['tags']): 
        if tag == 'I' and not inside:
            predicted_subject_name = predicted['words'][i]
            inside = True
            start_index = i
        elif tag != 'I':
            if inside:
                end_index = i
            inside = False
        elif tag == 'I':
            predicted_subject_name += ' ' + predicted['words'][i]
            
    if inside:
        end_index = len(predicted['tags'])
        
    assert len(confidence) == len(question_tokens)
            
    return (predicted_subject_name,
            question_tokens,
            confidence,
            start_index,
            end_index)

predict_subject_name('what major cities does u.s. route 2 run through?')
predict_subject_name('what was arthur b. hancock, jr.\'s cause of death')

('arthur b. hancock',
 ['what',
  'was',
  'arthur',
  'b.',
  'hancock',
  ',',
  'jr.',
  "'s",
  'cause',
  'of',
  'death'],
 [0.011748453053504696,
  0.0030749839218348137,
  0.9847373594185165,
  0.7639913663356342,
  0.9820837711450371,
  0.043732789036917616,
  0.15952436461081299,
  0.08061072016615713,
  0.14452510878420124,
  0.288720534216086,
  0.17725884038983625],
 2,
 5)

In [11]:
import pandas as pd
from numpy import nan

def add_predicted_subject_name(row):
    subject_name, question, tag_confidence, start_index, end_index = predict_subject_name(row['question'])
    if len(subject_name) > 0:
        row['predicted_subject_name'] = subject_name
        row['tokenized_question'] = question
        row['tag_confidence'] = tag_confidence
        row['predicted_subject_name_start_index'] = start_index
        row['predicted_subject_name_stop_index'] = end_index
    else:
        row['predicted_subject_name'] = nan
        row['tokenized_question'] = nan
        row['tag_confidence'] = nan
        row['predicted_subject_name_start_index'] = nan
        row['predicted_subject_name_stop_index'] = nan
        
    return row

df_dev = df_dev.progress_apply(add_predicted_subject_name, axis=1)
df_dev[:5]




Unnamed: 0,subject,relation,object,question,predicted_subject_name,predicted_question,tag_confidence,predicted_subject_name_start_index,predicted_subject_name_stop_index
6219,03k3r,biology/organism_classification/organisms_of_t...,0bs56bp,Name an American Thoroughbread racehorse,American Thoroughbread,"[Name, an, American, Thoroughbread, racehorse]","[0.03852444377976815, 0.011928176725244675, 0....",2.0,4.0
3364,02qlppc,cvg/computer_videogame/cvg_genre,01sjng,what kind of game is vision racing driving sim...,vision racing driving simulator,"[what, kind, of, game, is, vision, racing, dri...","[0.009964622867763331, 0.01985006595168436, 0....",5.0,9.0
9374,02l7c8,tv/tv_genre/programs,0dlmm88,what tv program is romance film,romance film,"[what, tv, program, is, romance, film]","[0.006318914004223599, 0.005014553051472905, 0...",4.0,6.0
10142,049_zj3,location/location/containedby,04rrx,what state is polaski located in,polaski,"[what, state, is, polaski, located, in]","[0.0079512906240557, 0.03284792185161542, 0.00...",3.0,4.0
97,02w9ycr,people/deceased_person/cause_of_death,0qcr0,what disease claimed the life of fern emmett,fern emmett,"[what, disease, claimed, the, life, of, fern, ...","[0.021128480615324314, 0.38043073700318475, 0....",6.0,8.0


### Link Predicted Subject Alias to MID

#### True Alias

In [15]:
import unicodedata
import re
from nltk import word_tokenize
import pandas as pd
from scripts.utils.levenshtein import levenshtein_word_distance
from IPython.display import display
from numpy import nan

def strip_accents(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s))

def preproces(s):
    """
    Preprocess before computing the edit distance.
    
    Remove punctuation to allow for consistent tokenization 
        (e.g. who produces npr: wait wait... dont tell me! podcast?
              gunnar \"the ghost pilot\" andersson
              that's == thats)
    Represent as ASCII
        (e.g. ikkyū)
    Lower case
    Tokenize
    """
    # Represent characters in ASCII
    s = strip_accents(s)
    # Remove punctuation
    s = re.sub(r'[^\w\s]','',s)
    # Substitue multiple spaces with one
    s = re.sub('\s+', ' ', s)
    s = s.strip()
    s = s.lower()
    s = tuple(word_tokenize(s))
    return s

def levenshtein_link_alias(row, name_table='fb_two_subject_name'):
    cursor.execute('SELECT alias FROM ' + name_table + ' WHERE mid=%s', (row['subject'],))
    aliases = [a[0] for a in cursor.fetchall()]
    if len(aliases) > 0:
        distances = [levenshtein_word_distance(preproces(a), preproces(row['question']))[0] for a in aliases]
        normalized_distances = [(len(aliases[i]) - d) / len(aliases[i]) for i, d in enumerate(distances)]
        if max(normalized_distances) >= 0.8:
            subject_alias_index = max(range(len(normalized_distances)), key=lambda i: normalized_distances[i])
            subject_alias = aliases[subject_alias_index]
            return subject_alias
    return nan

df_dev['subject_name'] = df_dev.progress_apply(levenshtein_link_alias, axis=1)
print('Linked', sum(df_dev.subject_name.notnull()), 'examples')
df_dev[:5]


Linked 10640 examples


Unnamed: 0,subject,relation,object,question,predicted_subject_name,predicted_question,tag_confidence,predicted_subject_name_start_index,predicted_subject_name_stop_index,subject_name
6219,03k3r,biology/organism_classification/organisms_of_t...,0bs56bp,Name an American Thoroughbread racehorse,American Thoroughbread,"[Name, an, American, Thoroughbread, racehorse]","[0.03852444377976815, 0.011928176725244675, 0....",2.0,4.0,
3364,02qlppc,cvg/computer_videogame/cvg_genre,01sjng,what kind of game is vision racing driving sim...,vision racing driving simulator,"[what, kind, of, game, is, vision, racing, dri...","[0.009964622867763331, 0.01985006595168436, 0....",5.0,9.0,vision racing driving simulator
9374,02l7c8,tv/tv_genre/programs,0dlmm88,what tv program is romance film,romance film,"[what, tv, program, is, romance, film]","[0.006318914004223599, 0.005014553051472905, 0...",4.0,6.0,romance
10142,049_zj3,location/location/containedby,04rrx,what state is polaski located in,polaski,"[what, state, is, polaski, located, in]","[0.0079512906240557, 0.03284792185161542, 0.00...",3.0,4.0,polaski
97,02w9ycr,people/deceased_person/cause_of_death,0qcr0,what disease claimed the life of fern emmett,fern emmett,"[what, disease, claimed, the, life, of, fern, ...","[0.021128480615324314, 0.38043073700318475, 0....",6.0,8.0,fern emmett


#### Index Aliases for Canditate Generation

In [4]:
name_table = 'fb_two_subject_name'
cursor.execute('ALTER TABLE ' + name_table + ' ADD normalized_alias varchar')

ProgrammingError: column "normalized_alias" of relation "fb_two_subject_name" already exists


In [16]:
from tqdm import tqdm_notebook
import psycopg2

chunk_size = 10000

def update_chunk(rows):
    query = 'UPDATE ' + name_table + ' SET normalized_alias = %s WHERE mid = %s and alias = %s'
    psycopg2.extras.execute_batch(cursor, query, rows)

cursor.execute('SELECT mid, alias FROM ' + name_table)
rows = []
for mid, alias in tqdm_notebook(cursor.fetchall()):
    normalized_alias = ' '.join(preproces(alias))
    rows.append(tuple([normalized_alias, mid, alias]))
    
    # Insert Chunk
    if len(rows) > chunk_size:
        update_chunk(rows)
        rows = []
        
update_chunk(rows)




In [17]:
cursor.execute('CREATE INDEX ' + name_table + '_normalized_alias_trgm ON ' + name_table + ' USING gist(normalized_alias gist_trgm_ops);')
connection.commit()

#### Cached Similarity Queries 

In [27]:
from functools import lru_cache

# Allows running experiments without redoing SQL calls
@lru_cache(maxsize=65536)
def query_same_alias(alias, name_table='fb_two_subject_name'):
    cursor.execute("""
                  SELECT mid, alias
                  FROM {} 
                  WHERE normalized_alias LIKE %s""".format(name_table),
                 ('%' + alias + '%',))
    return list(cursor.fetchall())

# Allows running experiments without redoing SQL calls
@lru_cache(maxsize=65536)
def query_sub_aliases(aliases, name_table='fb_two_subject_name'):
    cursor.execute("""
                  SELECT mid, alias
                  FROM {} 
                  WHERE alias LIKE %s
                  OR alias LIKE %s
                  OR alias LIKE %s""".format(name_table),
                 ('% ' + alias, '% ' + alias + ' %', alias + ' %'))
    return list(cursor.fetchall())

# Allows running experiments without redoing SQL calls
@lru_cache(maxsize=65536)
def query_similar_aliases(alias, name_table='fb_two_subject_name'):
    cursor.execute("""
                  SELECT set_limit(0.3);
                  SELECT mid, alias, similarity(alias, %s) AS score
                  FROM {} 
                  ORDER BY score DESC
                  LIMIT 1000""".format(name_table, alias),
                 (alias,))
    return list(cursor.fetchall())

#### Candidate Generation Strategies

In [29]:
from collections import defaultdict
from nltk.corpus import stopwords
import string
import numpy as np

stopwords = set(stopwords.words('english'))

correct = 0
skipped = 0
print_index = 0
def add_candidates(row):
    global correct
    global skipped
    global print_index
    if isinstance(row['predicted_subject_name'], str):
        candidates = defaultdict(list)
        candidates_mids = []
        predicted_subject_name = ' '.join(preproces(row['predicted_subject_name']))
        
        # NOTE: Try querying the exact same alias
        query_rows = query_same_alias(predicted_subject_name)
        for mid, alias in query_rows:
            candidates_mids.append(mid)
            candidates[alias].append(mid)
            
        if len(query_rows) > 0 and row['subject'] not in candidates_mids and isinstance(row['subject_name'], str):
            print_index += 1
            print('| ' + str(print_index) + ' | ' + 
                  ' | '.join([row['question'], predicted_subject_name, row['subject_name'], str(len(query_rows))])
                 + ' | |')
        
        if row['subject'] in candidates_mids:
            correct += 1
            
        if len(query_rows) == 0:
            skipped += 1

        return candidates
    else:
        skipped += 1
        return []
    
df_dev['candidates'] = df_dev.progress_apply(add_candidates, axis=1)

print('Canditate Accuracy: %f [%d of %d]' % (correct / df_dev.shape[0], correct, df_dev.shape[0]))    
print('Strategy Accuracy: %f [%d of %d]' % (correct / (df_dev.shape[0] - skipped), correct,
                                            (df_dev.shape[0] - skipped)))    
print('Skipped Rows: %f [%d of %d]' % (skipped / df_dev.shape[0], skipped, df_dev.shape[0]))       

| 1 | Name an album released by aaron carter | aaron carter | album | 6 | |
| 2 | what is a tablet used to treat hypertension  | hypertension | tablet | 28 | |
| 3 | What's a soundtrack written by anthony marinelli | anthony marinelli | soundtrack | 2 | |
| 4 | which karl may characters was a tribal chief? | karl may | tribal chief | 4 | |
| 5 | name an album by George Canyon | george canyon | album | 2 | |
| 6 | What is a compilation album by creedence clearwater revival | compilation album | creedence | 16 | |
| 7 | what is one of the master's powers  | the master | master | 425 | |
| 8 | What newspaper circulates in the town of kearny | kearny | t-town | 22 | |
| 9 | what animated cartoon was about ducks? | ducks | animated cartoon | 62 | |
| 10 | What's the soundtrack for s.cry.ed | scryed | soundtrack | 4 | |
| 11 | name a location contained in peoria county, illinois. | peoria county illinois | peoria county | 20 | |
| 12 | which fantasy series were written by mercedes lackey? | 

| 98 | Name a 2002 Indian Tamil romantic biographical film film written and directed by Vasanthabalan | vasanthabalan | biographical film | 1 | |
| 99 | that's amore by dean martin, is what kind of album | dean martin | that's amore | 76 | |
| 100 | Which bollywood film stars Menekka Arora and Arun Bali | arun bali | bollywood | 1 | |
| 101 | What's a book written about the great wall of china | the great wall of china | great wall of china | 6 | |
| 102 | what is the miniature version of the poodle | the poodle | poodle | 2 | |
Canditate Accuracy: 0.936284 [10154 of 10845]
Strategy Accuracy: 0.983915 [10154 of 10320]
Skipped Rows: 0.048409 [525 of 10845]


#### Baseline Candidate Generation via Exact Match

Statistics:
- Canditate Accuracy: 0.852928 [9250 of 10845]
- Strategy Accuracy: 0.977285 [9250 of 9465]
- Skipped Rows: 0.127248 [1380 of 10845]

Discussion:

27 of 50 errors are due to longer matches; therefore, our next candidate generation model will take into account longer candidates.

Error Bucket:

| Index | Question | Predicted Subject | Subject Name | Number of Candidates | Bucket
|:--|:--|:--|:--|
| 1 | Name an album released by aaron carter | aaron carter | album | 2 | Wrong Span |
| 2 | which country is the commune of luxembourg in | luxembourg | commune of luxembourg | 40 | Wrong Span, Longer |
| 3 | what song was included in the hits album 6 | 6 | the hits album 6 | 8 | Wrong Span, Longer Alias |
| 4 | what is about between two women | two women | between two women | 8 | Wrong Span, Longer Alias |
| 5 | what is a tablet used to treat hypertension  | hypertension | tablet | 1 | Wrong Span|
| 6 | What's a soundtrack written by anthony marinelli | anthony marinelli | soundtrack | 2 | Wrong Span |
| 7 | which karl may characters was a tribal chief? | karl may | tribal chief | 2 | Wrong Span |
| 8 | name an album by George Canyon | george canyon | album | 2 | Wrong Span |
| 9 | what is the name of the netflix martial arts film? | martial arts | martial arts film | 46 | Wrong Span, Longer Alias |
| 10 | What is a compilation album by creedence clearwater revival | compilation album | creedence | 1 | Wrong Span |
| 11 | what is one of the master's powers  | the master | master | 19 | Wrong Span, Longer Alias, Article|
| 12 | what's one of the subjects talked about it usually begins with ayn rand | ayn rand | it usually begins with ayn rand | 11 | Wrong Span, Longer Alias |
| 13 | What newspaper circulates in the town of kearny | kearny | t-town | 2 | Wrong Span |
| 14 | which musician plays the drum kit | drum | drums | 74 | Longer alias |
| 15 | what animated cartoon was about ducks? | ducks | animated cartoon | 2 | Wrong Span |
| 16 | where is the bomb's origins | bomb | the bomb | 6 | Wrong span, Longer alias |
| 17 | what is south by java head about  | java head | south by java head | 2 | Wrong Span, Longer Alias |
| 18 | what album is released as a compilation albumby CEMA | album | compilation | 18 | Wrong Span |
| 19 | What is a book that is about arabic name | arabic | arabic name | 1 | Wrong Span, Longer Alias |
| 20 | what genre is crystal city | crystal city | the crystal city | 3 | Wrong Span, Longer Alias |
| 21 | which fantasy series were written by mercedes lackey? | fantasy | mercedes lackey | 1614 | Wrong Span |
| 22 | brian oshea performs what type of martial art | brian oshea | brian o'shea | 1 | Punctuation |
| 23 | What is the best of johnny & the hurricanes's country of origin? | johnny & the hurricanes | the best of johnny & the hurricanes | 1 | Wrong Span, Longer Alias |
| 24 | which album is released by Sham 69 | sham 69 | album | 2 | Wrong Span |
| 25 | what track was released on to hear the angels sing | the angels sing | to hear the angels sing | 1 | Wrong Span, Longer Alias |
| 26 | What's an album by soil | soil | album | 4 | Wrong Span |
| 27 | Name one of the breton people | breton | bretons | 22 | Wrong Span, Longer Alias |
| 28 | what's one of the brothers grimm's famous fairy tales | fairy tales | the brothers grimm | 8 | Wrong Span |
| 29 | What's the original version of cruisin | cruisin | cruisin' | 7 | Wrong Span, Longer Alias |
| 30 | What compilation album did olivia newton-john release in 1998? | compilation album | olivia newton-john | 1 | Wrong Span |
| 31 | what's one event that celebrates the leather subculture | leather | leather subculture | 3 | Wrong Span, Longer Alias |
| 32 | what type of film is the need for speed | the need for speed | need for speed | 3 | Wrong Span, Longer Alias |
| 33 | what is an album by Chico DeBarge? | chico debarge | album | 1 | Wrong Span |
| 34 | which church does robert jaworski, jr. ascribe to | robert jaworski | robert jaworski, jr. | 1 | Wrong Span, Longer Alias |
| 35 | what's an arcade game made by atari | arcade game | atari | 2 | Wrong Span |
| 36 | Name a novel by Vladimir Nabokov | vladimir nabokov | novel | 2 | Wrong Span |
| 37 | roy rogers restaurants in which industry? | roy rogers | roy rogers restaurants | 42 | Wrong Span |
| 38 | who is of caucasian race in babylon 5 | babylon 5 | caucasian | 4 | Wrong Span |
| 39 | what is a visual art painting | visual art | painting | 1 | Wrong Span |
| 40 | brian swimme wrote what book that dealt with  science | brian swimme | science | 1 | Wrong Span |
| 41 | Which animation did Raoul Servais direct | raoul servais | animation | 1 | Wrong Span |
| 42 | what is an album by leo sayer? | leo sayer | album | 3 | Wrong Span |
| 43 | who played the drum  in the Los Angeles rock quintet Rooney | drum | drums | 74 | Wrong Span, Longer Alias |
| 44 | What is a compilation album from 2006  | compilation album | album | 1 | Wrong Span, Longer Alias |
| 45 | who is the most influential female k-pop singer? | singer | k-pop | 399 | Wrong Span |
| 46 | what western state does contain avila place | avila | avila place | 2 | Wrong Span, Longer Alias |
| 47 | What is an album written by Johannes Brahms | johannes brahms | album | 1 | Wrong Span |
| 48 | what is the mouth name of the  conejos river  | conejos | conejos river | 1 | Wrong Span, Longer Alias |
| 49 | which language is two projects by frederick kiesler usually shown in | frederick kiesler | two projects by frederick kiesler | 1 | Wrong Span, Longer Alias |
| 50 | which film created the barefoot artist  | barefoot | the barefoot artist | 7 | Wrong Span, Longer Alias |

#### Candidate Generation via Exact Match + Longer

Statistics:
- Canditate Accuracy: 0.871093 [9447 of 10845]
- Strategy Accuracy: 0.983345 [9447 of 9607]
- Skipped Rows: 0.114154 [1238 of 10845]

Dicussion: 

All the errors are due to the wrong span. 6/50 Articles are incosistenly applied; therefore, during training we should remove them from the CRF tags on the left side. 2/50 Punctuation should be removed during our preprocessing of aliases. This is consistent with our exploratory notebook preprocess step.

The data is quite noisy around the word album. We could consider adding an edge cases specifically to deal with that word.

Error Bucket:

| Index | Question | Predicted Subject | Subject Name | Number of Candidates | Bucket
|:--|:--|:--|:--|
| 1 | Name an album released by aaron carter | aaron carter | album | 6 | Wrong Span |
| 2 | what is a tablet used to treat hypertension  | hypertension | tablet | 28 | Wrong Span |
| 3 | What's a soundtrack written by anthony marinelli | anthony marinelli | soundtrack | 2 | Wrong Span |
| 4 | which karl may characters was a tribal chief? | karl may | tribal chief | 4 | Wrong Span |
| 5 | name an album by George Canyon | george canyon | album | 2 | Wrong Span |
| 6 | What is a compilation album by creedence clearwater revival | compilation album | creedence | 16 | Wrong Span |
| 7 | what is one of the master's powers  | the master | master | 425 | Wrong Span, Shorter, Article |
| 8 | What newspaper circulates in the town of kearny | kearny | t-town | 22 | Wrong Span |
| 9 | what animated cartoon was about ducks? | ducks | animated cartoon | 57 | Wrong Span |
| 10 | What's the soundtrack for s.cry.ed | s.cry.ed | soundtrack | 2 | Wrong Span |
| 11 | which fantasy series were written by mercedes lackey? | fantasy | mercedes lackey | 2676 | Wrong Span |
| 12 | brian oshea performs what type of martial art | brian oshea | brian o'shea | 1 | Punctuation |
| 13 | which album is released by Sham 69 | sham 69 | album | 9 | Wrong Span |
| 14 | What's an album by soil | soil | album | 107 | Wrong Span, Album |
| 15 | what's one of the brothers grimm's famous fairy tales | fairy tales | the brothers grimm | 75 | Wrong Span  |
| 16 | What compilation album did olivia newton-john release in 1998? | compilation album | olivia newton-john | 16 | Wrong Span |
| 17 | who recorded ich wär so gern wie du | ich wär so gern wie du | ich wär' so gern wie du | 1 | Punctuation |
| 18 | what type of film is the need for speed | the need for speed | need for speed | 4 | Wrong Span, Shorter, Article |
| 19 | what is an album by Chico DeBarge? | chico debarge | album | 1 | Wrong Span |
| 20 | what's an arcade game made by atari | arcade game | atari | 18 | Wrong Span |
| 21 | Name a novel by Vladimir Nabokov | vladimir nabokov | novel | 7 | Wrong Span |
| 22 | who is of caucasian race in babylon 5 | babylon 5 | caucasian | 87 | Wrong Span |
| 23 | what is a visual art painting | visual art | painting | 27 | Wrong Span |
| 24 | brian swimme wrote what book that dealt with  science | brian swimme | science | 1 | Wrong Span |
| 25 | Which animation did Raoul Servais direct | raoul servais | animation | 1 | Wrong Span |
| 26 | what is an album by leo sayer? | leo sayer | album | 10 | Wrong Span |
| 27 | which country was the cosmic egg released in | the cosmic egg | cosmic egg | 1 | Wrong Span, Shorter Article |
| 28 | who is an artist that creates latin pop music | latin pop music | latin pop | 1 | Wrong Span, "Music" Shorter |
| 29 | What is a compilation album from 2006  | compilation album | album | 16 | Wrong Span, Shorter "Compilation" |
| 30 | who is the most influential female k-pop singer? | singer | k-pop | 1416 | Wrong Span |
| 31 | What is a city in Surrey County, north carolina? | surrey county | north carolina | 1 | Wrong Span |
| 32 | What is an album written by Johannes Brahms | johannes brahms | album | 14 | Wrong Span |
| 33 | what are indie films? | indie films | indie film | 1 | Plural |
| 34 | which albums were released by the century media label? | century media | album | 6 | Wrong Span |
| 35 | which airline is in the air canada jazz | air canada jazz | jazz | 1 | Wrong Span, Shorter "Air Canada" |
| 36 | Name a drug that contains serine. | drug | serine | 363 | Wrong Span |
| 37 | who recorded the tower of london | the tower of london | tower of london | 2 | Wrong Span, Shorter Article |
| 38 | Who is an artist  signed by rca records? | rca records | rca | 4 | Wrong Span |
| 39 | Which mining companies are located in Brazil  | brazil | mining | 2112 | Wrong Span |
| 40 | where was the end of the line filmed  | the end of the line | end of the line | 4 | Wrong Span, Shorter Article |
| 41 | What's a game that konami published for the wii | konami | wii | 50 | Wrong Span |
| 42 | which fear factor episode involved wrecking balls | wrecking balls | fear factor | 1 | Wrong Span  |
| 43 | What is queen's compilation album called | compilation album | queen | 16 | Wrong Span |
| 44 | Name a rowing team from Oxford | oxford | rowing | 595 | Wrong Span |
| 45 | what is the genre of the magic | the magic | magic | 550 | Shorter Article, Wrong Span |
| 46 | what horror films came out in 2002? | horror films | horror | 4 | Wrong Span, Shorter|
| 47 | who is a fictional human character from the television series Angel | angel | human | 7367 | Wrong Span |
| 48 | which peninsula campaign battles took place during the american civil war? | american civil war | peninsula campaign | 21 | Wrong Span |
| 49 | Who is the music by in destruction, inc.? | in destruction | destruction, inc. | 1 | Wrong Span, Shorter, Stop Word |
| 50 | which actor was born in sydney? | sydney | actor | 825 | Wrong Span |


#### Candidate Generation via Exact Match + Longer + Normalized Punctuation

Statistics:
- Canditate Accuracy: 0.936284 [10154 of 10845]
- Strategy Accuracy: 0.983915 [10154 of 10320]
- Skipped Rows: 0.048409 [525 of 10845]

Dicussion: 

6/50 Errors are due to articles. 1/50 is due to a plural word. 1 / 50 is due to stop words. 49/50 of them are due to the wrong span being selected.

Error Bucket:

| Index | Question | Predicted Subject | Subject Name | Number of Candidates | Bucket
|:--|:--|:--|:--|
| 1 | Name an album released by aaron carter | aaron carter | album | 6 | Wrong Span |
| 2 | what is a tablet used to treat hypertension  | hypertension | tablet | 28 | Wrong Span |
| 3 | What's a soundtrack written by anthony marinelli | anthony marinelli | soundtrack | 2 | Wrong Span |
| 4 | which karl may characters was a tribal chief? | karl may | tribal chief | 4 | Wrong Span |
| 5 | name an album by George Canyon | george canyon | album | 2 | Wrong Span |
| 6 | What is a compilation album by creedence clearwater revival | compilation album | creedence | 16 | Wrong Span |
| 7 | what is one of the master's powers  | the master | master | 425 | Wrong Span, Shorter, Article |
| 8 | What newspaper circulates in the town of kearny | kearny | t-town | 22 | Wrong Span |
| 9 | what animated cartoon was about ducks? | ducks | animated cartoon | 62 | Wrong Span |
| 10 | What's the soundtrack for s.cry.ed | scryed | soundtrack | 4 | Wrong Span |
| 11 | name a location contained in peoria county, illinois. | peoria county illinois | peoria county | 20 | Wrong Span |
| 12 | which fantasy series were written by mercedes lackey? | fantasy | mercedes lackey | 2676 | Wrong Span |
| 13 | which album is released by Sham 69 | sham 69 | album | 9 | Wrong Span |
| 14 | What's an album by soil | soil | album | 108 | Wrong Span |
| 15 | what's one of the brothers grimm's famous fairy tales | fairy tales | the brothers grimm | 75 | Wrong Span |
| 16 | What compilation album did olivia newton-john release in 1998? | compilation album | olivia newton-john | 16 | Wrong Span |
| 17 | what type of film is the need for speed | the need for speed | need for speed | 4 | Wrong Span, Shorter, Article |
| 18 | what is an album by Chico DeBarge? | chico debarge | album | 2 | Wrong Span |
| 19 | what's an arcade game made by atari | arcade game | atari | 18 | Wrong Span |
| 20 | Name a novel by Vladimir Nabokov | vladimir nabokov | novel | 7 | Wrong Span |
| 21 | who is of caucasian race in babylon 5 | babylon 5 | caucasian | 87 | Wrong Span |
| 22 | what is a visual art painting | visual art | painting | 27 | Wrong Span |
| 23 | brian swimme wrote what book that dealt with  science | brian swimme | science | 1 |  Wrong Span|
| 24 | Which animation did Raoul Servais direct | raoul servais | animation | 1 | Wrong Span |
| 25 | what is an album by leo sayer? | leo sayer | album | 10 | Wrong Span |
| 26 | which country was the cosmic egg released in | the cosmic egg | cosmic egg | 1 | Wrong Span, Shorter, Article |
| 27 | who is an artist that creates latin pop music | latin pop music | latin pop | 1 | Wrong Span, Shorter |
| 28 | What is a compilation album from 2006  | compilation album | album | 16 | Wrong Span, Shorter |
| 29 | who is the most influential female k-pop singer? | singer | k-pop | 1418 | Wrong Span |
| 30 | What is a city in Surrey County, north carolina? | surrey county | north carolina | 1 | Wrong Span |
| 31 | What is an album written by Johannes Brahms | johannes brahms | album | 15 | Wrong Span |
| 32 | what are indie films? | indie films | indie film | 1 | Plural |
| 33 | which albums were released by the century media label? | century media | album | 6 | Wrong Span |
| 34 | which airline is in the air canada jazz | air canada jazz | jazz | 1 | Wrong Span, Shorter |
| 35 | Name a drug that contains serine. | drug | serine | 364 | Wrong Span |
| 36 | who recorded the tower of london | the tower of london | tower of london | 2 | Wrong Span, Shorter, Article |
| 37 | Who is an artist  signed by rca records? | rca records | rca | 4 | Wrong Span, Shorter |
| 38 | Which mining companies are located in Brazil  | brazil | mining | 2112 | Wrong Span |
| 39 | where was the end of the line filmed  | the end of the line | end of the line | 4 | Wrong Span, Shorter, Article |
| 40 | What's a game that konami published for the wii | konami | wii | 50 | Wrong Span |
| 41 | which fear factor episode involved wrecking balls | wrecking balls | fear factor | 1 | Wrong Span  |
| 42 | What is queen's compilation album called | compilation album | queen | 16 | Wrong Span |
| 43 | Name a rowing team from Oxford | oxford | rowing | 595 | Wrong Span |
| 44 | what is the genre of the magic | the magic | magic | 550 | Wrong Span, Shorter, Article |
| 45 | what horror films came out in 2002? | horror films | horror | 4 | Wrong Span, Shorter |
| 46 | who is a fictional human character from the television series Angel | angel | human | 7982 | Wrong Span |
| 47 | which peninsula campaign battles took place during the american civil war? | american civil war | peninsula campaign | 21 | Wrong Span |
| 48 | Who is the music by in destruction, inc.? | in destruction | destruction, inc. | 1 | Wrong Span, Stop Word |
| 49 | which actor was born in sydney? | sydney | actor | 825 | Wrong Span |
| 50 | what novel is written by James Kelman | james kelman | novel | 1 | Wrong Span |

In [None]:
from functools import lru_cache

# Allows running experiments without redoing SQL calls
@lru_cache(maxsize=65536)
def cached_lookup(ngram):
    cursor.execute("""SELECT mid, alias, normalized_alias FROM fb_two_subject_name 
                  WHERE normalized_alias = %s OR 
                  normalized_alias LIKE %s""", (ngram, '%' + ngram + '%'))
    return list(cursor.fetchall())

In [None]:
@lru_cache(maxsize=65536)
def cached_similarity(ngram):
    cursor.execute("""
        SELECT set_limit(0.3);
        SELECT mid, alias, normalized_alias
        FROM fb_two_subject_name
        WHERE normalized_alias %% %s
    """, (predicted,))
    return cursor.fetchall()

In [150]:

normalize_alias('where did william wilson \\"buffalo bill\\" quinn die')
normalize_alias('where did william wilson \\"buffalo bill\\" quinn die')

"where did william wilson \\ '' buffalo bill\\ '' quinn die"

In [157]:

cursor.execute("SELECT substring_similarity('Devereaux', 'General Ricardo Wall y Devereux')");
cursor.fetchall();

ProgrammingError: function substring_similarity(unknown, unknown) does not exist
LINE 1: SELECT substring_similarity('Devereaux', 'General Ricardo Wa...
               ^
HINT:  No function matches the given name and argument types. You might need to add explicit type casts.


In [196]:
# TODO: Ask Luke about the TopK sequences for a CRF and how to get them
from collections import defaultdict
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import random
from scripts.utils.link_subject_name import get_alias_in_sentence
from scripts.utils.link_subject_name import normalize_alias
from functools import reduce


is_alias_in_sentence = lambda setence, alias: not np.isnan(get_alias_in_sentence(setence, [alias])[0])
stopwords = ['the', 'a', 'an', 'in']
remove_stopwords = lambda normalized_alias: ' '.join([w for w in normalized_alias.split() if w not in stopwords]).strip()

correct = 0
predicted_mids_column = []
total_aliases = 0
features = []

for row_index, row in tqdm_notebook(df_dev.iterrows(), total=df_dev.shape[0]):
    predicted_mids = []
    predicted = normalize_alias(row['predicted_subject_name'].split())
#     if len(predicted) > 0:
#         for i in range(len(predicted)):
#             # CASE: Added a prediction, move on
#             if len(predicted_mids_column) - 1 == row_index:
#                 break;
            
#             gram_size = len(predicted) - i
#             ngrams = list(get_ngrams(list(range(len(predicted))), gram_size))
#             score = lambda ngram: reduce(lambda x, y: x * y, [
#                 row['tag_confidence'][i + row['predicted_subject_name_start_index']] for i in ngram])
#             # Get the ngram with the highest probability
#             ngrams = sorted(ngrams, key=score, reverse=True)
#             for ngram in ngrams:
#                 # Convert ngram from indicies -> words -> phrase
#                 ngram = ' '.join([predicted[i] for i in ngram])
#                 # NOTE: We found that 97% of the time the alias is an exact match with stemming and removing possesives
#                 # and 0.58% there is a one character difference punctuation
#                 candidates = cached_lookup(ngram)
#                 normalized_aliases = set([normalized_alias for (_, _, normalized_alias) in candidates])
#                 normalized_question = ' ' + normalize_alias(row['question']) +  ' '
#                 normalized_aliases = [a for a in normalized_aliases
#                                       if fuzzy_substring(' ' + a + ' ', normalized_question) / (len(a) + 2) < 0.05]
                
#                 if len(normalized_aliases) > 0:
#                     get_length = lambda normalized_alias: len(remove_stopwords(normalized_alias))
#                     max_length_candidate = max(normalized_aliases, key=get_length)
#                     max_length = get_length(max_length_candidate)
#                     top_candidates = set([a for a in normalized_aliases if get_length(a) == max_length])
#                     aliases = set([alias for (_, alias, normalized_alias) in candidates 
#                                    if normalized_alias in top_candidates])
#                     predicted_mids = set([mid for (mid, _, normalized_alias) in candidates 
#                                    if normalized_alias in top_candidates])
#                     predicted_mids_column.append(predicted_mids)
#                     total_aliases += len(aliases)

#                     if row['subject'] not in predicted_mids:
#                         print('WRONG CANDIDATE')
#                         print('Predicted NGram:', ngram)
#                         print('Predicted Aliases:', aliases)
#                         print('Predicted MIDS:', predicted_mids)
#                         print('Max Score:', max_length)
#                     break;
#     else:
#         print('EMPTY SUBJECT PREDICTION')
#         pass


    if len(predicted) > 0:
        # NOTE: Articles are sometimes added by the model as part of the entity
        # We remove them because they are not consistently added or substracted by Freebase
        predicted = predicted.split()
        if predicted[0] in ['a', 'the', 'an']:
            predicted = predicted[1:]
        predicted = ' '.join(predicted)
        
        def filter_candidates(candidates):
            ### Filter candidates based on similarity to question ###
            if len(candidates) == 0:
                return []
            normalized_aliases = list(set([normalized_alias for (_, _, normalized_alias) in candidates]))
            question_tokens = tuple(normalize_alias(row['question']).split())
            question_distance_feature = lambda a: (len(a) - fuzzy_word(tuple(a.split()), question_tokens)) / len(a)
            normalized_aliases = set([a for a in normalized_aliases if question_distance_feature(a) > 0.9])
            return [c for c in candidates if c[2] in normalized_aliases]    
        
        ### Get candidates with high recall. ###
        # Depending on how confident it is, we look it up exactly? That should give us breathing room
        candidates = cached_lookup(predicted)
        candidates = filter_candidates(candidates)
        if len(candidates) == 0:
            candidates = cached_similarity(predicted)
            candidates = filter_candidates(candidates)
            
        predicted_mids = [mid for (mid, _, _) in candidates]
        normalized_aliases = set([normalized_alias for (_, _, normalized_alias) in candidates])
        predicted_mids_column.append(predicted_mids)
        if isinstance(row['subject_name'], str) and row['subject'] not in predicted_mids:
            print('Question Distance:', question_distance_feature(normalize_alias(row['subject_name'])))
        if row['subject'] in predicted_mids and len(normalized_aliases) > 1:
            print('aliases:', normalized_aliases)
            print('question:', row['question'])
            print('predicted:', predicted)
            print('correct:', row['subject_name'])
            print()
            
        total_aliases += len(normalized_aliases)          
    else: 
        predicted_mids_column.append([])
        print('Empty Predicted')
        
        
        
        # Features used for scoring
#         if len(normalized_aliases) > 0:
#             predicted_distance_feature = lambda a: ((max(len(predicted), len(a)) - 
#                                                     fuzzy_word(tuple(a.split()), tuple(predicted.split())))
#                                                     / max(len(predicted), len(a)))
#             get_features = lambda a: (question_distance_feature(a), predicted_distance_feature(a),  len(a))
#             features = [get_features(a) for a in normalized_aliases]

#             scores = [predicted_distance * length for (_, predicted_distance, length) in features]
#             max_score = max(scores)
#             top_aliases = set([normalized_aliases[i] for i, s in enumerate(scores) if s == max_score])
#         else:
#             top_aliases = []
            
#         if normalize_alias(row['subject_name']) in top_aliases:
#             correct += 1
#         elif isinstance(row['subject_name'], str):
#             print('Question:', row['question'])
#             print('Top alises:', top_aliases)
#             print('Top Score:', max_score)
#             print('True Subject Name:', row['subject_name'])
#             print('Predicted Subject Name:', predicted)
#             print('Number of rows:', len(features))
#             print('Correct:', get_features(normalize_alias(row['subject_name'])))
#             print('Wrong:', [get_features(a) for a in top_aliases])
#             print()
        

#     predicted = normalize_alias(row['predicted_subject_name'].split())
#     cursor.execute('SELECT mid, normalized_alias FROM fb_two_name WHERE normalized_alias = %s', (predicted, ))
#     rows = list(cursor.fetchall())
#     predicted_mids = set([row[0] for row in rows])
#     aliases = set([r[1] for r in rows])

#     if len(predicted_mids) == 0:
#         # CASE: Missing words on left or right causing no matches
#         cursor.execute("""
#             SELECT set_limit(0.3);
#             SELECT mid, normalized_alias, similarity(normalized_alias, %s) AS similarity_score 
#             FROM fb_two_name
#             WHERE normalized_alias %% %s
#         """, (predicted, predicted))
#         rows = list(cursor.fetchall())
#         max_similarity_score = max(row[2] for row in rows)
#         predicted_mids = set([r[0] for r in rows if r[2] == max_similarity_score])
#         aliases = set([r[1] for r in rows if r[2] == max_similarity_score])
#     predicted_mids_column.append(predicted_mids)
#     total_aliases += len(aliases)
    
    if row['subject'] in predicted_mids:
        correct += 1
    else:
        print('Question:', row['question'])
        print('Subject MID:', row['subject'])
        cursor.execute('SELECT alias, normalized_alias FROM fb_two_subject_name WHERE mid = %s', (row['subject'],))
        aliases = cursor.fetchall()
        print('Subject Alises:', aliases)
        print('Subject Name:', row['subject_name'])
        print('Predicted Confidence:', row['tag_confidence'])
        print('Predicted:', predicted)
        print('strategy:', strategy)
        print('Num Predicted MIDS:', len(predicted_mids))
        if isinstance(row['subject_name'], str):
            print('1True' if normalize_alias(row['subject_name']) in predicted else '1False')
        print('Nan and None:',predicted, 'True' if len(predicted) == 0 and not isinstance(row['subject_name'], str) else 'False')
        print('WHOOPS')
        print()

# 0.968 ~ base?
# 0.973997 ~ Similarity 0.3
# 0.973721 ~ 1 token
# 0.975657 ~ Similarity 0.2
# 0.976763 ~ Similarity 0.2 + Remove articles

# EXPERIMENT Best linking strategy ~
# normalized_alias: 0.625700 expected linking accuracy / 0.922914 accuracy
# normalized_alias + similarity max filter: 0.653346 | 0.956478 / Average N Aliases: 1.012633
# normalized_alias + similarity: 0.627181 | ~0.967358
# normalized_alias: 0.635924 expected linking accuracy / 0.933241 accuracy
# normalized_alias + ngrams: 0.650822 expected linking accuracy / 0.955187 accuracy / Average N Aliases: 1.189212
# question distance 0.95: 0.628252 expected linking accuracy / 0.961457 accuracy /Average N Aliases: 1.102536
# question distance 0.9: 0.626057 expected linking accuracy / 0.965698 accuracy / Average N Aliases: 1.179991

print('Accuracy: %f [%d of %d]' % (correct / df_dev.shape[0], correct, df_dev.shape[0]))
print('Average N Aliases: %f [%d of %d]' % (total_aliases / df_dev.shape[0], total_aliases, df_dev.shape[0]))
df_dev['predicted_mids'] = pd.Series(predicted_mids_column, index=df_dev.index)
expected_accuracy = 0
for index, row in df_dev.iterrows():
    if row['subject'] in row['predicted_mids']:
        expected_accuracy += 1 / len(row['predicted_mids'])
print('Expected Linking Accuracy: %f' % (expected_accuracy / df_dev.shape[0]))

df_dev[:5]

aliases: {'bon voyag', 'bon voyag !'}
question: What is the language of the film bon voyage?
predicted: bon voyag
correct: bon voyage

aliases: {'u.s. rout 24', 'u.s. rout 22', 'u.s. rout 29', 'u.s. rout 27', 'u.s. rout 20', 'u.s. rout 23', 'u.s. rout 26', 'u.s. rout 21', 'u.s. rout 2'}
question: what major cities does u.s. route 2 run through
predicted: u.s. rout 2
correct: u.s. route 2

aliases: {'australia', 'v australia'}
question: which recordings contains the composition australia?
predicted: australia
correct: australia

aliases: {'riot grrrl', 'riot', 'music'}
question: what artist creates riot grrrl music
predicted: riot grrrl music
correct: riot grrrl

aliases: {'the invad', 'invad'}
question: What is the language of the invaders?
predicted: invad
correct: the invaders

aliases: {'the thriller', 'thriller'}
question: What is the name of a title found under the thriller (genre)
predicted: thriller
correct: thriller

aliases: {'frome', 'ethan frome', 'ethan', 'seri'}
question: 

aliases: {'ak soundtrack', 'π soundtrack', 'soundtrack', '3 soundtrack', 'x soundtrack'}
question: what's a name of a soundtrack
predicted: soundtrack
correct: soundtrack

aliases: {'stanford univers .', 'stanford univers'}
question: What is a  fraternity or sorority at stanford university?
predicted: stanford univers
correct: stanford university

Question: What newspaper circulates in the town of kearny
Subject MID: 013kcv
Subject Alises: [('wagoner county / tulsa city', 'wagon counti / tulsa citi'), ('tulsa', 'tulsa'), ('tulsa, oklahoma', 'tulsa , oklahoma'), ('t-town', 't-town')]
Subject Name: nan
Predicted Confidence: [0.9749530105604103, 0.8485470333991963, 0.9496939538555046, 0.8066772969914706, 0.8602264923964622, 0.6650237234745787, 0.917764141218179, 0.9986309223015929]
Predicted: kearni
strategy: same or bigger
Num Predicted MIDS: 2
Nan and None: kearni False
WHOOPS

Empty Predicted
Question: Name a country who has constitutional republic as a form of government.
Subject MID:

aliases: {'magic', 'the magic 7'}
question: what is the genre of the magic
predicted: magic
correct: magic

Question Distance: 1.0
Question: What's an album featuring music by ella fitzgerald
Subject MID: 02lx2r
Subject Alises: [('album', 'album')]
Subject Name: album
Predicted Confidence: [0.9749941107730988, 0.977765011018162, 0.9864322996471675, 0.967608339603163, 0.9376750532532405, 0.8830168507014754, 0.9920100478248248, 0.8751860914493533, 0.6255055041901164]
Predicted: ella fitzgerald
strategy: same or bigger
Num Predicted MIDS: 22
1False
Nan and None: ella fitzgerald False
WHOOPS

aliases: {'in london', 'london'}
question: which cooking show has an episode in london
predicted: london
correct: london

aliases: {"the best of daniel o'donnel on film", 'the best of x', 'best of', 'the best of…', 'the best off', "daniel o'donnel", 'the best of', 'daniel'}
question: What kind of music is the film the best of daniel odonnell on film about?
predicted: best of daniel odonnel on film
cor

aliases: {'counti of the unit state', 'unit state'}
question: what is the name of a city found in pasco county in the united states
predicted: unit state
correct: united states

aliases: {'the perfect crime # 2', 'a perfect crime', 'the perfect crime', 'perfect crime'}
question: what type of film is the perfect crime
predicted: perfect crime
correct: the perfect crime

aliases: {'film product', 'film produc'}
question: Name a film producer.
predicted: film produc
correct: film producer

Question Distance: 0.26666666666666666
Question: what tract is released by derek sherinian
Subject MID: 03yxb3
Subject Alises: [('derek sherinian', 'derek sherinian'), ('derek sherinan', 'derek sherinan')]
Subject Name: derek sherinian
Predicted Confidence: [0.9882285547064672, 0.9954248459597338, 0.9865656226034942, 0.7914722163308474, 0.9862921030196323, 0.5756480383180821, 0.8842639270562713]
Predicted: tract
strategy: same or bigger
Num Predicted MIDS: 1
1False
Nan and None: tract False
WHOOPS

Ques

aliases: {'ak soundtrack', 'soundtrack album', 'π soundtrack', 'soundtrack', '3 soundtrack', 'x soundtrack'}
question: whats an example of a soundtrack album
predicted: soundtrack
correct: soundtrack

Question Distance: 0.2
Question: What's a soundtrack written by anthony marinelli
Subject MID: 0l14gg
Subject Alises: [('soundtrack', 'soundtrack')]
Subject Name: soundtrack
Predicted Confidence: [0.9788438190603553, 0.9718340869309384, 0.9614877589425324, 0.5605627945112349, 0.9677317537600492, 0.9920197287715544, 0.9611139107000497, 0.7176164506148925]
Predicted: anthoni marinelli
strategy: same or bigger
Num Predicted MIDS: 2
1False
Nan and None: anthoni marinelli False
WHOOPS

aliases: {'women badminton , singl', 'men badminton , singl'}
question: what olympic games was mens badminton, singles apart of
predicted: men badminton , singl
correct: men's badminton, singles

aliases: {'an artist against the third reich', 'against the third reich'}
question: what's one of the topics discusse

aliases: {'cross', 'the cross'}
question: what is the language of the crossing?
predicted: cross
correct: the crossing

aliases: {'the jungl book 2', 'jungl book', 'the jungl book'}
question: what's a song on the album the jungle book
predicted: jungl book
correct: the jungle book

aliases: {'bollywood', 'bollywood movi'}
question: whats the title of a bollywood movie
predicted: bollywood
correct: bollywood

Question Distance: 1.0
Question: What is a compilation album from 2006 
Subject MID: 02lx2r
Subject Alises: [('album', 'album')]
Subject Name: album
Predicted Confidence: [0.9883143061123084, 0.9769069337493334, 0.9952884176124034, 0.927202256111374, 0.974571267809309, 0.9985052701545284, 0.9171188075818063]
Predicted: compil album
strategy: same or bigger
Num Predicted MIDS: 1
1True
Nan and None: compil album False
WHOOPS

aliases: {'of mice and men .', 'of mice and men'}
question: what is the soundtrack for of mice and men called 
predicted: of mice and men
correct: of mice and m

Question: cd-i games from the legend of zelda series is an example of what kind of video game?
Subject MID: 0h4867
Subject Alises: []
Subject Name: nan
Predicted Confidence: [0.9183126051434116, 0.7164937606508363, 0.5388128611414474, 0.6572165784950602, 0.7559593701818585, 0.7101687613508804, 0.8870513509406125, 0.9448099513925197, 0.9871743551065334, 0.933012604292428, 0.9816903384659761, 0.9146057360661087, 0.963425853794202, 0.9436803470765895, 0.9052281086892384, 0.9552990873727873, 0.8895728610350796, 0.9975579784577662]
Predicted: cd-i game from the legend of zelda seri
strategy: same or bigger
Num Predicted MIDS: 96
Nan and None: cd-i game from the legend of zelda seri False
WHOOPS

Question: what invasions happened in macedon
Subject MID: 0bk25
Subject Alises: [('macedonia', 'macedonia')]
Subject Name: nan
Predicted Confidence: [0.9712665448314589, 0.46804831093392013, 0.8358254279445256, 0.9867860226724041, 0.996580194847833]
Predicted: macedon
strategy: same or bigger
Num Pr

aliases: {'film product', 'film produc'}
question: What is the name of a film producer
predicted: film produc
correct: film producer

aliases: {'the skin', 'in the skin', 'skin'}
question: what genre is the skin
predicted: skin
correct: the skin

Question Distance: 0.25
Question: What is the name of the comedy film created by todd solondz? 
Subject MID: 03mwg5
Subject Alises: [('todd solondz', 'todd solondz')]
Subject Name: todd solondz
Predicted Confidence: [0.9808949122414734, 0.984259662044229, 0.8693206475487723, 0.9586650800902166, 0.9634261982349137, 0.9406447542679278, 0.8775294785322932, 0.8002164539829397, 0.9733631310329981, 0.980196406642845, 0.7893173233364942, 0.8361966306146127, 0.9975934658890468]
Predicted: comedi film
strategy: same or bigger
Num Predicted MIDS: 1
1False
Nan and None: comedi film False
WHOOPS

aliases: {'a soul on ice', 'soul on ice'}
question: Which type of music is the album soul on ice
predicted: soul on ice
correct: soul on ice

aliases: {'the verv

aliases: {'geographi of the hapless heart', 'geographi of the hapless heart 2'}
question: Which language is the film geography of the hapless heart in
predicted: geographi of the hapless heart
correct: geography of the hapless heart

aliases: {'go to extrem', 'i go to extrem'}
question: what is an edition of the book going to extremes
predicted: go to extrem
correct: going to extremes

aliases: {'centuri', 'centuri media record', 'centuri media x', 'media', 'record', 'centuri media'}
question: what artist is on the lable century media records?
predicted: labl centuri media record
correct: century media records

aliases: {'phillip lord', 'the shit', 'phillipa lord : she the shit'}
question: which country released phillipa lord: shes the shit
predicted: phillipa lord : shes the shit
correct: nan

Question: What is the name of the track list for the release pillows & prayers: cherry red 1982-1983?
Subject MID: 0f3hh9h
Subject Alises: [('pillows & prayers: cherry red 1982–1983', 'pillow & 

question: what is a comedy film made in india
predicted: comedi
correct: comedy film

aliases: {'a poet', 'poet'}
question: Name a poet. 
predicted: poet
correct: poet

aliases: {'munro', 'peak'}
question: This mountain is the most easterly of the Munro peaks.
predicted: munro peak
correct: munro

aliases: {'world war ii', 'world war iii'}
question: which book was part of series of world war ii
predicted: world war ii
correct: world war ii

aliases: {'the sun', 'sun'}
question: What minor planet orbits the sun?
predicted: sun
correct: the sun

Question: What is william e. mcanulty, jr.'s gender
Subject MID: 02z0b5x
Subject Alises: [('william e. mcanulty jr.', 'william e. mcanulti jr .'), ('william mcanulty, jr.', 'william mcanulti , jr .')]
Subject Name: nan
Predicted Confidence: [0.9878281152620981, 0.9975507030027131, 0.9913222463773773, 0.9037878177197489, 0.9883268115319656, 0.9797929458197583, 0.8857749130933588, 0.9796885224663973, 0.9868548734936102]
Predicted: william e. mcanul

predicted: drum
correct: drums

aliases: {'for', 'pilot', 'the pilot', 'x-file'}
question: who is the director of the pilot for x-files
predicted: pilot for x-file
correct: pilot

Question: What was theatrical production of MacBeth staged in?
Subject MID: 04rltj
Subject Alises: [('donmar', 'donmar'), ('donmar warehouse, london borough of camden', 'donmar warehous , london borough of camden'), ('donmar warehouse', 'donmar warehous')]
Subject Name: nan
Predicted Confidence: [0.9741678164075473, 0.9511635006565959, 0.5028298744512248, 0.6412701951006626, 0.9775779615984439, 0.999540635633881, 0.9916178465130208, 0.8995258875138123, 0.9971821745624353]
Predicted: macbeth
strategy: same or bigger
Num Predicted MIDS: 39
Nan and None: macbeth False
WHOOPS

Question: who developed the game vay
Subject MID: 0kyvhwk
Subject Alises: []
Subject Name: nan
Predicted Confidence: [0.9827348259358479, 0.9866142560549824, 0.9292675746147058, 0.9900197990398012, 0.999584238594191]
Predicted: vay
strategy

aliases: {'love troubl', 'i love troubl'}
question: What kind of film is lovely troubles
predicted: love troubl
correct: lovely troubles

Question: In which city did catherine uhlmyer die in
Subject MID: 04zfl8
Subject Alises: []
Subject Name: nan
Predicted Confidence: [0.9813943955010315, 0.9776932224835323, 0.9306427673952965, 0.9975605744246868, 0.9989046945995667, 0.9972707793140789, 0.9989084086112304, 0.9939760470513604]
Predicted: catherin uhlmyer
strategy: same or bigger
Num Predicted MIDS: 15
Nan and None: catherin uhlmyer False
WHOOPS

aliases: {'opera', 'music'}
question: what artist sings opera music
predicted: opera music
correct: opera

aliases: {'guitar hero iii', 'guitar hero ii'}
question: what's one of the modes in guitar hero ii
predicted: guitar hero ii
correct: guitar hero ii

aliases: {'star war : episod i - battl for naboo', 'star war : episod ix', 'star war : episod ii', 'star war : episod i', 'star war : episod iii', 'episod', 'star war', 'star war episod i : b

Predicted: self-titl
strategy: same or bigger
Num Predicted MIDS: 0
Nan and None: self-titl False
WHOOPS

aliases: {'ghostbust 3', 'ghostbust 2', 'ghostbust'}
question: who published the game ghostbusters
predicted: ghostbust
correct: ghostbusters

aliases: {'radio', 'microbeworld radio'}
question: microbeworld radio mostly talks about what subject?
predicted: microbeworld radio most talk
correct: microbeworld radio

aliases: {'susan davi', 'susan david'}
question: which committee is susan davis a part of
predicted: susan davi
correct: susan davis

aliases: {'the coil', 'coil'}
question: what is the coil about?
predicted: coil
correct: the coil

Empty Predicted
Question: which country contains 
Subject MID: 0nltq
Subject Alises: [('cascade county', 'cascad counti')]
Subject Name: nan
Predicted Confidence: [0.9823410585982777, 0.7829478134056063, 0.9986050517843741]
Predicted: 
strategy: same or bigger
Num Predicted MIDS: 0
Nan and None:  True
WHOOPS

Question: what color is the breed b

Subject Alises: [('single-player video game', 'single-play video game')]
Subject Name: nan
Predicted Confidence: [0.9859839311595114, 0.9848014974782219, 0.9277726563454509, 0.9663450429808006, 0.9494434798294529, 0.9894189634001926, 0.9065624620030495, 0.9544908074946852, 0.9713827236176451]
Predicted: single-play horror
strategy: same or bigger
Num Predicted MIDS: 6
Nan and None: single-play horror False
WHOOPS

aliases: {'the hit album 6', '6'}
question: what song was included in the hits album 6
predicted: 6
correct: the hits album 6

Question: which bay area city was david f. dodge born in 
Subject MID: 0gzd7p
Subject Alises: [('david dodge', 'david dodg'), ('david francis dodge', 'david franci dodg')]
Subject Name: nan
Predicted Confidence: [0.981135961975933, 0.6474344233523105, 0.8494159907319272, 0.9195271911117222, 0.9963740326210572, 0.9918579192554214, 0.9391656153569313, 0.9914118961899847, 0.9986943628547429, 0.9955880815889689]
Predicted: david f. dodg
strategy: same or 

aliases: {'birmingham', 'birmingham 6'}
question: Name the release track on the musical recording birmingham.
predicted: birmingham
correct: birmingham

aliases: {'the miracl', 'the daisi', 'daisi fay and the miracl man', 'miracl man', 'the miracl man', 'miracl', 'the miracul'}
question: in what format is the daisy fay and the miracle man book released
predicted: daisi fay and the miracl man book
correct: daisy fay and the miracle man

aliases: {'cvg', 'sport', 'game', 'sport game'}
question: what is a type of cvg sports game
predicted: cvg sport game
correct: sports game

aliases: {'newspap circul', 'newspap', 'guardian', 'the brampton guardian', 'brampton'}
question: In what area does the the brampton guardian newspaper circulate
predicted: brampton guardian newspap
correct: the brampton guardian

aliases: {'a song', 'song'}
question: what is a song?
predicted: song
correct: song

aliases: {'marathon', 'ny marathon'}
question: What is the name of an album by marathon?
predicted: mara

aliases: {'the drum', 'drum'}
question: Which instrumentalists play the drum kit?
predicted: drum
correct: drums

Question Distance: 0.3333333333333333
Question: which world war ii events took place in the atlantic sea?
Subject MID: 081pw
Subject Alises: [('ww2', 'ww2'), ('world war ii', 'world war ii'), ('wwii', 'wwii'), ('the second world war', 'the second world war'), ('world war, 1939-1945', 'world war , 1939-1945'), ('world war 2', 'world war 2')]
Subject Name: world war ii
Predicted Confidence: [0.9734107617621556, 0.5728144390556555, 0.6516233382080969, 0.5731186648025145, 0.8636488784227341, 0.7036355218395915, 0.8858753213337147, 0.8883287107391591, 0.9541713772061456, 0.9327088471448689, 0.9379446047787146, 0.9986149178689105]
Predicted: atlant sea
strategy: same or bigger
Num Predicted MIDS: 14
1False
Nan and None: atlant sea False
WHOOPS

Question: where are thoroughbred racehorses originally from
Subject MID: 09c7w0
Subject Alises: [('u.s. of a.', 'u.s. of a .'), ('united 

predicted: oil
correct: once upon a time there was oil

aliases: {'bubbl bobbl', 'bubbl bobbl 2'}
question: who published bubble bobble
predicted: bubbl bobbl
correct: bubble bobble

Question Distance: 0.16666666666666666
Question: which season of andrew zimmerman's show did he visit uganda in
Subject MID: 07j_kcb
Subject Alises: [('uganda', 'uganda')]
Subject Name: uganda
Predicted Confidence: [0.9639344791256632, 0.5534555980142913, 0.9614565118551104, 0.8499606862587955, 0.9204721931660667, 0.9728004852130685, 0.606252368975623, 0.9154436896982073, 0.9038983649870429, 0.6969694922292825, 0.7719344262652817, 0.9935621567031023]
Predicted: andrew zimmerman
strategy: same or bigger
Num Predicted MIDS: 1
1False
Nan and None: andrew zimmerman False
WHOOPS

Question: what kind of martial art does 東口 翔太 practice 
Subject MID: 0l8h7t_
Subject Alises: [('とぐち しょうた', 'とぐち しょうた'), ('ikioi shota', 'ikioi shota'), ('ikioi shōta', 'ikioi shōta'), ('いきおい しょうた', 'いきおい しょうた'), ('勢 翔太', '勢 翔太'), ('tog

aliases: {'unit state', 'nativ american in the unit state'}
question: which singer has native americans in the united states ethnicity 
predicted: unit state
correct: native americans in the united states

aliases: {'releas', 'mami blue', 'mami', 'blue'}
question: What is the title of a track on the mamy blue release?
predicted: mami blue releas
correct: mamy blue

aliases: {'scream for help !', 'scream for help'}
question: what is the genre of the film scream for help
predicted: scream for help
correct: scream for help

aliases: {'australia', 'ip australia', 'v australia'}
question: What's a wetland site located in australia
predicted: australia
correct: australia

aliases: {'o holi night', 'o holi night !'}
question: which 1999 release contains the track o holy night?
predicted: o holi night
correct: o holy night

aliases: {'the sun', 'sun'}
question: what mars crossing minor planet orbits the sun
predicted: sun
correct: the sun

aliases: {'counti', 'peoria counti', 'illinoi', 'peori

Question: What is an album written by Johannes Brahms
Subject MID: 02lx2r
Subject Alises: [('album', 'album')]
Subject Name: album
Predicted Confidence: [0.9827525191201477, 0.9729936122108866, 0.9599890040495004, 0.6249842722290203, 0.9721665396807982, 0.9919346292168799, 0.9472444610351562, 0.6792040763257142]
Predicted: johann brahm
strategy: same or bigger
Num Predicted MIDS: 2
1False
Nan and None: johann brahm False
WHOOPS

aliases: {'gösta ekman', 'gösta ekmann'}
question: what is the profession of gösta ekman
predicted: gösta ekman
correct: gösta ekman

Question Distance: 0.3333333333333333
Question: Which scottish cuisine cheese is a sheep milk cheese?
Subject MID: 0ftx68
Subject Alises: [('scottish cuisine', 'scottish cuisin')]
Subject Name: scottish cuisine
Predicted Confidence: [0.9686611844475028, 0.6445449084939452, 0.5386338607324774, 0.5010029892953703, 0.8154105675298321, 0.9796461270775557, 0.921681144449926, 0.731291019742046, 0.8826153815096708, 0.9973094529850746]
P

predicted: music film
correct: musical

aliases: {'four minut mile', 'the four minut mile'}
question: who is the subject of the film the four minute mile?
predicted: four minut mile
correct: the four minute mile

Question: what is a song by
Subject MID: 016hrs
Subject Alises: [('the meters', 'the meter'), ('the original meters', 'the origin meter'), ('meters', 'meter'), ('meters, the', 'meter , the'), ('funky meters', 'funki meter')]
Subject Name: nan
Predicted Confidence: [0.9737142171980465, 0.9792706843187747, 0.989081514972156, 0.992719026306948, 0.9991273531283529]
Predicted: song
strategy: same or bigger
Num Predicted MIDS: 21
Nan and None: song False
WHOOPS

aliases: {'hypertens 2', 'hypertens'}
question: Name a symptom from hypertension.
predicted: hypertens
correct: hypertension

Question: What is the release type of the album sauf le respect que je vous dois...
Subject MID: 0fw1vpy
Subject Alises: [('ses plus belles chansons, volume 1 : sauf le respect que je vous dois...', '

question: which weight division does Éder jofre box in
predicted: éder jofr box
correct: éder jofre

aliases: {'vladimir vysotski', 'vladimir vysotskiy', 'vladimir vysotskij'}
question: which city did vladimir vysotsky die
predicted: vladimir vysotski
correct: vladimir vysotsky

aliases: {'the sun', 'sun'}
question: What orbits the sun
predicted: sun
correct: the sun

aliases: {'the season', 'season', 'the 4 season'}
question: Which region was the season released in
predicted: season
correct: the season

aliases: {'the disciplin', 'meteorolog', 'disciplin'}
question: what do people in the discipline meteorology study?
predicted: disciplin meteorolog
correct: meteorology

aliases: {'justin simmond', 'justin simmon'}
question: Who is justin simmons?
predicted: justin simmon
correct: justin simmons

Question: What type of animal is a manx
Subject MID: 012jg5
Subject Alises: [('stubbin', 'stubbin'), ('manx cat', 'manx cat'), ('kayt manninagh', 'kayt manninagh'), ('cabbit', 'cabbit')]
Subje

Nan and None: effingham False
WHOOPS

aliases: {'the lemon song', 'the lemon', 'lemon'}
question: which album includes the lemon song
predicted: lemon
correct: lemon

Question: where was jacques erwin born
Subject MID: 0z2l7hh
Subject Alises: []
Subject Name: nan
Predicted Confidence: [0.9824530716071755, 0.9984369891990411, 0.9995315047076719, 0.9995372572724239, 0.9997192847155629]
Predicted: jacqu erwin
strategy: same or bigger
Num Predicted MIDS: 12
Nan and None: jacqu erwin False
WHOOPS

aliases: {'rev', 'the rev'}
question: what does the rev play as an instrument 
predicted: rev
correct: the rev

Question: Which release track is also a recording?
Subject MID: 0m94hxk
Subject Alises: [('monster', 'monster')]
Subject Name: nan
Predicted Confidence: [0.9897321392809584, 0.6467159494430477, 0.9750946720706293, 0.9097006559198959, 0.9801153835540479, 0.9871174015050451, 0.9770831675183768, 0.9989999930462812]
Predicted: record
strategy: same or bigger
Num Predicted MIDS: 10
Nan and No

question: what is a song from the there was an elk album 
predicted: elk
correct: there was an elk

aliases: {'salvador dalí', 'salvador dali'}
question: Where was salvador dali published?
predicted: salvador dali
correct: salvador dali

aliases: {'cruella de vil', 'cruella de vill'}
question: on what recording can you fing the release track cruella de vil
predicted: cruella de vil
correct: cruella de vil

aliases: {'hous associ', 'the hous', 'hous'}
question: Which genre is the film the house associated with
predicted: hous
correct: the house

aliases: {'cadillac', 'come'}
question: what is the recording from which the self titled release track cadillac comes
predicted: cadillac come
correct: cadillac

aliases: {'the greatest fit', 'greatest fit'}
question: what kind of music is on the greatest fits
predicted: greatest fit
correct: the greatest fits

aliases: {'majorett', 'the majorett'}
question: what type of netflix movie is the majorettes?
predicted: majorett
correct: the majorett

predicted: stravaganzza is best
correct: stravaganzza

aliases: {'a b', 'b movi', 'b'}
question: what is an example of a b movie?
predicted: b
correct: b movie

aliases: {'suicid ?', 'suicid'}
question: Which artist recorded suicide?
predicted: suicid
correct: suicide

aliases: {'the internet', 'internet'}
question: What is a book written about the internet?
predicted: internet
correct: the internet

aliases: {'leonard ford', 'brion leonard ford', 'ford', 'leonard', 'brion', 'brian leonard', 'leonard lord'}
question: This man is the father of brion leonard ford.
predicted: brion leonard ford .
correct: brion leonard ford

aliases: {'rabid dog', 'rabid', 'dog', 'best'}
question: What genre does rabid dogs best fall under?
predicted: rabid dog best
correct: rabid dogs

aliases: {'jemma donovan', 'emma donovan'}
question: Which genre of music does emma donovan create
predicted: emma donovan
correct: emma donovan

aliases: {'just danc', 'just danc 3', 'just danc 2'}
question: what type of 

strategy: same or bigger
Num Predicted MIDS: 19
Nan and None: you cant take it with you False
WHOOPS

aliases: {'the paraguayan war', 'paraguayan war'}
question: What was a significant battle in the paraguayan war
predicted: paraguayan war
correct: the paraguayan war

aliases: {'eden', 'our time in eden'}
question: who is our time in eden by
predicted: eden
correct: our time in eden

aliases: {'assassin', 'wax assassin'}
question: Whose cause of death was assassination?
predicted: assassin
correct: assassination

aliases: {'barefoot', 'the barefoot artist'}
question: which film created the barefoot artist 
predicted: barefoot
correct: the barefoot artist

Question Distance: 0.3333333333333333
Question: What's a game that konami published for the wii
Subject MID: 026kds
Subject Alises: [('nintendo wii', 'nintendo wii'), ('wii', 'wii'), ('nintendo revolution', 'nintendo revolut')]
Subject Name: wii
Predicted Confidence: [0.9708569887080667, 0.963466392342533, 0.9156977653306559, 0.538436

aliases: {'ak soundtrack', 'π soundtrack', 'soundtrack', '3 soundtrack', 'x soundtrack'}
question: What is an example of a soundtrack?
predicted: soundtrack
correct: soundtrack

aliases: {'the bride with white hair', 'the bride with white hair 2'}
question: who wrote the bride with white hair
predicted: bride with white hair
correct: the bride with white hair

aliases: {'the isley brother', 'isley brother'}
question: What is an album by the isley brothers?
predicted: isley brother
correct: the isley brothers

aliases: {'rumor has it…', 'rumor has it'}
question: who was the artist that recorded the album rumor has it
predicted: rumor has it
correct: rumor has it

aliases: {'novel', 'the saint', 'saint'}
question: What type of book is the saint novel?
predicted: saint novel
correct: saint

aliases: {'the heavi', 'heavi'}
question: which country was the the heavy album released
predicted: heavi
correct: the heavy

aliases: {'sonic advanc 3', 'sonic advanc', 'sonic advanc 2'}
question: WHo

predicted: ata airlin
correct: ata airlines

Question: Which country is azitamusic from
Subject MID: 01qnrgs
Subject Alises: [('azita', 'azita')]
Subject Name: nan
Predicted Confidence: [0.9911618560177333, 0.9334372551733714, 0.9959260062762917, 0.9999309345000973, 0.9995574855109489]
Predicted: azitamus
strategy: same or bigger
Num Predicted MIDS: 0
Nan and None: azitamus False
WHOOPS

aliases: {'meet the parent', 'parent'}
question: what was the sequel to meet the parents
predicted: parent
correct: meet the parents

Question: What artist recorded europa?
Subject MID: 01bz6g5
Subject Alises: []
Subject Name: nan
Predicted Confidence: [0.9883674122241188, 0.9467512235845484, 0.9978637422348596, 0.9999158988859002, 0.9989786958041372]
Predicted: europa
strategy: same or bigger
Num Predicted MIDS: 494
Nan and None: europa False
WHOOPS

aliases: {'the evil touch', 'the devil touch'}
question: What is the country of origin of the tv program the evil touch?
predicted: evil touch
correct: t

Subject Alises: [('student', 'student')]
Subject Name: student
Predicted Confidence: [0.982376377298587, 0.9710728690435156, 0.8914298160302001, 0.9062912649680028, 0.9367646902990456, 0.9513398977322155, 0.7002526291175523, 0.994954202870732, 0.9621533098377155]
Predicted: hollyoak
strategy: same or bigger
Num Predicted MIDS: 1
1False
Nan and None: hollyoak False
WHOOPS

aliases: {"do n't bother me", "it do n't bother me"}
question: which language is sung in don't bother me
predicted: do n't bother me
correct: don't bother me

aliases: {'employe of the month ep', 'employe of the month'}
question: what was the soundtrack for employee of the month (2006 film)
predicted: employe of the month
correct: employee of the month

Question Distance: 0.4117647058823529
Question: what's one of the brothers grimm's famous fairy tales
Subject MID: 0bjsc
Subject Alises: [('gebrüder grimm', 'gebrüder grimm'), ('brothers grimm', 'brother grimm'), ('grimm brothers', 'grimm brother'), ('brüder grimm', 'b

Subject Name: film festival
Predicted Confidence: [0.9895217269558957, 0.8127382462664426, 0.8708144481732705, 0.9970027314991079, 0.9989115512994742, 0.9985251212568744]
Predicted: recur
strategy: same or bigger
Num Predicted MIDS: 1
1False
Nan and None: recur False
WHOOPS

aliases: {'andi russel', 'randi russel'}
question: What label is andy russell under
predicted: andi russel
correct: andy russell

aliases: {'ford lightn', 'lightn'}
question: Which license is required for lightning?
predicted: lightn
correct: lightning

aliases: {'spider-man !', 'spider-man 4', 'spider-man', 'spider-man 2', 'spider-man 3'}
question: in what north american country was spider-man (film) filmed
predicted: spider-man
correct: spider-man

Question: in what country was what play did  born
Subject MID: 0glp9nl
Subject Alises: [('kenmare sorceress', 'kenmar sorceress')]
Subject Name: nan
Predicted Confidence: [0.9914602003696639, 0.8550034093927836, 0.8311932998464698, 0.9896339323080461, 0.812583146506613

Unnamed: 0,subject,relation,object,question,subject_name,subject_name_start_index,subject_name_stop_index,predicted_subject_name,predicted_question,tag_confidence,predicted_subject_name_start_index,predicted_subject_name_stop_index,predicted_mids
0,0f3xg_,symbols/namesake/named_after,0cqt90,Who was the trump ocean club international hot...,trump ocean club international hotel and tower,3.0,10.0,trump ocean club international hotel and tower,Who was the #head_entity# named after,"[0.9293023252209918, 0.8612211722891993, 0.967...",3,10,[0f3xg_]
1,07f3jg,people/person/place_of_birth,0565d,where was sasha vujačić born,sasha vujačić,2.0,4.0,sasha vujačić,where was #head_entity# born,"[0.9826483932316815, 0.9986743224610085, 0.999...",2,4,[07f3jg]
2,031j8nn,music/release/region,07ssc,What is a region that dead combo was released in,dead combo,5.0,7.0,dead combo,What is a region that #head_entity# was releas...,"[0.9650627174054079, 0.9847821153288865, 0.603...",5,7,"[0gql46, 031j8nn]"
3,0c1cyhd,film/director/film,0wxsz5y,What is a film directed by wiebke von carolsfeld?,wiebke von carolsfeld,6.0,9.0,wiebke von carolsfeld,What is a film directed by #head_entity# ?,"[0.9833911283844019, 0.9653590589910868, 0.958...",6,9,[0c1cyhd]
4,0fvhc0g,music/release/region,0345h,what country was music for stock exchange rel...,music for stock exchange,3.0,7.0,music for stock exchange,what country was #head_entity# released in,"[0.963210482176025, 0.8503666558303614, 0.9947...",3,7,[0fvhc0g]


In [197]:
from tqdm import tqdm_notebook
from numpy import nan
import pandas as pd
from collections import defaultdict

correct_object = 0
correct_relation = 0
correct_subject = 0
predicted_facts_column = []
for index, row in tqdm_notebook(df_dev.iterrows(), total=df_dev.shape[0]):
    cursor.execute('SELECT object_mid, relation, subject_mid FROM fb_two_kg WHERE subject_mid = ANY(%s)', (list(row['predicted_mids']), ))
    rows = cursor.fetchall()
    predicted_facts = defaultdict(lambda: defaultdict(set))
    
    for object_mid, relation, subject_mid in rows:
        predicted_facts[relation][subject_mid].add(object_mid)        
        
    predicted_facts_column.append(predicted_facts)
    
    predicted_object_mids = set([fact[0] for fact in rows])
    if row['object'] in predicted_object_mids:
        correct_object += 1    
        
    predicted_relations = set([fact[1] for fact in rows])
    if row['relation'] in predicted_relations:
        correct_relation += 1
        
    predicted_subject_mids = set([fact[2] for fact in rows])
    if row['subject'] in predicted_subject_mids:
        correct_subject += 1

df_dev['predicted_facts'] = pd.Series(predicted_facts_column, index=df_dev.index)
print('End-to-end Object Canditate Accuracy So Far:', correct_object / df_dev.shape[0])
print('End-to-end Relation Canditate Accuracy So Far:', correct_relation / df_dev.shape[0])
print('End-to-end Subject Canditate Accuracy So Far:', correct_subject / df_dev.shape[0])
df_dev[:5]


End-to-end Object Canditate Accuracy So Far: 0.9746426924850161
End-to-end Relation Canditate Accuracy So Far: 0.9763024435223605
End-to-end Subject Canditate Accuracy So Far: 0.9656984785615491


Unnamed: 0,subject,relation,object,question,subject_name,subject_name_start_index,subject_name_stop_index,predicted_subject_name,predicted_question,tag_confidence,predicted_subject_name_start_index,predicted_subject_name_stop_index,predicted_mids,predicted_facts
0,0f3xg_,symbols/namesake/named_after,0cqt90,Who was the trump ocean club international hot...,trump ocean club international hotel and tower,3.0,10.0,trump ocean club international hotel and tower,Who was the #head_entity# named after,"[0.9293023252209918, 0.8612211722891993, 0.967...",3,10,[0f3xg_],{'location/location/containedby': {'0f3xg_': {...
1,07f3jg,people/person/place_of_birth,0565d,where was sasha vujačić born,sasha vujačić,2.0,4.0,sasha vujačić,where was #head_entity# born,"[0.9826483932316815, 0.9986743224610085, 0.999...",2,4,[07f3jg],{'sports/professional_sports_team/draft_picks'...
2,031j8nn,music/release/region,07ssc,What is a region that dead combo was released in,dead combo,5.0,7.0,dead combo,What is a region that #head_entity# was releas...,"[0.9650627174054079, 0.9847821153288865, 0.603...",5,7,"[0gql46, 031j8nn]",{'music/release/region': {'031j8nn': {'07ssc'}...
3,0c1cyhd,film/director/film,0wxsz5y,What is a film directed by wiebke von carolsfeld?,wiebke von carolsfeld,6.0,9.0,wiebke von carolsfeld,What is a film directed by #head_entity# ?,"[0.9833911283844019, 0.9653590589910868, 0.958...",6,9,[0c1cyhd],{'people/person/gender': {'0c1cyhd': {'02zsn'}...
4,0fvhc0g,music/release/region,0345h,what country was music for stock exchange rel...,music for stock exchange,3.0,7.0,music for stock exchange,what country was #head_entity# released in,"[0.963210482176025, 0.8503666558303614, 0.9947...",3,7,[0fvhc0g],{'music/release/track_list': {'0fvhc0g': {'0mr...


# Relation Model Yu et Al.

In [200]:
import re
import torch
from torch.autograd import Variable

RELATION_CLASSIFIER = '../../logs/2785.01-01_17:32:06.yu_relation_model/01m_01d_17h_54m_53s.pt'

import pprint

from nltk.tokenize.treebank import TreebankWordTokenizer

from lib.checkpoint import Checkpoint

pretty_printer = pprint.PrettyPrinter(indent=2)
tokenizer = TreebankWordTokenizer() # Same tokenizer used during training

relation_classifier = Checkpoint(checkpoint_path=RELATION_CLASSIFIER, device=0)
relation_classifier.model.relation_word_rnn.flatten_parameters()
relation_classifier.model.relation_rnn.flatten_parameters()
relation_classifier.model.text_rnn.flatten_parameters()
relation_classifier.model.train(mode=False)

cuda = lambda v: v.cuda() if torch.cuda.is_available() else t
to_variable = lambda e: cuda(Variable(torch.LongTensor(e).unsqueeze(1).contiguous()))
    
def get_relation_score(question, relation):
    question = question.lower()
    question_encoded = relation_classifier.text_encoder.encode(question)
    relation_encoded = relation_classifier.relation_encoder.encode(relation)
    relation_word_encoded = relation_classifier.relation_word_encoder.encode(relation)

    question_encoded = to_variable(question_encoded)
    relation_encoded = to_variable(relation_encoded)
    relation_word_encoded = to_variable(relation_word_encoded)

    return relation_classifier.model(question_encoded, relation_encoded, relation_word_encoded).data[0]

# To test this cell
print(get_relation_score('where was #head_entity# born ?', '/people/person/place_of_birth'))
print(get_relation_score('where was #head_entity# born ?', '/location/location/people_born_here'))

0.7156175971031189
0.6886991858482361


# Baseline Relation Model

In [201]:
from functools import lru_cache
from lib.checkpoint import Checkpoint


import math

BASELINE_RELATION_CLASSIFIER = '../../results/0756.12-22_15:25:19.relation_classifier/12m_22d_15h_37m_20s.pt'

baseline_relation_classifer = Checkpoint(checkpoint_path=BASELINE_RELATION_CLASSIFIER, device=0)
baseline_relation_classifer.model.train(mode=False)

@lru_cache(maxsize=4000)
def model(text, text_length):
    return baseline_relation_classifer.model(text, text_length)[0]

def get_baseline_relation_score(question, relation):
    relation = 'www.freebase.com' + relation
    question = question.lower()
    encoded = baseline_relation_classifer.input_text_encoder.encode(question)
    encoded = to_variable(encoded)
    encoded_length = torch.LongTensor([encoded.size()[0]])
    output_batch = model(encoded, encoded_length)
    output_batch = output_batch.squeeze(0)
    relation_index = baseline_relation_classifer.output_text_encoder.encode(relation)[0]
    return math.exp(output_batch[relation_index].data[0])
    
print(get_baseline_relation_score('Where was Obama born?', '/people/person/place_of_birth'))
print(get_baseline_relation_score('Where was Obama born?', '/location/location/people_born_here'))



0.8835899947877679
0.11019587285726916


  scores = F.log_softmax(output)


# Final End-To-End Metric

In [202]:
from tqdm import tqdm_notebook
from numpy import nan
import pandas as pd
from collections import defaultdict
import random

sr_correct = 0
o_correct = 0
relation_correct = 0
subject_mid_correct = 0
total = 0
for index, row in tqdm_notebook(df_dev.iterrows(), total=df_dev.shape[0]):
    max_relation_score = float('-inf')
    max_relation = None

    for relation in row['predicted_facts']:
        # NOTE: row['question'] for baseline
        # row['predicted_question'] for yu et al.
        relation_score = get_baseline_relation_score(row['question'], '/' + relation)

        if relation_score > max_relation_score:
            max_relation_score = relation_score
            max_relation = relation

    if max_relation is not None:
        # The dataset was assembled favoring grouped facts with more facts
        # 2 - 3% point increase in accuracy
        # subject_mid, object_mids = random.sample(row['predicted_facts'][max_relation].items(), 1)[0]
        subject_mid, object_mids = sorted(row['predicted_facts'][max_relation].items(),
                                          key=lambda i: len(i[1]), reverse=True)[0]

        if max_relation == row['relation'] and subject_mid == row['subject']:
            sr_correct += 1
        if row['object'] in object_mids:
            o_correct += 1
        if max_relation == row['relation']:
            relation_correct += 1
        if subject_mid == row['subject']:
            subject_mid_correct += 1

    total += 1

# Yu Et Al. Relation Model
# Subject & Relation Accuracy: 0.745781 [8088 of 10845]
# Object Accuracy: 0.800277 [8679 of 10845]
# Relation Accuracy: 0.812356 [8810 of 10845]
# Subject MID Accuracy: 0.808114 [8764 of 10845]

# Baseline Relation Model
# Subject & Relation Accuracy: 0.776763 [8424 of 10845]
# Object Accuracy: 0.833195 [9036 of 10845]
# Relation Accuracy: 0.873490 [9473 of 10845]
# Subject MID Accuracy: 0.834025 [9045 of 10845]

print('Subject & Relation Accuracy: %f [%d of %d]' % (sr_correct / total, sr_correct, total))
print('Object Accuracy: %f [%d of %d]' % (o_correct / total, o_correct, total))
print('Relation Accuracy: %f [%d of %d]' % (relation_correct / total, relation_correct, total))
print('Subject MID Accuracy: %f [%d of %d]' % (subject_mid_correct / total, subject_mid_correct, total))

  scores = F.log_softmax(output)



Subject & Relation Accuracy: 0.773075 [8384 of 10845]
Object Accuracy: 0.831259 [9015 of 10845]
Relation Accuracy: 0.877086 [9512 of 10845]
Subject MID Accuracy: 0.828861 [8989 of 10845]
