# Upperbound to Relation Accuracy

In this notebook, we attempt to compute the upperbound for the relation accuracy. Our models are unable to achieve more than 88.4% given entity and question features; therefore, we investigate if there exists an upperbound.

In [1]:
import sys
sys.path.insert(0, '../../')
import pandas as pd
import random
from tqdm import tqdm_notebook
tqdm_notebook().pandas()
from scripts.utils.data import FB2M_NAME_TABLE
from scripts.utils.data import FB2M_KG_TABLE
from scripts.utils.connect import get_connection 
import scripts.utils.import_notebook
import importlib

# One shared connection and cursor for the whole notebook; the query helpers defined
# in later cells read `cursor` from the notebook's global namespace.
connection = get_connection()
cursor = connection.cursor()




In [2]:
from scripts.utils.simple_qa import load_simple_qa 

# Load all three splits. The dev split is folded into the training frame, so the
# 'Training Examples' count below is train + dev; the test split stays separate
# and is the frame the later cells evaluate on.
df_train, df_dev, df_test = load_simple_qa(train=True, dev=True, test=True)
df_train = pd.concat([df_dev, df_train])
print('Training Examples:', len(df_train))
df_test[:5]

Training Examples: 86755


Unnamed: 0,subject,relation,object,question
0,01jp8ww,music/album/genre,01qzt1,Which genre of album is harder.....faster?
1,0np6z99,music/album/release_type,02lx2r,what format is fearless
2,0wzc58l,people/person/place_of_birth,0n2z,what city was alex golfis born in
3,0jtw9c,film/writer/film,05szq8z,what film is by the writer phil hay?
4,0gys2sn,people/deceased_person/place_of_death,0tzls,Where did roger marquis die


## Step 1 - Determine Question Subject Name

In [3]:
from functools import partial
# The helpers are defined in other notebooks whose module names contain spaces, so a
# plain `import` statement cannot name them; `importlib.import_module` is used instead.
edit_distance_link_alias = importlib.import_module(
                "scripts.Simple QA Numbers.HYPOTHESIS - Question Refers to Multiple Subjects").edit_distance_link_alias
normalize = importlib.import_module(
                "scripts.Simple QA Numbers.HYPOTHESIS - Subject Name not in Question").normalize

# Create a column with the subject_name linked per example
# (`partial` pre-binds `cursor` and `normalize`; `progress_apply` supplies each row as the last argument).
df_train['subject_name'] = df_train.progress_apply(partial(edit_distance_link_alias, cursor, normalize), axis=1)
df_test['subject_name'] = df_test.progress_apply(partial(edit_distance_link_alias, cursor, normalize), axis=1)

# Print Results
print('Sample:')
display(df_test[:5])

importing Jupyter notebook from ../../scripts/Simple QA Numbers/HYPOTHESIS - Question Refers to Multiple Subjects.ipynb
importing Jupyter notebook from ../../scripts/Simple QA Numbers/HYPOTHESIS - Subject Name not in Question.ipynb






Sample:


Unnamed: 0,subject,relation,object,question,subject_name
0,01jp8ww,music/album/genre,01qzt1,Which genre of album is harder.....faster?,harder.....faster
1,0np6z99,music/album/release_type,02lx2r,what format is fearless,fearless
2,0wzc58l,people/person/place_of_birth,0n2z,what city was alex golfis born in,alex golfis
3,0jtw9c,film/writer/film,05szq8z,what film is by the writer phil hay?,phil hay
4,0gys2sn,people/deceased_person/place_of_death,0tzls,Where did roger marquis die,roger marquis


In [4]:
# Share of examples for which no subject name could be linked (`subject_name` is null):
# first over the training frame plus the test frame, then over the test frame alone.
total = df_test.shape[0] + df_train.shape[0]
linked = df_train.subject_name.notnull().sum() + df_test.subject_name.notnull().sum()
not_linked = total - linked
print('Without Subject Name: %f [%d of %d]' % (not_linked / total, not_linked, total))

# `df_test` is the test split returned by `load_simple_qa`, so the line is labelled
# as test data (it was previously labelled "Dev Data").
total = df_test.shape[0]
linked = df_test.subject_name.notnull().sum()
not_linked = total - linked
print('Without Subject Name Test Data: %f [%d of %d]' % (not_linked / total, not_linked, total))

Without Subject Name: 0.018517 [2008 of 108442]
Without Subject Name Dev Data: 0.019413 [421 of 21687]


## Step 2 - Determine Subject Name Span

In [5]:
# Load the span finder from the "Subject Recognition Data" notebook.
find_subject_name_span = importlib.import_module(
                "scripts.Simple QA Models.Subject Recognition Data").find_subject_name_span

# Each frame is replaced by the row-wise result of the span finder; the sample printed
# below shows the question_tokens, subject_name_tokens, start_index and end_index columns it adds.
# NOTE: the frames are reassigned in place, so this cell is not meant to be run twice.
df_train = df_train.progress_apply(find_subject_name_span, axis=1)
df_test = df_test.progress_apply(find_subject_name_span, axis=1)

# Print Results
print('Sample:')
display(df_test[:5])

importing Jupyter notebook from ../../scripts/Simple QA Models/Subject Recognition Data.ipynb






Sample:


Unnamed: 0,end_index,object,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens
0,8.0,01qzt1,Which genre of album is harder.....faster?,"[which, genre, of, album, is, harder, ....., f...",music/album/genre,5.0,01jp8ww,harder.....faster,"(harder, ....., faster)"
1,4.0,02lx2r,what format is fearless,"[what, format, is, fearless]",music/album/release_type,3.0,0np6z99,fearless,"(fearless,)"
2,5.0,0n2z,what city was alex golfis born in,"[what, city, was, alex, golfis, born, in]",people/person/place_of_birth,3.0,0wzc58l,alex golfis,"(alex, golfis)"
3,8.0,05szq8z,what film is by the writer phil hay?,"[what, film, is, by, the, writer, phil, hay, ?]",film/writer/film,6.0,0jtw9c,phil hay,"(phil, hay)"
4,4.0,0tzls,Where did roger marquis die,"[where, did, roger, marquis, die]",people/deceased_person/place_of_death,2.0,0gys2sn,roger marquis,"(roger, marquis)"


## Step 3 - Candidate MIDs

Knowing the span of the question that references the subject, we look up every MID whose alias matches that subject name.

In [6]:
# TODO: Look at the number of examples being skipped

def get_candidate_mids(row):
    """Return every MID whose alias equals the row's lower-cased subject name.

    Rows whose ``question_tokens`` is not a list produce an empty list and no
    query is executed for them.

    Raises:
        AssertionError: if the row's own ``subject`` MID is not among the MIDs found.
    """
    if not isinstance(row['question_tokens'], list):
        return []

    alias_query = """
            SELECT DISTINCT mid
            FROM {name_table}
            WHERE alias = %s
        """.format(name_table=FB2M_NAME_TABLE)
    cursor.execute(alias_query, (row['subject_name'].lower(),))
    matching_mids = [record[0] for record in cursor.fetchall()]

    # Sanity check: the true subject must be reachable through its linked name.
    assert row['subject'] in matching_mids

    return matching_mids

# Attach the candidate MID list to every row of both frames.
df_train['candidate_mids'] = df_train.progress_apply(get_candidate_mids, axis=1)
df_test['candidate_mids'] = df_test.progress_apply(get_candidate_mids, axis=1)

# Print Results
# The average divides by all test rows, including rows that received an empty list.
print('Average Number of MIDs:', sum(len(mids) for mids in df_test['candidate_mids']) / df_test.shape[0])
print('Sample:')
display(df_test[:5])





Average Number of MIDs: 56.643196384931066
Sample:


Unnamed: 0,end_index,object,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids
0,8.0,01qzt1,Which genre of album is harder.....faster?,"[which, genre, of, album, is, harder, ....., f...",music/album/genre,5.0,01jp8ww,harder.....faster,"(harder, ....., faster)",[01jp8ww]
1,4.0,02lx2r,what format is fearless,"[what, format, is, fearless]",music/album/release_type,3.0,0np6z99,fearless,"(fearless,)","[0vqfcr, 016h11q, 0g3m0zm, 0mr5g41, 0swkzk, 0m..."
2,5.0,0n2z,what city was alex golfis born in,"[what, city, was, alex, golfis, born, in]",people/person/place_of_birth,3.0,0wzc58l,alex golfis,"(alex, golfis)",[0wzc58l]
3,8.0,05szq8z,what film is by the writer phil hay?,"[what, film, is, by, the, writer, phil, hay, ?]",film/writer/film,6.0,0jtw9c,phil hay,"(phil, hay)",[0jtw9c]
4,4.0,0tzls,Where did roger marquis die,"[where, did, roger, marquis, die]",people/deceased_person/place_of_death,2.0,0gys2sn,roger marquis,"(roger, marquis)",[0gys2sn]


## Step 4 - Candidate Facts per Entity

For every candidate entity, there is a set of facts (relation and object pairs) in the knowledge graph. We collect those facts, grouped by relation.

In [7]:
from collections import defaultdict

def generate_facts(row):
    """Collect the knowledge-graph facts of every candidate MID of a row.

    Args:
        row: mapping (e.g. a DataFrame row) with a ``candidate_mids`` list.

    Returns:
        Nested defaultdict ``relation -> subject_mid -> [object_mid, ...]``.
        It is empty when the row has no candidate MIDs; in that case no query
        is sent to the database.
    """
    candidate_facts = defaultdict(lambda: defaultdict(list))
    candidate_mids = row['candidate_mids']
    if len(candidate_mids) == 0:
        # Nothing can match an empty MID list, so skip the database round trip.
        return candidate_facts

    cursor.execute("""SELECT subject_mid, relation, object_mid
                      FROM {kg}
                      WHERE subject_mid = ANY(%s)""".format(kg=FB2M_KG_TABLE), (candidate_mids,))

    # Group by relation first, then by subject, keeping every object of the pair.
    for subject_mid, relation, object_mid in cursor.fetchall():
        candidate_facts[relation][subject_mid].append(object_mid)

    return candidate_facts

# Candidate facts are only computed for the test frame; the training frame is not
# given this column.
df_test['candidate_facts'] = df_test.progress_apply(generate_facts, axis=1)

# Print Results
# The average divides by all test rows, including rows with no candidate facts.
print('Average Number of Relations:',
          sum(len(relations) for relations in df_test['candidate_facts']) / df_test.shape[0])
print('Sample:')
display(df_test[:5])


Average Number of Relations: 18.04638723659335
Sample:


Unnamed: 0,end_index,object,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids,candidate_facts
0,8.0,01qzt1,Which genre of album is harder.....faster?,"[which, genre, of, album, is, harder, ....., f...",music/album/genre,5.0,01jp8ww,harder.....faster,"(harder, ....., faster)",[01jp8ww],{'music/album/release_type': {'01jp8ww': ['02l...
1,4.0,02lx2r,what format is fearless,"[what, format, is, fearless]",music/album/release_type,3.0,0np6z99,fearless,"(fearless,)","[0vqfcr, 016h11q, 0g3m0zm, 0mr5g41, 0swkzk, 0m...",{'common/topic/notable_types': {'06xb83k': ['0...
2,5.0,0n2z,what city was alex golfis born in,"[what, city, was, alex, golfis, born, in]",people/person/place_of_birth,3.0,0wzc58l,alex golfis,"(alex, golfis)",[0wzc58l],{'people/person/nationality': {'0wzc58l': ['03...
3,8.0,05szq8z,what film is by the writer phil hay?,"[what, film, is, by, the, writer, phil, hay, ?]",film/writer/film,6.0,0jtw9c,phil hay,"(phil, hay)",[0jtw9c],{'people/person/nationality': {'0jtw9c': ['09c...
4,4.0,0tzls,Where did roger marquis die,"[where, did, roger, marquis, die]",people/deceased_person/place_of_death,2.0,0gys2sn,roger marquis,"(roger, marquis)",[0gys2sn],{'common/topic/notable_types': {'0gys2sn': ['0...


## Step 5 - Question to Relation Distribution

Given each entity, we have candidate relations that it can take. The SimpleQuestions dataset questions have a significant overlap; therefore, we overfit a model to the training set. We map each question predicate, e.g. "when was <e> born", to all the relations it can take.

In [8]:
from tqdm import tqdm_notebook
from collections import defaultdict

def get_question_predicate(row):
    """Turn a tokenized question into its predicate template.

    The token at ``start_index`` is replaced by ``<e>``, the tokens strictly
    between ``start_index`` and ``end_index`` are dropped, and every other
    token is lower-cased and stripped. The pieces are joined with single
    spaces and surrounding whitespace plus leading/trailing ``?`` and ``.``
    characters are removed.
    """
    pieces = []
    for position, token in enumerate(row['question_tokens']):
        if position == row['start_index']:
            pieces.append('<e>')
            continue
        inside_subject_span = row['start_index'] < position < row['end_index']
        if not inside_subject_span:
            pieces.append(token.lower().strip())

    # Strip punctuation as it does not affect the semantic meaning of the question
    predicate = ' '.join(pieces).strip()
    return predicate.strip('?.').strip()

# Count, over the training frame and the test frame together, how often each question
# predicate is paired with each true relation.
df = pd.concat([df_train, df_test])
question_predicate_to_relation = defaultdict(lambda: defaultdict(int))
skipped = 0
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    if not isinstance(row['question_tokens'], list):
        # No token list for this row, so no predicate can be built from it.
        skipped += 1
        continue

    question_predicate = get_question_predicate(row)
    # Map the question predicate to the True relation
    question_predicate_to_relation[question_predicate][row['relation']] += 1

print('Skipped:', skipped)
# NOTE: the ratio below divides the total row count, skipped rows included, by the
# number of distinct predicates.
print('Number of Examples per Question Predicates: %f [%d of %d]' %
      (df.shape[0] / len(question_predicate_to_relation), len(question_predicate_to_relation), df.shape[0]))
print('Sample:')
# Show the 50 shortest predicates.
print(list(sorted(question_predicate_to_relation.keys(), key=lambda q: len(q)))[:50])


Skipped: 2008
Number of Examples per Question Predicates: 2.740026 [39577 of 108442]
Sample:
['who is <e>', 'name a <e>', 'list a <e>', 'what s <e>', 'what is <e>', 'wher is <e>', 'name an <e>', '<e> is what', 'list an <e>', 'who was <e>', 'who has <e>', 'who won <e>', 'that is <e>', 'who did <e>', 'who are <e>', 'who had <e>', 'when is <e>', 'is <e> male', 'where i <e>', 'where is <e>', 'who does <e>', 'what was <e>', 'who is a <e>', 'who sang <e>', 'what are <e>', 'who owns <e>', 'who made <e>', 'name one <e>', "where 's <e>", 'who used <e>', 'how died <e>', 'who like <e>', 'who uses <e>', 'who sand <e>', "who 's a <e>", 'who wrote <e>', 'what is a <e>', 'who plays <e>', '<e> is by who', 'who makes <e>', 'who is <e> by', 'who sings <e>', 'who is an <e>', 'where was <e>', 'who was a <e>', 'who is in <e>', 'name a tv <e>', '<e> died here', 'who has a <e>', "what 's a <e>"]


## Step 6 - Compute Upperbound

Given that we have a perfect distribution for both entity linking and the question-to-relation mapping, we can compute the maximum accuracy for the relation model and for the end-to-end model.

In [21]:
# Same shuffle as end-to-end implementation to make the rows comparable
# (`random_state` is fixed so the row order is reproducible). NOTE: this reassigns
# `df_test`, so re-running the cell shuffles the already-shuffled frame again.
from sklearn.utils import shuffle
df_test = shuffle(df_test, random_state=123)

In [22]:
from scripts.utils.table import format_pipe_table
import random
from itertools import chain

def get_top_subject_object(facts, relation):
    """Pick the ``(subject_mid, object_mids)`` pair with the most objects for ``relation``.

    When several subjects tie for the largest object count, one of them is
    drawn with ``random.choice``. Raises ``ValueError`` if ``facts[relation]``
    holds no subjects.
    """
    subject_to_objects = facts[relation]
    most_objects = max([len(objects) for objects in subject_to_objects.values()])
    tied_pairs = [pair for pair in subject_to_objects.items() if len(pair[1]) == most_objects]
    return random.choice(tied_pairs)

# Tie-breaking below goes through `random.choice` (directly and inside
# `get_top_subject_object`); seed the global RNG so repeated runs report the same numbers.
random.seed(123)

unanswerable = []
interpretations = []
relation_correct = 0
unanswerable_multiple_subjects = 0
unanswerable_multiple_relations = 0
skipped = 0
object_correct = 0
given_relation_object_correct = 0
given_relation_subject_correct = 0
subject_and_relation_correct = 0
answerable = 0
flatten = lambda l: list(chain.from_iterable(l))
for index, row in tqdm_notebook(df_test.iterrows(), total=df_test.shape[0]):
    if not isinstance(row['subject_name'], str):
        # Not answerable because the subject_name is not referenced in the question
        skipped += 1
        interpretations.append([])
        continue

    question_predicate = get_question_predicate(row)
    # relation -> number of examples that pair this predicate with that relation
    relation_counts = question_predicate_to_relation[question_predicate]
    assert len(relation_counts) > 0
    candidate_entity_relations = set(row['candidate_facts'].keys())
    candidate_question_relations = {r for r, count in relation_counts.items() if count > 0}
    # Sorted so that tie-breaking and the exported interpretations do not depend on
    # the (hash-dependent) iteration order of a set.
    candidate_relations = sorted(candidate_entity_relations & candidate_question_relations)
    assert len(candidate_relations) > 0

    # Predict the relation seen most often with this predicate; break ties at random.
    max_score = max(relation_counts[r] for r in candidate_relations)
    max_relations = [r for r in candidate_relations if relation_counts[r] == max_score]
    max_relation = random.choice(max_relations)
    assert relation_counts[max_relation] > 0
    assert max_relation in candidate_question_relations
    assert row['relation'] in candidate_relations

    # Every (relation, subject) reading of the question that the knowledge graph allows.
    interpretations.append([(candidate_relation, candidate_subject)
                            for candidate_relation in candidate_relations
                            for candidate_subject in row['candidate_facts'][candidate_relation]])

    # We use the `Better than random guessing` from notebook
    # `HYPOTHESIS - Question Refers to Multiple Subjects`.
    subject_mid, object_mids = get_top_subject_object(row['candidate_facts'], max_relation)
    # Distinct subject MIDs across all candidate relations.
    candidate_mids = sorted(set(flatten([row['candidate_facts'][r] for r in candidate_relations])))
    if len(candidate_mids) == 1 and len(candidate_relations) == 1:
        answerable += 1
    else:
        # Use the same name table as the candidate MID lookup instead of a hard-coded table name.
        cursor.execute("SELECT alias FROM {name_table} WHERE mid = %s".format(name_table=FB2M_NAME_TABLE),
                       (row['object'],))
        object_aliases = [r[0] for r in cursor.fetchall()]
        unanswerable.append({
            'Candidate Relations': [{r: relation_counts[r]} for r in candidate_relations],
            'Predicate': question_predicate,
            'Predicate To Relation': relation_counts,
            'True Relation': row['relation'],
            'True Subject': row['subject'],
            'Subject Name': row['subject_name'],
            'Object': object_aliases[:3],
            'MIDs': candidate_mids[:5],  # at most five of the candidate subject MIDs
            'Question': row['question'],
            'Candidate Pairs': {(r, relation_counts[r]):
                                [(s, len(o)) for s, o in row['candidate_facts'][r].items()]
                                for r in candidate_relations}
        })

    if len(row['candidate_facts'][max_relation]) > 1:
        unanswerable_multiple_subjects += 1
    if len(candidate_relations) > 1:
        unanswerable_multiple_relations += 1
    if max_relation == row['relation'] and subject_mid == row['subject']:
        subject_and_relation_correct += 1
    if max_relation == row['relation']:
        relation_correct += 1
    if row['object'] in object_mids:
        object_correct += 1

    # Given the True Relation
    subject_mid, object_mids = get_top_subject_object(row['candidate_facts'], row['relation'])
    if row['object'] in object_mids:
        given_relation_object_correct += 1
    if row['subject'] == subject_mid:
        given_relation_subject_correct += 1


# Two denominators are used below: every test row, and only the rows that were not
# skipped (i.e. rows with a linked subject name).
num_test = df_test.shape[0]
num_linked = num_test - skipped

print('Unanswerable Multiple Relations: %f [%d of %d]' %
          (unanswerable_multiple_relations / num_linked, unanswerable_multiple_relations, num_linked))
print('Unanswerable Multiple Subjects: %f [%d of %d]' %
          (unanswerable_multiple_subjects / num_linked, unanswerable_multiple_subjects, num_linked))
print('Unanswerable Subject Name Not Referenced: %f [%d of %d]' %
          (skipped / num_test, skipped, num_test))
print('Relation Accuracy Upperbound: %f [%d of %d]' %
          (relation_correct / num_linked, relation_correct, num_linked))
print('End-to-End Accuracy Approximate Upperbound: %f [%d of %d]' %
      (subject_and_relation_correct / num_test, subject_and_relation_correct, num_test))
print('Object Accuracy Approximate Upperbound: %f [%d of %d]' %
        (object_correct / num_test, object_correct, num_test))
print('Given Relation Object Accuracy Approximate Upperbound: %f [%d of %d]' %
        (given_relation_object_correct / num_test, given_relation_object_correct, num_test))
print('Given Relation Subject Accuracy Approximate Upperbound: %f [%d of %d]' %
        (given_relation_subject_correct / num_test, given_relation_subject_correct, num_test))
print('End-to-End Answerable: %f [%d of %d]' % (answerable / num_test, answerable, num_test))
print('Unanswerable:\n')
# Columns -- ['Bucket', 'Subject Name', 'Object', 'MIDs', 'Predicate', 'True Relation', 'Candidate Relations']
print(format_pipe_table(unanswerable[:50], columns=['Subject Name', 'True Relation', 'True Subject', 'Question', 'Candidate Pairs']))


Unanswerable Multiple Relations: 0.220493 [4689 of 21266]
Unanswerable Multiple Subjects: 0.180617 [3841 of 21266]
Unanswerable Subject Name Not Referenced: 0.019413 [421 of 21687]
Relation Accuracy Upperbound: 0.952788 [20262 of 21266]
End-to-End Accuracy Approximate Upperbound: 0.834555 [18099 of 21687]
Object Accuracy Approximate Upperbound: 0.876516 [19009 of 21687]
Given Relation Object Accuracy Approximate Upperbound: 0.902153 [19565 of 21687]
Given Relation Subject Accuracy Approximate Upperbound: 0.868170 [18828 of 21687]
End-to-End Answerable: 0.651127 [14121 of 21687]
Unanswerable:

| Index | Subject Name | True Relation | True Subject | Question | Candidate Pairs |
| --- | --- | --- | --- | --- | --- |
| 0 | nelson mandela | music/release_track/recording | 0slws_1 | Name a recording by nelson mandela | {('music/recording/artist', 1): [('016z6xj', 1), ('0dystxy', 1), ('0wcvlf', 1), ('0drd2f6', 1), ('0dxxvd6', 1), ('0wzyx1', 1), ('0wk87k', 1), ('01ct763', 1), ('0155lg0', 1), 

## Analysis

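### Numbers

Note: the figures below were recorded from a run over 10,845 examples, so they differ from the cell output above, which covers 21,687 examples.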

- Unanswerable Multiple Relations: 0.212340 [2261 of 10648]
- Unanswerable Multiple Subjects: 0.173835 [1851 of 10648]
- Unanswerable Subject Name Not Referenced: 0.018165 [197 of 10845]
- Relation Accuracy Upperbound: 0.952385 [10141 of 10648]
- End-to-End Accuracy Approximate Upperbound: 0.840572 [9116 of 10845]
- Object Accuracy Approximate Upperbound: 0.882158 [9567 of 10845]
- Given Relation Object Accuracy Approximate Upperbound: 0.908345 [9851 of 10845]
- Given Relation Subject Accuracy Approximate Upperbound: 0.875611 [9496 of 10845]
- End-to-End Answerable: 0.661134 [7170 of 10845]

### Discussion

- The numbers concerning `Given Relation Subject Accuracy Approximate Upperbound` match between this notebook [9496 of 10845] and `HYPOTHESIS - Question Refers to Multiple Subjects` [9495 of 10640].
- Subject and relation accuracy drops more than object accuracy depending on being conditioned with the true relation or not; therefore, I believe a better metric is object accuracy due to invariance to relation ambiguity.
- With an 84.05% upperbound, the SOTA of 78.7% reaches 93.6% of the upperbound. By determining the upperbound, we have reduced the room for improvement from 21.3% to 6.4%.
- With our upperbound measurement, we do not use any text normalization strategies; therefore, it could be higher if synonyms are factored in.

### Empirical Proof

We check every example to ensure that a human evaluator such as myself is unable to differentiate between the multiple relations. In `HYPOTHESIS - Question Refers to Multiple Subjects.ipynb` we check to ensure that a human evaluator cannot differentiate between the multiple subject mids. 

| Index | Bucket | Subject Name | Object | MIDs | Predicate | True Relation | Candidate Relations |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | Unanswerable | sasha vujacic | ['maribor', 'slovenia, maribor'] | ['07f3jg'] | where was <e> born | people/person/place_of_birth | [{'people/person/nationality': 10}, {'people/person/place_of_birth': 1331}] |
| 1 | Unanswerable | john miltern | ['hartford county / new britain city', 'new britain', 'new britain, connecticut'] | ['0c1mfxz'] | where was <e> born | people/person/place_of_birth | [{'people/person/nationality': 10}, {'people/deceased_person/place_of_death': 9}, {'people/person/place_of_birth': 1331}] |
| 2 | Unanswerable | seymour parker gilbert | ['attorney', 'barrister', 'counsel'] | ['02p_vkx'] | what was <e> 's profession | people/person/profession | [{'people/person/profession': 25}, {'common/topic/notable_types': 8}] |
| 3 | Unanswerable | rama | ['on-line sierra', 'on-line systems', 'sierra'] | ['04hr4_v', '07wd2k'] | who published <e> | cvg/game_version/publisher | [{'cvg/computer_videogame/publisher': 43}, {'cvg/game_version/publisher': 40}] |
| 4 | Unanswerable | bon voyage | ['anglit', 'english', 'english language'] | ['0b4pm3', '0gkrfmk', '0k3qrvh', '0gmhlt'] | what is the language of the film <e> | film/film/language | [{'film/film/language': 49}] |
| 5 | Unanswerable | star | ['star'] | ['01cy1z3', '0mrstz', '0nlql9t', '0rzlqg', '0115gdv'] | what s a version of the single titled <e> | music/single/versions | [{'music/single/versions': 1}] |
| 6 | Unanswerable | john rutter | ['angel tidings'] | ['02ydh8'] | what is a song by <e> | music/artist/track | [{'music/artist/track': 79}, {'music/composer/compositions': 1}] |
| 7 | Unanswerable | album | ['the bootmoon series: detroit - march 31st 1977'] | ['02lx2r'] | what 's an example of an <e> | music/album_release_type/albums | [{'music/genre/albums': 1}, {'music/album_release_type/albums': 20}, {'film/film_genre/films_in_this_genre': 5}] |
| 8 | Unanswerable | australia | ['australia (lionrock remix)'] | ['0kpg4sd', '01j9qgr', '0_j2lh4', '0_hqxrc', '0nm2g6l'] | which recordings contains the composition <e> | music/composition/recordings | [{'music/composition/recordings': 1}] |
| 9 | Unanswerable | ghost house | ['daehan minguk', 'korea', 'republic of korea'] | ['0d5jxc', '09gbzy0', '08jcb1', '0b1xf4'] | what country is <e> from | film/film/country | [{'film/film/country': 149}, {'media_common/netflix_title/netflix_genres': 1}] |
| 10 | Unanswerable | wake | ['ep', 'extended play'] | ['0yc5wvz', '01hqqnq', '0fsnmdr', '01nb7q4', '03jmqrc'] | what is the release type of the album <e> | music/album/release_type | [{'music/album/release_type': 45}] |
| 11 | Unanswerable | fritz leiber | ['justin fritz leiber', 'justin leiber'] | ['025wdlw', '02y49'] | who is the chid of <e> | people/person/children | [{'people/person/children': 1}] |
| 12 | Unanswerable | the invaders | ['anglit', 'english', 'english language'] | ['076twy5', '0494vp', '05gsc5h'] | what is the language of <e> | tv/tv_program/languages | [{'tv/tv_program/languages': 8}, {'film/film/language': 20}] |
| 13 | Unanswerable | fran drescher | ['kew gardens', 'kew gardens, new york', 'kew gardens, queens'] | ['01s3kv'] | where was <e> born | people/person/place_of_birth | [{'people/person/nationality': 10}, {'people/person/place_of_birth': 1331}] |
| 14 | Unanswerable | ethan frome | ['ethan frome'] | ['04v7h93', '04v7kz6', '04v7k_9', '04v7ky6', '04v7kzr'] | what is a book from the <e> series | book/book_edition/book | [{'book/book_edition/book': 1}] |
| 15 | Unanswerable | look | ['helvetia', 'la suisse', 'schweiz'] | ['0h566cd', '031qx6m', '03gt0jb', '0pc47fb', '01nkghv'] | where is <e> from | film/film/country | [{'film/film/country': 22}, {'music/release/region': 8}, {'music/artist/origin': 96}] |
| 16 | Unanswerable | smooth 'n swingin' | ['cd', 'compact disc'] | ['037m3vb', '01hq7lz'] | what medium was <e> released on | music/release/format | [{'music/release/format': 9}, {'music/album/release_type': 2}] |
| 17 | Unanswerable | titanium dioxide 0.223 cream | ['zinc oxide'] | ['0hqtxt9'] | what is the active ingredient in <e> | medicine/drug_formulation/active_ingredients | [{'medicine/drug_formulation/active_ingredient_moieties': 39}, {'medicine/drug_formulation/active_ingredients': 45}] |
| 18 | Unanswerable | fast freeze | ['menthol'] | ['0hqsrcv', '0hqrlp6'] | what is the active ingredient moiety in <e> | medicine/drug_formulation/active_ingredient_moieties | [{'medicine/drug_formulation/active_ingredient_moieties': 4}] |
| 19 | Unanswerable | mecca | ['mecca'] | ['014zvln', '0rnv7f', '0drmclx', '010ljqb', '0fv4l1y'] | what is the name for <e> canonical version | music/recording/canonical_version | [{'music/recording/canonical_version': 1}] |
| 20 | Unanswerable | gulliver's travels | ['dean swift', 'isaac bickerstaff', 'jonathan swift'] | ['0btc7', '090s_0', '0dy60p', '06znpjr', '02py9bj'] | who wrote <e> | film/film/story_by | [{'film/film/story_by': 9}, {'film/film/written_by': 67}, {'common/topic/notable_types': 1}, {'book/written_work/author': 132}, {'book/written_work/subjects': 1}] |
| 21 | Unanswerable | i miss you | ['musical recording'] | ['0rpl32', '0_jl8wt', '0ft8dt2', '0fv71by', '01ssyvv'] | what is <e> | common/topic/notable_types | [{'music/composition/form': 17}, {'common/topic/notable_types': 355}, {'music/album/release_type': 58}, {'tv/tv_program/genre': 2}, {'film/film/genre': 3}, {'music/release/format': 3}] |
| 22 | Unanswerable | greenland | ['iceland'] | ['0kj003k', '0l30m0h', '0m7317g', '0mv8pzq', '0mrzlrp'] | what album was the song <e> featured on | music/release_track/release | [{'music/release_track/release': 1}] |
| 23 | Unanswerable | bus | ['56 dartmouth crossing'] | ['01bjv'] | name a <e> transit line | metropolitan_transit/transit_vehicle/transit_lines | [{'metropolitan_transit/transit_vehicle/transit_lines': 1}, {'metropolitan_transit/transit_service_type/transit_lines': 1}] |
| 24 | Unanswerable | bidaai | ['music & musicals'] | ['02qpjw4'] | what type of film is <e> | media_common/netflix_title/netflix_genres | [{'film/film/genre': 318}, {'media_common/netflix_title/netflix_genres': 31}, {'film/film/language': 4}] |
| 25 | Unanswerable | 8596 alchata | ['palomar observatory'] | ['03y0pzy'] | where was <e> discovered | astronomy/astronomical_discovery/discovery_site | [{'astronomy/astronomical_discovery/discovery_site': 37}, {'astronomy/star_system_body/star_system': 1}] |
| 26 | Unanswerable | dirty love | ['dirty love'] | ['0122nsg', '0ntj01k', '0m6lqn_', '015n353', '012f1y0'] | what is a track from <e> | music/release/album | [{'music/release_track/recording': 7}, {'music/release/album': 1}, {'music/recording/tracks': 22}, {'music/release_track/release': 9}, {'music/release/track_list': 38}, {'music/recording/song': 2}, {'music/composition/recordings': 1}] |
| 27 | Unanswerable | minutes to midnight | ["valentine's day"] | ['0f83dpg', '0ftx4vb', '0sj290r', '0sgz3c9', '0f7ffd9'] | what track is on <e> | music/release/track | [{'music/release/track_list': 4}, {'music/release/track': 7}] |
| 28 | Unanswerable | the fame monster | ['brd', 'bundesrepublik deutschland', 'deutschland'] | ['0fwxcb3', '0g7c2t3', '0g85hvb', '0np_20y', '0f_b6b7'] | what country was <e> done in | music/release/region | [{'music/release/region': 1}] |
| 29 | Unanswerable | the frame-up | ['drama', 'drama film', 'dramatic programming'] | ['09vnw_k'] | what kind of movie is <e> | film/film/genre | [{'film/film/genre': 141}, {'film/film/language': 1}] |
| 30 | Unanswerable | reading | ['reading'] | ['0mft29s', '0ns5s8y', '0ns5rtq', '0m0y19j', '0d_p_4q'] | which release was <e> on | music/release_track/recording | [{'music/release_track/recording': 1}, {'music/release_track/release': 6}, {'music/recording/releases': 6}] |
| 31 | Unanswerable | clyde | ['missouri', 'mo', 'show-me state'] | ['0_z5z', '02wbwd', '049528f', '01138j', '04b0ky2'] | what us state contains <e> | location/location/containedby | [{'location/location/containedby': 5}] |
| 32 | Unanswerable | robert harris | ['canada', 'canuckistan', 'dominion of canada'] | ['0gc63b2', '0gc3lhh', '0hf2gqv', '01vd4x', '0v9djbr'] | where was <e> ( painter ) born | people/person/place_of_birth | [{'people/person/place_of_birth': 1}] |
| 33 | Unanswerable | lamont | ['america', "estats units d'amèrica", 'the states'] | ['0q_84', '04b2tsy', '0114q4', '09j5hv', '0480bx_'] | which country is <e> in | location/location/containedby | [{'location/location/containedby': 58}, {'location/hud_county_place/county': 1}] |
| 34 | Unanswerable | the story of the amulet | ['fiction'] | ['06fyp_'] | what is the genre of the book <e> | book/book/genre | [{'book/written_work/subjects': 1}, {'book/book/genre': 42}] |
| 35 | Unanswerable | goga kapoor | ['bharat', 'bharat ganrajya', 'hindustan'] | ['0j_9xt'] | which country was <e> born in | people/person/place_of_birth | [{'people/person/nationality': 15}, {'people/person/place_of_birth': 30}] |
| 36 | Unanswerable | cobra: studio version | ['fantasia'] | ['0dp73l0', '0d_bz8k', '033r2wp'] | what is a track from the release <e> | music/release/track | [{'music/release/track_list': 5}, {'music/release/track': 7}] |
| 37 | Unanswerable | the silence of the lambs | ['thomas harris'] | ['028yh_z', '0ccd8s'] | who wrote the book <e> | book/written_work/author | [{'book/book_edition/author_editor': 4}, {'book/written_work/author': 38}] |
| 38 | Unanswerable, Noise | black fire | ['african-american studies'] | ['04w7810'] | what is <e> about | book/written_work/subjects | [{'book/written_work/subjects': 86}, {'book/book/genre': 2}] |
| 39 | Unanswerable | namco | ['crisis zone'] | ['01rt2z'] | what is a game published by <e> | cvg/cvg_publisher/games_published | [{'cvg/cvg_publisher/game_versions_published': 7}, {'cvg/cvg_publisher/games_published': 4}] |
| 40 | Unanswerable | half-life 2 | ['first-person shooter', 'fps'] | ['02rncz'] | what type of game is <e> | cvg/computer_videogame/cvg_genre | [{'cvg/computer_videogame/cvg_genre': 54}, {'cvg/computer_videogame/gameplay_modes': 8}] |
| 41 | Unanswerable | a place in the sun | ['romance', 'romance film', 'romantic drama'] | ['0gdjrjk', '0b_xrk3', '0gx2sh8', '072192'] | what kind of motion picture is <e> | film/film/genre | [{'film/film/genre': 2}] |
| 42 | Unanswerable | ellen allien remix collection | ['album'] | ['03g22tr', '01n61t7'] | how was the album <e> released | music/album/release_type | [{'music/album/release_type': 5}] |
| 43 | Unanswerable | medford bryan evans | ['america', "estats units d'amèrica", 'the states'] | ['0b77lg8'] | what country is <e> from | people/person/nationality | [{'people/person/nationality': 279}, {'people/person/place_of_birth': 10}] |
| 44 | Unanswerable | star | ['star'] | ['0xsplg2', '0my34h', '0pl9fs', '0g2cm8_', '0dv26_w'] | the recording <e> is an adaptation of which song | music/recording/canonical_version | [{'music/recording/canonical_version': 1}] |
| 45 | Unanswerable | roll over beethoven | ['roll over beethoven'] | ['0_dj8k7', '0vz3t2', '0lvtdvf', '0128j16', '0mxfk2c'] | what is the name of a track from <e> | music/recording/tracks | [{'music/artist/track': 1}, {'music/release/track': 4}, {'music/recording/tracks': 2}, {'music/release_track/recording': 2}] |
| 46 | Unanswerable | bink! | ['singer', 'vocalist'] | ['01w9sqk'] | what is <e> 's profession | people/person/profession | [{'people/person/profession': 333}, {'common/topic/notable_types': 87}, {'people/person/gender': 3}] |
| 47 | Unanswerable | soon | ['chris squire', 'christopher russell edward \\"chris\\" squire', 'christopher russell edward squire'] | ['0_7v2w6', '02r8389', '0zx5yph'] | who wrote the composition <e> | music/composition/lyricist | [{'music/composition/lyricist': 5}, {'music/composition/composer': 2}] |
| 48 | Unanswerable, Noise | the cure | ['inbetween days'] | ['0g1g24_', '03fh4r0', '039qfbx', '0fd7prw', '0fjqxks'] | what is a song by <e> | music/artist/track | [{'music/artist/track': 79}, {'music/release/track_list': 1}] |
| 49 | Unanswerable | fantasy | ['les contes de la nuit', 'tales of the night'] | ['0_j4156', '0xz7xp', '01ltz_b', '017kpb1', '0_6jhhl'] | what is a type of <e> | film/film_genre/films_in_this_genre | [{'common/topic/notable_types': 1}, {'film/film_genre/films_in_this_genre': 7}] |


In [23]:
import json
# Persist the per-question (relation, subject_mid) interpretations. The file holds JSON
# despite its .txt extension: each tuple is written as a JSON array, and questions that
# were skipped in the evaluation loop appear as empty lists.
with open('question_interpretations.txt', 'w') as outfile:
    json.dump(interpretations, outfile)