In [33]:
import json
import glob
import pandas as pd
import requests

In [34]:
# In-memory memo of entity ID -> label, so each distinct ID costs one request.
cache = {}

# Endpoint and per-request timeout (seconds) for label lookups. The timeout
# keeps a stalled connection from hanging the notebook indefinitely.
_WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php'
_REQUEST_TIMEOUT_S = 10


def id2name(q):
    """Return the English Wikidata label for entity ID ``q``.

    The ID is upper-cased before use, so ``'q42'`` and ``'Q42'`` share one
    cache entry. Results are memoized in the module-level ``cache`` dict.

    Parameters
    ----------
    q : str
        Wikidata entity ID, e.g. ``'Q42'``.

    Returns
    -------
    str
        The English label. If the response holds no English label for the ID
        (no entry for the ID, or no ``en`` label), the upper-cased ID itself
        is returned so that one unlabeled entity does not abort a whole batch
        of lookups.

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-success HTTP status.
    requests.RequestException
        On connection problems or when the timeout expires.
    """
    q = q.upper()
    if q in cache:
        return cache[q]

    # Let requests build and encode the query string instead of formatting
    # the ID into the URL by hand.
    resp = requests.get(
        _WIKIDATA_API_URL,
        params={
            'action': 'wbgetentities',
            'props': 'labels',
            'ids': q,
            'languages': 'en',
            'format': 'json',
        },
        timeout=_REQUEST_TIMEOUT_S,
    )
    resp.raise_for_status()

    # Walk the payload defensively: any level may be absent (or not a mapping)
    # when the entity is unknown or has no English label.
    entity = resp.json().get('entities', {}).get(q)
    labels = entity.get('labels') if isinstance(entity, dict) else None
    english = labels.get('en') if isinstance(labels, dict) else None
    if isinstance(english, dict) and 'value' in english:
        v = english['value']
    else:
        v = q  # fall back to the raw ID rather than raising KeyError

    cache[q] = v
    return v

In [35]:
# Load every extraction shard (one JSON object per line) into a flat list.
# glob makes no ordering guarantee, so sort the paths: records then load in the
# same order, and get the same row index, on every run.
files = sorted(glob.glob("extractions/part-*"))
data = []
for path in files:
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # default encoding.
    with open(path, encoding='utf-8') as file:
        # Stream line by line rather than materializing the whole file, and
        # skip blank lines (e.g. a trailing newline) that json.loads rejects.
        data.extend(json.loads(line) for line in file if line.strip())

In [36]:
# Number of top-ranked rows (by probability) that get name lookups and are
# exported for evaluation.
EVAL_NUM = 100
# Predicate ID to evaluate: rows are filtered on `predictedPredicate == EVAL_REL`
# and the export is written to '<EVAL_REL>.csv'.
# NOTE(review): P26 is presumably Wikidata's "spouse" property - confirm.
EVAL_REL = 'P26'

In [37]:
# Build one row per extraction record, ranked from most to least confident.
# A stable sort keeps rows with equal probability in their load order, so the
# later top-EVAL_NUM slice is reproducible; the default quicksort gives no such
# guarantee. Chaining also avoids mutating the frame in place.
df = pd.DataFrame.from_records(data).sort_values(
    'probability', ascending=False, kind='mergesort'
)

In [38]:
# Keep only the rows whose predicted predicate is the relation under evaluation.
df = df.loc[df['predictedPredicate'].eq(EVAL_REL)]

In [39]:
# Resolve labels for the first EVAL_NUM rows only: objects first, then
# subjects. id2name memoizes, so an ID that repeats is fetched once.
obj_names = list(map(id2name, df['obj'].iloc[:EVAL_NUM]))
sub_names = list(map(id2name, df['subject'].iloc[:EVAL_NUM]))

In [40]:
# One-column frames holding the resolved labels, indexed like the first
# EVAL_NUM rows of df so that join() aligns them by row label.
df_obj = pd.DataFrame(obj_names, index=df[:EVAL_NUM].index, columns=['obj_name'])
df_sub = pd.DataFrame(sub_names, index=df[:EVAL_NUM].index, columns=['sub_name'])
# Drop name columns left by a previous run of this cell before joining.
# Without this, re-running raises ValueError (overlapping columns) because df
# already carries sub_name / obj_name. On a first run the drop is a no-op.
# Rows past the first EVAL_NUM get NaN in both name columns.
df = (
    df.drop(columns=['sub_name', 'obj_name'], errors='ignore')
    .join(df_sub)
    .join(df_obj)
)

In [41]:
# Take the first EVAL_NUM rows with the readable names leading, write them to
# '<EVAL_REL>.csv' (row index included), and display the result.
subset = df.iloc[:EVAL_NUM].loc[
    :, ['sub_name', 'obj_name', 'sentence', 'probability', 'subject', 'obj']
]
subset.to_csv(EVAL_REL + '.csv')
subset

Unnamed: 0,sub_name,obj_name,sentence,probability,subject,obj
38627,Chichibunomiya Rugby Stadium,Princess Chichibu,Prince Chichibu and his wife Princess Setsuko ...,0.985383,Q631237,Q3244162
91791,Michael Owen,Montgomery,Owen and his wife Margaret Montgomery raised f...,0.976180,Q128829,Q29364
49750,Robert Browning,Elizabeth Barrett Browning,The poet Robert Browning and his wife Elizabet...,0.976037,Q233265,Q228494
109237,Robert Browning,Elizabeth I of England,The poet Robert Browning and his wife Elizabet...,0.976037,Q233265,Q7207
22845,Robert Browning,Elizabeth I of England,The poet Robert Browning and his wife Elizabet...,0.976037,Q233265,Q7207
101615,Virgil,James Earp,Virgil and his wife Addie Earp followed the ne...,0.971164,Q1398,Q3806537
51326,Elisabeth of Bavaria,Princess Stéphanie of Monaco,Elisabeth and her daughter Stephanie did not h...,0.967943,Q150782,Q215012
2317,Prince Adalbert of Bavaria,Princess Pilar of Bavaria,Prince Adalbert was a writer and historian; Pr...,0.967744,Q60610,Q2507563
27315,Otto Devrient,Charlotte,Gustav Emil Devrient was the youngest son of t...,0.967197,Q88289,Q16565
115600,Marshall University,Confederate States of America,"In October 1942, Marshall and his wife Emma Gr...",0.965360,Q1379613,Q81931


In [42]:
# Display the relation-filtered frame, including the joined sub_name / obj_name columns.
df

Unnamed: 0,obj,predictedPredicate,probability,sentence,source,subject,sub_name,obj_name
38627,Q3244162,P26,0.985383,Prince Chichibu and his wife Princess Setsuko ...,urn:wikidata:Q5956403,Q631237,Chichibunomiya Rugby Stadium,Princess Chichibu
91791,Q29364,P26,0.976180,Owen and his wife Margaret Montgomery raised f...,urn:wikidata:Q3350218,Q128829,Michael Owen,Montgomery
49750,Q228494,P26,0.976037,The poet Robert Browning and his wife Elizabet...,urn:wikidata:Q725741,Q233265,Robert Browning,Elizabeth Barrett Browning
109237,Q7207,P26,0.976037,The poet Robert Browning and his wife Elizabet...,urn:wikidata:Q583271,Q233265,Robert Browning,Elizabeth I of England
22845,Q7207,P26,0.976037,The poet Robert Browning and his wife Elizabet...,urn:wikidata:Q1423531,Q233265,Robert Browning,Elizabeth I of England
101615,Q3806537,P26,0.971164,Virgil and his wife Addie Earp followed the ne...,urn:wikidata:Q3806537,Q1398,Virgil,James Earp
51326,Q215012,P26,0.967943,Elisabeth and her daughter Stephanie did not h...,urn:wikidata:Q93390,Q150782,Elisabeth of Bavaria,Princess Stéphanie of Monaco
2317,Q2507563,P26,0.967744,Prince Adalbert was a writer and historian; Pr...,urn:wikidata:Q242421,Q60610,Prince Adalbert of Bavaria,Princess Pilar of Bavaria
27315,Q16565,P26,0.967197,Gustav Emil Devrient was the youngest son of t...,urn:wikidata:Q95358,Q88289,Otto Devrient,Charlotte
115600,Q81931,P26,0.965360,"In October 1942, Marshall and his wife Emma Gr...",urn:wikidata:Q4768939,Q1379613,Marshall University,Confederate States of America
