In [1]:
import json
import glob
import pandas as pd
import requests

In [2]:
# Memo of upper-cased Wikidata id -> resolved name, shared by every id2name()
# call so each id is requested from the API at most once per kernel session.
cache = {}


def id2name(q, timeout=10):
    """Return the English Wikidata label for the entity id ``q``.

    Args:
        q: Wikidata entity id such as ``'Q42'``; matched case-insensitively
            (it is upper-cased before the lookup).
        timeout: Seconds to wait for the HTTP request before ``requests``
            gives up, so a stalled connection cannot hang the notebook.

    Returns:
        The English label, or the upper-cased id itself when the response
        carries no English label for that entity.

    Raises:
        requests.HTTPError: The API answered with an error status code.
        KeyError: The response has no entry for ``q`` (for example an API
            error payload).
    """
    q = q.upper()
    if q in cache:
        return cache[q]
    # Let requests build and escape the query string instead of formatting
    # the id into the URL by hand.
    resp = requests.get(
        'https://www.wikidata.org/w/api.php',
        params={
            'action': 'wbgetentities',
            'props': 'labels',
            'ids': q,
            'languages': 'en',
            'format': 'json',
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    entity = resp.json()['entities'][q]
    # A single entity without an English label must not abort a whole batch
    # of lookups; fall back to the id so the row stays identifiable.
    v = entity.get('labels', {}).get('en', {}).get('value', q)
    cache[q] = v
    return v

In [3]:
def load_extractions(pattern):
    """Read every JSON-lines record from the files matching ``pattern``.

    Args:
        pattern: Glob pattern selecting the part files to read.

    Returns:
        A ``(paths, records)`` tuple: the matched paths in sorted order and
        the decoded JSON objects, one per non-blank line, in that file order.
    """
    # glob returns matches in arbitrary order; sorting keeps the record order
    # (and therefore the DataFrame row index built from it) stable across runs.
    paths = sorted(glob.glob(pattern))
    records = []
    for path in paths:
        # Read as UTF-8 explicitly rather than relying on the locale default;
        # the extracted sentences contain non-ASCII characters.
        with open(path, encoding="utf-8") as handle:
            # Stream line by line and skip blank lines, which json.loads rejects.
            records.extend(json.loads(line) for line in handle if line.strip())
    return paths, records


files, data = load_extractions("extractions/part-*")

In [4]:
# Number of top-ranked rows whose ids are resolved to names and exported to CSV.
EVAL_NUM = 100
# Predicate id the rows are filtered on; also the stem of the exported CSV name.
# NOTE(review): P69 is presumably the Wikidata "educated at" property - confirm.
EVAL_REL = 'P69'

In [5]:
# One row per loaded record, ordered from the highest 'probability' to the
# lowest. Chaining returns the sorted frame directly instead of sorting in place.
df = pd.DataFrame.from_records(data).sort_values('probability', ascending=False)

In [6]:
# Keep only the rows whose predicted predicate is the relation being evaluated.
df = df.loc[df['predictedPredicate'].eq(EVAL_REL)]

In [7]:
# Resolve the object and subject ids of the first EVAL_NUM rows to names,
# objects first, one id2name() call per row in row order.
obj_names = list(map(id2name, df['obj'].iloc[:EVAL_NUM]))
sub_names = list(map(id2name, df['subject'].iloc[:EVAL_NUM]))

In [8]:
# Wrap the resolved names in single-column frames that carry the index labels
# of the first EVAL_NUM rows, so the join below aligns them row by row.
df_obj = pd.DataFrame(obj_names, index=df.index[:EVAL_NUM], columns=['obj_name'])
df_sub = pd.DataFrame(sub_names, index=df.index[:EVAL_NUM], columns=['sub_name'])
# Drop name columns left behind by an earlier run of this cell first: joining
# onto a frame that already has them raises ValueError (overlapping columns),
# which made the cell fail when executed a second time.
df = (
    df.drop(['sub_name', 'obj_name'], axis=1, errors='ignore')
      .join(df_sub)
      .join(df_obj)
)

In [9]:
# Take the first EVAL_NUM rows, keep the columns wanted in the export (names
# first, raw ids last), write them to '<EVAL_REL>.csv' and display the table.
subset = df.iloc[:EVAL_NUM].loc[
    :, ['sub_name', 'obj_name', 'sentence', 'probability', 'subject', 'obj']
]
subset.to_csv(EVAL_REL + '.csv')
subset

Unnamed: 0,sub_name,obj_name,sentence,probability,subject,obj
46970,Ted Nelson,Brown University,"Ted Nelson coined the words ""hypertext"" and ""h...",0.946554,Q62852,Q49114
106692,Robert Johnson,Cornell University,Robert Johnson received his Ph.D. from Cornell...,0.934588,Q192133,Q49115
12631,Orlando Patterson,Harvard University,"Orlando Patterson, a sociology professor at Ha...",0.934369,Q7103155,Q13371
22170,Jason Mattera,Roger Williams University,"In 2005, Jason Mattera graduated summa cum lau...",0.931528,Q6163052,Q3940116
28773,Paul Barbara,Hofstra University,Paul Barbara received his bachelor’s degree in...,0.931357,Q10804357,Q1623314
62378,Gerald Cleaver,John Henry Schwarz,Gerald Cleaver did his Ph.D. at Caltech where ...,0.931281,Q547849,Q710213
38907,Kevin McNulty,Yale University,McNulty earned a bachelor's degree in 1976 fro...,0.931111,Q3195759,Q49112
46405,Dana Ward,"University of California, Berkeley",Dana Ward received his BA from University of C...,0.930664,Q1159240,Q168756
122503,Ben Finney,University of Hawaii,"Ben Finney, B.A. 1955 – University of Hawaii p...",0.929676,Q4885640,Q217439
54734,Mervyn Warren,Oakwood University,"Undeterred, Warren graduated summa cum laude i...",0.929418,Q3306699,Q1056848


In [10]:
# Display the whole filtered frame. sub_name/obj_name were joined only for the
# first EVAL_NUM rows, so any rows beyond that have no value in those columns.
df

Unnamed: 0,obj,predictedPredicate,probability,sentence,source,subject,sub_name,obj_name
46970,Q49114,P69,0.946554,"Ted Nelson coined the words ""hypertext"" and ""h...",urn:wikidata:Q7805975,Q62852,Ted Nelson,Brown University
106692,Q49115,P69,0.934588,Robert Johnson received his Ph.D. from Cornell...,urn:wikidata:Q7346150,Q192133,Robert Johnson,Cornell University
12631,Q13371,P69,0.934369,"Orlando Patterson, a sociology professor at Ha...",urn:wikidata:Q1132127,Q7103155,Orlando Patterson,Harvard University
22170,Q3940116,P69,0.931528,"In 2005, Jason Mattera graduated summa cum lau...",urn:wikidata:Q6163052,Q6163052,Jason Mattera,Roger Williams University
28773,Q1623314,P69,0.931357,Paul Barbara received his bachelor’s degree in...,urn:wikidata:Q10804357,Q10804357,Paul Barbara,Hofstra University
62378,Q710213,P69,0.931281,Gerald Cleaver did his Ph.D. at Caltech where ...,urn:wikidata:Q3103863,Q547849,Gerald Cleaver,John Henry Schwarz
38907,Q49112,P69,0.931111,McNulty earned a bachelor's degree in 1976 fro...,urn:wikidata:Q6396980,Q3195759,Kevin McNulty,Yale University
46405,Q168756,P69,0.930664,Dana Ward received his BA from University of C...,urn:wikidata:Q1159240,Q1159240,Dana Ward,"University of California, Berkeley"
122503,Q217439,P69,0.929676,"Ben Finney, B.A. 1955 – University of Hawaii p...",urn:wikidata:Q6603038,Q4885640,Ben Finney,University of Hawaii
54734,Q1056848,P69,0.929418,"Undeterred, Warren graduated summa cum laude i...",urn:wikidata:Q3306699,Q3306699,Mervyn Warren,Oakwood University
