## Start

In [1]:
from rosette.api import API, DocumentParameters, RosetteException
import pandas as pd
import wikipedia
import requests
import numpy as np

In [3]:
# %load links/utils.py
# Relationship name -> list of SPARQL query templates for the Wikidata query
# service. The keys are the relationship names looked up by
# Utils.ground_truth(); a "^-1" suffix marks the inverse direction of a
# relationship (see how Analyse() appends it to the predicate). Every template
# contains exactly one "%s" placeholder, which Utils.get_results() fills with
# the Wikidata id of the subject. When a relationship has several templates
# (e.g. CEO and chairperson for 'Organization top employees'), the labels
# returned by all of them are pooled into one pseudo ground truth list.
QUERY_DICT = {'Organization Founded By^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P112 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization Founded By':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P112 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                        ],
              'Organization Headquarters':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P159 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                          ],
              'Organization Subsidiary Of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P355 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                              ],
              'Organization Subsidiary Of':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P355 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization top employees':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P169 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""", # CEO
                                            """SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P488 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""" # Chairperson
                                            ],
              'Person Employee or Member of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P108 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""",
                                            """SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P527 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""" 
                                                ],
              'Person Employee or Member of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P108 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""",
                                              """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P463 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""## member of ---> Band Members
                                            ],
              'Person Place of Birth':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P19 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                      ],
              'Person Current and Past Location of Residence':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P551 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                                              ],
              'Person Parents':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P22 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Father
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P25 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Mother
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P1038 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""" #Relative (Adopted Parents?)
                                # Shall we include stepparents??
                               ],
              'Person Parents^-1':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P40 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                  ],
              'Person Siblings':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P3373 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                ],
              'Person Spouse':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P26 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                              ],
              'Citizen of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P27 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                           ],
              'Educated at':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P69 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                            ]
             }




from SPARQLWrapper import SPARQLWrapper, JSON   
from rosette.api import API, DocumentParameters, RosetteException
import pandas as pd
import wikipedia
import requests
import numpy as np
import pickle
import random
from threading import Lock
import os, sys
import threading
from threading import Thread
import time
import queue

class Utils:
    """Wikidata / Rosette helpers backed by a persistent name -> Wikidata-id cache.

    ``self.id_dict`` maps a search string (an entity name) to the id of the
    first ``wbsearchentities`` hit for it. The cache is loaded from
    ``DICT_PATH`` on construction and merged back into that pickle by
    :meth:`save_dict` (also attempted when the instance is garbage collected).
    """

    # Pickle holding the name -> id cache, relative to the working directory.
    DICT_PATH = 'data/dumps/id_dict.pkl'
    # MediaWiki action API endpoint used for entity search.
    API_ENDPOINT = "https://www.wikidata.org/w/api.php"
    # NOTE(security): this key is committed to the repository and should be
    # rotated. It is only kept as a fallback so existing runs keep working;
    # set the ROSETTE_API_KEY environment variable to override it.
    _LEGACY_ROSETTE_KEY = "89350904c7392a44f0f9019563be727a"

    def __init__(self):
        self.id_dict = {}
        self.lock = Lock()  # guards writes to id_dict and the pickle file
        self.load_dict()

    def __del__(self):
        # Best-effort persistence: an error here (partially built instance,
        # interpreter shutdown, read-only directory) must not surface from the
        # garbage collector.
        try:
            self.save_dict()
        except Exception:
            pass

    def _search(self, query):
        """Issue a ``wbsearchentities`` request for ``query``; return the HTTP response.

        Network errors raised by ``requests`` propagate to the caller.
        """
        params = {
            'action': 'wbsearchentities',
            'format': 'json',
            'language': 'en',
            'search': query
        }
        return requests.get(self.API_ENDPOINT, params=params)

    def get_id(self, message, dict_to_use=None):
        """Return the Wikidata id for the name ``message``.

        The cache is consulted first; otherwise the first search hit is cached
        and returned.

        Args:
            message: Entity name to resolve.
            dict_to_use: Unused; kept for backward compatibility.

        Returns:
            The id string (e.g. ``'Q19837'``), or ``-1`` when the search
            response contains no usable hit.
        """
        if message in self.id_dict:
            return self.id_dict[message]
        response = self._search(message)
        try:
            entity_id = response.json()['search'][0]['id']
        except Exception:
            return -1  # The id doesn't exist
        with self.lock:
            self.id_dict[message] = entity_id
        return entity_id

    def id_to_name(self, eid):
        """Return a name for the Wikidata id ``eid``.

        A cached name that maps to ``eid`` is preferred; otherwise ``eid`` is
        sent to the entity search and the first hit's label is cached and
        returned.

        Returns:
            The name/label string, or ``-1`` when nothing is found.
        """
        # Single pass under the lock so a concurrent insert cannot break the
        # iteration.
        with self.lock:
            for name, known_id in self.id_dict.items():
                if known_id == eid:
                    return name
        response = self._search(eid)
        try:
            hit = response.json()['search'][0]  # parse the body only once
            label, found_id = hit['label'], hit['id']
        except Exception:
            return -1  # The id doesn't exist
        with self.lock:
            self.id_dict[label] = found_id
        return label

    def get_results(self, query, value, endpoint_url="https://query.wikidata.org/sparql"):
        """Run the SPARQL template ``query`` with ``value`` substituted for ``%s``.

        Returns:
            The JSON result converted by SPARQLWrapper.
        """
        sparql = SPARQLWrapper(endpoint_url)
        sparql.setQuery(query % value)
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()

    def ground_truth(self, relation, subject, debug=False):
        """Collect the Wikidata labels related to ``subject`` through ``relation``.

        Args:
            relation: Key of ``QUERY_DICT``.
            subject: Entity name, resolved with :meth:`get_id`.
            debug: When true, print ``relation`` and ``subject`` on failure.

        Returns:
            List of label strings; empty when the subject cannot be resolved,
            the relation is unknown or a query fails.
        """
        gt = []
        try:
            subject_id = self.get_id(subject)  # resolved once for all templates
            if subject_id == -1:
                # -1 is the "not found" sentinel; formatting it into the
                # template would only produce an invalid SPARQL query.
                if debug:
                    print (relation, subject)
                return gt
            results = [self.get_results(query, subject_id) for query in QUERY_DICT[relation]]
            for result in results:
                for r in result["results"]["bindings"]:
                    gt.append(r['itemLabel']['value'])
        except Exception:
            if debug:
                print (relation, subject)
        return gt

    def add_ground_truth(self, df, debug=False):
        """Add 'Pseudo Ground Truth' and 'Count_PGT' columns to ``df``.

        ``df`` must expose 'Subject' and 'Relationship' after ``reset_index()``.
        An empty frame is returned unchanged; otherwise the result is indexed
        by ('Subject', 'Relationship').
        """
        if df.empty:
            return df
        if debug:
            print (df)
        df = df.reset_index()
        df['Pseudo Ground Truth'] = df.apply(lambda row: self.ground_truth(row['Relationship'], row['Subject']), axis=1)
        df['Count_PGT'] = df['Pseudo Ground Truth'].apply(len)
        df = df.set_index(['Subject','Relationship'])
        return df

    def add_recall_score(self, df):
        """Add a 'Recall Prediction' column of random values in [0, 0.99] to ``df`` in place."""
        df['Recall Prediction'] = np.random.randint(0, 100, df.shape[0])/100
        return df

    def load_dict(self):
        """Load the id cache from ``DICT_PATH``; start empty if it cannot be read."""
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # cache files produced by this code.
        try:
            with open(self.DICT_PATH, 'rb') as fp:
                self.id_dict = pickle.load(fp)
        except Exception:
            print ("Creating a new Dictionary")
            self.id_dict = {}

    def save_dict(self):
        """Merge the in-memory cache with the one on disk and write it back.

        Entries already on disk take precedence over in-memory ones. A missing
        or unreadable cache file is treated as empty, and the target directory
        is created when needed, so the very first save succeeds.
        """
        with self.lock:
            try:
                old_dict = self.get_dict()
            except (OSError, EOFError, pickle.UnpicklingError):
                old_dict = {}
            self.id_dict = {**self.id_dict, **old_dict}
            directory = os.path.dirname(self.DICT_PATH)
            if directory:
                os.makedirs(directory, exist_ok=True)
            with open(self.DICT_PATH, 'wb') as fp:
                pickle.dump(self.id_dict, fp, protocol=pickle.HIGHEST_PROTOCOL)
                print("Saved")

    def get_dict(self):
        """Return the cache stored at ``DICT_PATH``.

        Raises:
            OSError: If the file cannot be opened (e.g. it does not exist).
        """
        with open(self.DICT_PATH, 'rb') as fp:
            return pickle.load(fp)

    def Analyse(self, message, alt_url='https://api.rosette.com/rest/v1/'):
        """Extract relationships from the Wikipedia page titled ``message``.

        The first 20000 characters of the page are sent to the Rosette
        relationships endpoint. Every relationship is recorded as
        (arg1 -> arg2); when arg2 resolves to the same Wikidata id as
        ``message`` an additional inverse record (predicate + '^-1') is added.

        Returns:
            ``(rel, message_id)`` where ``rel`` is a list of dicts with keys
            'Relationship', 'Subject', 'Object' and 'Confidence'. On a
            ``RosetteException`` the error is printed and ``rel`` is empty, so
            callers can always unpack two values.
        """
        api = API(user_key=os.environ.get("ROSETTE_API_KEY", self._LEGACY_ROSETTE_KEY),
                  service_url=alt_url)

        params = DocumentParameters()
        params["content"] = wikipedia.page(message).content[:20000]
        rel = []
        message_id = self.get_id(message)
        message_split = message.split(" ")
        try:
            RESULT = api.relationships(params)
        except RosetteException as exception:
            print(exception)
            return rel, message_id

        for r in RESULT['relationships']:
            confidence = '?'
            if "confidence" in r:
                confidence = str(round(r["confidence"],2))
            # Cheap token overlap test first, so the id lookup (possibly an
            # HTTP call) only runs for plausible aliases of the page subject.
            arg2_split = r['arg2'].split(" ")
            if any(s in arg2_split for s in message_split):
                if self.get_id(r['arg2']) == message_id:
                    rel.append({'Relationship':r['predicate']+'^-1', 'Subject':r['arg2'], 'Object':r['arg1'], 'Confidence': confidence})
            rel.append({'Relationship':r['predicate'],'Subject':r['arg1'],'Object':r['arg2'], 'Confidence': confidence})

        # Persist the ids resolved while analysing this document.
        self.save_dict()
        return rel, message_id


class HeatMaps(Thread):
    """Thread that compares extracted relationships with Wikidata for one entity.

    For the entity given by ``eid`` or ``name`` and a single ``relation`` it
    computes how many objects the Rosette API extracted from the Wikipedia
    page, how many objects the pseudo ground truth contains, and how many of
    the extracted objects are contained in the ground truth. The thread starts
    itself at the end of ``__init__``; call ``join()`` before ``get_values()``.
    """

    # NOTE(security): this key is committed to the repository and should be
    # rotated. It is only kept as a fallback; set ROSETTE_API_KEY to override.
    _LEGACY_ROSETTE_KEY = "89350904c7392a44f0f9019563be727a"

    def __init__(self, lock, relation='Educated at', eid=None, name=None, rel_dict=None):
        """
        Args:
            lock: Lock shared between workers; held around the Rosette call.
            relation: Relationship name; a '^-1' suffix selects the inverse.
            eid: Wikidata id of the entity (used when ``name`` is falsy).
            name: Entity name; when given, the id is resolved from it.
            rel_dict: Optional cache ``{eid: {'PGT':..., 'Extracted':...,
                'Contained':...}}``; cached entities skip all remote calls.
        """
        Thread.__init__(self)
        self.q1 = queue.Queue()  # receives the set of extracted object names
        self.q2 = queue.Queue()  # receives the set of ground truth labels
        self.u = Utils()
        self.lock = lock
        # None instead of a mutable default, so instances never share a dict
        # by accident.
        self.rel_dict = rel_dict if rel_dict is not None else {}
        self.eid = eid
        self.message = name
        self.error = None
        self.relation = relation
        self.inverse = "^-1" in relation
        if name:
            self.eid = self.u.get_id(name)
        else:
            self.message = self.u.id_to_name(eid)
        self.start()

    def run(self):
        """Fetch extraction and ground truth in parallel (unless cached), then score."""
        if self.eid not in self.rel_dict:
            a = Thread(target = self.Analyse, args = ())
            b = Thread(target = self.ground_truth, args = ())
            a.start()
            b.start()
            a.join()
            b.join()
        self.matrix_block()

    def _load_page_text(self):
        """Return the first 20000 characters of the entity's Wikipedia page.

        On a disambiguation page ``self.message`` is replaced by the first
        option that resolves to ``self.eid`` (or by " " when there is none)
        and the lookup is retried. Returns ``None`` and sets ``self.error``
        when the page does not exist or the retries start repeating.
        """
        tried = set()
        while True:
            tried.add(self.message)
            try:
                return wikipedia.page(self.message).content[:20000]
            except wikipedia.DisambiguationError as e:
                print(self.eid, self.message)
                replacement = " "
                for option in e.options:
                    if option != self.message and self.u.get_id(option) == self.eid:
                        replacement = option
                        break
                if replacement in tried:
                    # Retrying the same title again would loop forever.
                    self.error = str(self.message) + " could not be disambiguated"
                    print (self.error)
                    return None
                self.message = replacement
            except wikipedia.exceptions.PageError as e:
                # id_to_name may return the -1 sentinel, hence str().
                self.error = str(self.u.id_to_name(self.eid)) + " " + str(e)
                print (self.error)
                return None

    def Analyse(self):
        """Extract the objects of ``self.relation`` for this entity via Rosette.

        Always puts exactly one set of object names on ``self.q1`` (empty on
        any failure) so that :meth:`matrix_block` can never block forever.
        Unexpected errors are recorded in ``self.error``.
        """
        pred_list = []
        try:
            text = self._load_page_text()
            if text is None:
                return  # error already recorded; the finally block feeds q1
            api = API(user_key=os.environ.get("ROSETTE_API_KEY", self._LEGACY_ROSETTE_KEY),
                      service_url='https://api.rosette.com/rest/v1/')
            params = DocumentParameters()
            params["content"] = text
            message_id = self.u.get_id(self.message)
            message_split = self.message.split(" ")

            with self.lock:
                RESULT = api.relationships(params)

            # For an inverse relation the entity is expected in arg2 and the
            # answer in arg1; otherwise the other way round.
            if self.inverse:
                subject_key, object_key = 'arg2', 'arg1'
            else:
                subject_key, object_key = 'arg1', 'arg2'
            rel_to_compare = self.relation.split("^-1")[0]

            for r in RESULT['relationships']:
                if r['predicate'] != rel_to_compare:
                    continue
                subject = r[subject_key]
                # Token overlap first (alias names), id lookup only if needed.
                if any(s in subject.split(" ") for s in message_split):
                    if self.u.get_id(subject) == message_id:
                        pred_list.append(r[object_key])
        except RosetteException as exception:
            print(exception)
        except Exception as exception:
            # Surface the failure through get_values() instead of silently
            # reporting zero extracted objects.
            self.error = "%s %r" % (self.message, exception)
            print (self.error)
        finally:
            self.q1.put(set(pred_list))

    def ground_truth(self):
        """Put the set of pseudo ground truth labels for this entity on ``self.q2``."""
        pgt = set(self.u.ground_truth(self.relation, self.message))
        self.q2.put(pgt)

    def matrix_block(self):
        """Set ``self.pgt``, ``self.extracted`` and ``self.contained``.

        Values come from ``rel_dict`` when the entity is cached there;
        otherwise from the two queues. ``contained`` counts extracted names
        whose Wikidata id also occurs among the ground truth ids; names that
        cannot be resolved (id ``-1``) never count as a match.
        """
        if self.eid in self.rel_dict:
            cached = self.rel_dict[self.eid]
            self.pgt = cached['PGT']
            self.extracted = cached['Extracted']
            self.contained = cached['Contained']
        else:
            extracted_names = self.q1.get()  # Extracted from API
            truth_labels = self.q2.get()     # PGT
            self.pgt = len(truth_labels)
            self.extracted = len(extracted_names)
            truth_ids = {self.u.get_id(label) for label in truth_labels}
            truth_ids.discard(-1)  # "unknown" must not match "unknown"
            self.contained = sum(
                1 for name in extracted_names if self.u.get_id(name) in truth_ids
            )

    def get_values(self):
        """Return ``[eid, name, extracted, contained, pgt]``.

        Raises:
            Exception: With the recorded message if the worker failed.
        """
        if self.error:
            raise Exception(self.error)
        return [self.eid, self.message, self.extracted, self.contained, self.pgt]
    
    
    
    
class Distribution(Thread):
    """Thread that measures the length of an entity's Wikipedia page.

    The entity is given by ``eid`` or ``name``. When its id is present in
    ``rel_dict`` the cached 'Doc_Length' is used and the thread is NOT started
    (so it must not be joined); otherwise the thread starts itself at the end
    of ``__init__``.
    """

    def __init__(self, eid=None, name=None, lock=None, rel_dict=None):
        """
        Args:
            eid: Wikidata id of the entity (used when ``name`` is falsy).
            name: Entity name; when given, the id is resolved from it.
            lock: Unused; kept for backward compatibility.
            rel_dict: Optional cache ``{eid: {'Doc_Length': int}}``.
        """
        Thread.__init__(self)
        self.doc_len = None
        self.u = Utils()
        self.eid = eid
        self.message = name
        self.error = None
        if name:
            self.eid = self.u.get_id(name)
        else:
            self.message = self.u.id_to_name(eid)
        # None instead of a mutable default argument.
        rel_dict = rel_dict if rel_dict is not None else {}
        # Look up the resolved id: the ``eid`` argument is None whenever the
        # entity was given by name, which made the cache unreachable.
        if self.eid in rel_dict:
            self.doc_len = rel_dict[self.eid]['Doc_Length']
            return
        self.start()

    def run(self):
        """Fetch the page and store its length in ``self.doc_len``.

        On a disambiguation page ``self.message`` is replaced by the first
        option that resolves to ``self.eid`` (or by " " when there is none)
        and the lookup is retried. ``self.error`` is set when the page does
        not exist or the retries start repeating.
        """
        tried = set()
        while True:
            tried.add(self.message)
            try:
                document = wikipedia.page(self.message).content
                self.doc_len = len(document)
                break
            except wikipedia.DisambiguationError as e:
                print(self.eid, self.message)
                replacement = " "
                for option in e.options:
                    if option != self.message and self.u.get_id(option) == self.eid:
                        replacement = option
                        break
                if replacement in tried:
                    # Retrying the same title again would loop forever.
                    self.error = str(self.message) + " could not be disambiguated"
                    print (self.error)
                    break
                self.message = replacement
            except wikipedia.exceptions.PageError as e:
                # id_to_name may return the -1 sentinel, hence str().
                self.error = str(self.u.id_to_name(self.eid)) + " " + str(e)
                print (self.error)
                break

    def get_values(self):
        """Return ``[eid, name, doc_len]``.

        Raises:
            Exception: With the recorded message if the page lookup failed.
        """
        if self.error:
            raise Exception(self.error)
        return [self.eid, self.message, self.doc_len]

In [11]:
u = Utils()

In [22]:
def Analyse(message):
    """Extract relationships from the Wikipedia page titled ``message``.

    The first 20000 characters of the page are sent to the Rosette
    relationships endpoint. Every relationship is recorded as (arg1 -> arg2);
    when arg2 resolves (through the notebook-level ``u``) to the same Wikidata
    id as ``message``, an additional inverse record (predicate + '^-1') is
    added.

    Returns:
        ``(rel, message_id)`` where ``rel`` is a list of dicts with keys
        'Relationship', 'Subject', 'Object' and 'Confidence'. On a
        ``RosetteException`` the error is printed and ``rel`` is empty, so
        callers can always unpack two values.
    """
    import os

    alt_url = 'https://api.rosette.com/rest/v1/'
    # NOTE(security): the literal key is committed to the repository and
    # should be rotated. It is only kept as a fallback; set ROSETTE_API_KEY to
    # override it.
    user_key = os.environ.get("ROSETTE_API_KEY", "89350904c7392a44f0f9019563be727a")
    api = API(user_key=user_key, service_url=alt_url)

    params = DocumentParameters()
    params["content"] = wikipedia.page(message).content[:20000]
    rel = []
    message_id = u.get_id(message)
    message_split = message.split(" ")
    try:
        RESULT = api.relationships(params)
    except RosetteException as exception:
        print(exception)
        return rel, message_id

    for r in RESULT['relationships']:
        confidence = '?'
        if "confidence" in r:
            confidence = str(round(r["confidence"],2))
        # Token overlap first, so the id lookup only runs for plausible
        # aliases of the page subject.
        arg2_split = r['arg2'].split(" ")
        if any(s in arg2_split for s in message_split):
            if u.get_id(r['arg2']) == message_id:
                rel.append({'Relationship':r['predicate']+'^-1', 'Subject':r['arg2'], 'Object':r['arg1'], 'Confidence': confidence})
        rel.append({'Relationship':r['predicate'],'Subject':r['arg1'],'Object':r['arg2'], 'Confidence': confidence})

    return rel, message_id


In [28]:
# Debugging cell: repeats the setup steps of Analyse() at notebook level for
# the page 'Diorama (band)', so the intermediate values can be inspected.
# NOTE(review): the Rosette key is hard-coded here as well; prefer reading it
# from the environment.
message = 'Diorama (band)'
alt_url='https://api.rosette.com/rest/v1/'
api = API(user_key="89350904c7392a44f0f9019563be727a", service_url=alt_url)

params = DocumentParameters()
# Only the first 20000 characters of the article are sent to the API.
relationships_text_data = wikipedia.page(message).content[:20000]
params["content"] = relationships_text_data
rel = []
# Wikidata id of the page title, resolved through the shared Utils instance `u`.
message_id = u.get_id(message)
message_split = message.split(" ")

In [29]:
RESULT = api.relationships(params)

In [38]:
wikipedia.page('Diorama (band)').content

'Diorama is a German electropop band. The name of the band is a metaphor which represents their notion of music as an artistic form of expression.\n\n\n== History ==\nDiorama was founded in 1996 as a musical project of Torben Wendt. Torben’s music was soon recognized by Adrian Hates of Diary of Dreams, so with his support and the support of Rainer Assmann (Daf/ Fad Gadget), a debut-album “Pale” was released in 1999. It was received very well both by the critics and the audience.In 2000, Torben’s friend Felix Marc joined Diorama as a keyboard player, co-producer and backing vocalist. The second album “Her Liquid Arms” was released in April 2001. Despite its stronger rhythms and more forceful electronic sounds, the music preserved its distinctive atmosphere, established with the first album. The song “Advance” became one of the leading club hits and the band was becoming increasingly popular. The second album was followed by club-oriented single “Device” (December, 2001).\nBassist Bernar

In [41]:
get_id('Diorama (band)')

-1

In [35]:
u.id_to_name('Q207697')

'Diorama'

In [40]:
def get_id(message):
    """Resolve ``message`` to a Wikidata id through the entity search API.

    Returns the id of the first ``wbsearchentities`` hit, or -1 when the
    response contains no usable hit.
    """
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={
            'action': 'wbsearchentities',
            'format': 'json',
            'language': 'en',
            'search': message,
        },
    )
    try:
        first_hit = response.json()['search'][0]
        return first_hit['id']
    except Exception:
        return -1

In [None]:
def Analyse(message, alt_url='https://api.rosette.com/rest/v1/'):
    """Extract relationships from the Wikipedia page titled ``message``.

    The first 20000 characters of the page are sent to the Rosette
    relationships endpoint at ``alt_url``. Every relationship is recorded as
    (arg1 -> arg2); when arg2 resolves to the same Wikidata id as ``message``
    an additional inverse record (predicate + '^-1') is added.

    Returns:
        ``(rel, message_id)`` where ``rel`` is a list of dicts with keys
        'Relationship', 'Subject', 'Object' and 'Confidence' (the raw
        confidence, or -1 when the API reports none). On a
        ``RosetteException`` the error is printed and ``rel`` is empty, so
        callers can always unpack two values.
    """
    import os

    # NOTE(security): the literal key is committed to the repository and
    # should be rotated. It is only kept as a fallback; set ROSETTE_API_KEY to
    # override it.
    user_key = os.environ.get("ROSETTE_API_KEY", "89350904c7392a44f0f9019563be727a")
    api = API(user_key=user_key, service_url=alt_url)

    params = DocumentParameters()
    params["content"] = wikipedia.page(message).content[:20000]
    rel = []
    message_id = get_id(message)
    try:
        RESULT = api.relationships(params)
    except RosetteException as exception:
        print(exception)
        return rel, message_id

    for r in RESULT['relationships']:
        confidence = r["confidence"] if "confidence" in r else -1
        if get_id(r['arg2']) == message_id:
            rel.append({'Relationship':r['predicate']+'^-1', 'Subject':r['arg2'], 'Object':r['arg1'], 'Confidence': confidence})
        rel.append({'Relationship':r['predicate'],'Subject':r['arg1'],'Object':r['arg2'], 'Confidence': confidence})
    return rel, message_id

In [None]:
sample_message = "Steve Jobs" #"Andrew Ng"
result, message_id = Analyse(sample_message)

In [None]:
df = pd.DataFrame(result, columns=['Subject','Relationship','Object','Confidence'])
# print (df)

In [None]:
df

In [None]:
# Rows whose subject resolves to the analysed entity's Wikidata id ...
main_df = df.loc[df['Subject'].map(get_id) == message_id]
# ... and every row that is not part of that selection.
other_df = df[~df.isin(main_df).all(axis=1)]

In [None]:
####################### NEW ####################
# Sort by Object, drop exact duplicate rows and collect the remaining columns
# of every (Subject, Relationship) pair into lists.
e1Grp = df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x)).reset_index()
rows = []
# Flatten the lists again: one output row per (Object, Confidence) pair. The
# apply() is used only for its side effect of appending to `rows`; its return
# value is discarded.
_ = e1Grp.apply(lambda row: [rows.append([row['Subject'],row['Relationship'], e2, e3]) for e2,e3 in zip(row.Object, row.Confidence)], axis=1)
# Rebuild the frame from the flattened rows, indexed by (Subject, Relationship).
e1Grp = pd.DataFrame(rows, columns=e1Grp.columns).set_index(['Subject','Relationship'])

# e1Grp

In [None]:
# Placeholder score: random integers in [0, 99], one per row. assign() builds a
# new frame instead of writing into `main_df`, which is a filtered selection of
# `df`; assigning a column on it directly triggers SettingWithCopyWarning.
main_df = main_df.assign(RScore=np.random.randint(0, 100, main_df.shape[0]))

In [None]:
main_df

In [None]:
main_df = main_df[[c for c in main_df if c not in ['Confidence']] + ['Confidence']]

In [None]:
print(main_df.to_html())

In [None]:
# Collapse main_df to one row per (Subject, Relationship): rows are sorted by
# Object, exact duplicates are dropped and the remaining columns are collected
# into lists. 'Count' is the number of objects in each group.
# NOTE(review): this rebinds main_df, so the cell is not idempotent; re-running
# it operates on the already grouped frame.
main_df = main_df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x))
main_df['Count'] = main_df['Object'].apply(lambda x: len(x))
# print(main_df)
main_df

In [None]:
# Same aggregation as for main_df, applied to the remaining rows: one row per
# (Subject, Relationship) with the other columns collected into lists, plus
# the number of objects per group in 'Count'.
# NOTE(review): this rebinds other_df, so the cell is not idempotent.
other_df = other_df.sort_values('Object', ascending=True).drop_duplicates().groupby(['Subject','Relationship']).agg(lambda x: list(x))
other_df['Count'] = other_df['Object'].apply(lambda x: len(x))
# print(other_df)
other_df

# Pseudo Ground Truth from Wikidata

In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"

# query = """SELECT ?item ?itemLabel WHERE {
#   ?item wdt:P112 wd:Q19837.
#   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
# }"""


def get_results(query, value, endpoint_url="https://query.wikidata.org/sparql"):
    """Fill the ``%s`` placeholder of ``query`` with ``value`` and run it.

    The query is sent to ``endpoint_url`` and the JSON result, as converted by
    SPARQLWrapper, is returned.
    """
    filled_query = query % value
    client = SPARQLWrapper(endpoint_url)
    client.setQuery(filled_query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


def ground_truth(relation, subject):
    """Collect the Wikidata labels related to ``subject`` through ``relation``.

    ``subject`` is resolved with ``get_id`` and every SPARQL template in
    ``query_dict[relation]`` is run for that id.

    Returns:
        List of label strings. When the subject cannot be resolved, the
        relation is unknown or a query fails, ``relation`` and ``subject`` are
        printed and the labels gathered so far (usually none) are returned.
    """
    gt = []
    try:
        subject_id = get_id(subject)  # resolved once for all templates
        if subject_id == -1:
            # -1 is the "not found" sentinel; formatting it into the template
            # would only produce an invalid SPARQL query.
            print (relation, subject)
            return gt
        results = [get_results(query, subject_id) for query in query_dict[relation]]
        for result in results:
            for r in result["results"]["bindings"]:
                gt.append(r['itemLabel']['value'])
    except Exception:
        print (relation, subject)
    return gt

In [None]:
# Notebook-local relationship -> SPARQL template table used by the
# notebook-level ground_truth(). Every template has one "%s" placeholder for
# the subject's Wikidata id.
# NOTE(review): this is an older copy of QUERY_DICT (loaded from
# links/utils.py at the top of the notebook). Here the two
# 'Person Employee or Member of' entries only contain the P108 template; the
# P527 / P463 templates present in QUERY_DICT are missing.
query_dict = {'Organization Founded By^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P112 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization Founded By':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P112 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                        ],
              'Organization Headquarters':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P159 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                          ],
              'Organization Subsidiary Of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P355 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                              ],
              'Organization Subsidiary Of':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P355 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                           ],
              'Organization top employees':["""SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P169 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""", # CEO
                                            """SELECT ?item ?itemLabel WHERE {
                                          wd:%s wdt:P488 ?item.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }""" # Chairperson
                                            ],
              'Person Employee or Member of^-1':["""SELECT ?item ?itemLabel WHERE {
                                          ?item wdt:P108 wd:%s.
                                          SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                        }"""
                                                ],
              'Person Employee or Member of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P108 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                            ],
              'Person Place of Birth':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P19 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                      ],
              'Person Current and Past Location of Residence':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P551 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                                              ],
              'Person Parents':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P22 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Father
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P25 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""", #Mother
                                """SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P1038 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }""" #Relative (Adopted Parents?)
                                # Shall we include stepparents??
                               ],
              'Person Parents^-1':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P40 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                  ],
              'Person Siblings':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P3373 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                                ],
              'Person Spouse':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P26 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                              ],
              'Citizen of':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P27 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                           ],
              'Educated at':["""SELECT ?item ?itemLabel WHERE {
                                              wd:%s wdt:P69 ?item.
                                              SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
                                            }"""
                            ]
             }

In [None]:
# Pseudo ground truth for "Steve Jobs" / 'Educated at'.
# get_results() takes (query, value, endpoint_url=...): the endpoint must be
# passed last (here by keyword), otherwise the URL is used as the query
# template and the entity id as the endpoint.
subject_id = get_id("Steve Jobs")
results = [get_results(query, subject_id, endpoint_url=endpoint_url) for query in query_dict['Educated at']]

gt = [r['itemLabel']['value'] for result in results for r in result["results"]["bindings"]]
print (len(gt))
gt

In [None]:
temp['Relationship']

In [None]:
temp = main_df.reset_index()
temp

In [None]:
temp['Ground Truth'] = temp.apply(lambda row: ground_truth(row['Relationship'], row['Subject']), axis=1)
temp['Count_GT'] = temp['Ground Truth'].apply(lambda x: len(x))

In [None]:
temp.set_index(['Subject','Relationship'])

In [None]:
main_df

In [None]:
a = {'33':33}

In [None]:
def f1():
    """Set ``a['33']`` to 33 on the module-level dict ``a``, then print it after a "B" line."""
    # Item assignment mutates the object the global name already refers to;
    # the name is never rebound here, so no ``global`` declaration is needed.
    a['33'] = 33
    print("B\n", a)

In [None]:
def f2():
    """Reload the global ``a`` from ``data.p``, update it, and write it back.

    Steps, in order:
      1. Rebind the module-level name ``a`` to the object unpickled from ``data.p``.
      2. Set ``a['1222'] = 1222`` and print the dict after an "A" line.
      3. Call ``f1()``, which works on the same module-level ``a``.
      4. Pickle ``a`` back to ``data.p`` and print it after a "C" line.

    Raises:
        FileNotFoundError: if ``data.p`` does not exist in the working directory.
    """
    global a
    # Imported locally so the function does not depend on an import cell that
    # is not part of this section of the notebook.
    import pickle

    with open('data.p', 'rb') as fp:
        # NOTE: unpickling can execute arbitrary code; only load a data.p that
        # was produced by this notebook.
        a = pickle.load(fp)
    a['1222'] = 1222
    print("A\n", a)
    f1()
    with open('data.p', 'wb') as fp:
        pickle.dump(a, fp, protocol=pickle.HIGHEST_PROTOCOL)
    print("C\n", a)

In [None]:
def f(batch_size=1, num_workers=1, shuffle=None):
    """Print ``batch_size``, ``shuffle`` and ``num_workers``, one per line, in that order."""
    for value in (batch_size, shuffle, num_workers):
        print(value)

In [None]:
# Keyword arguments that a later cell unpacks into a call with ``**params``.
params = dict(batch_size=64, shuffle=True, num_workers=6)

In [None]:
# Unpack the dict into keyword arguments: each key of `params` is matched to the
# parameter of f() with the same name, regardless of the order of the keys.
f(**params)

In [None]:
# Relative directory path used by the following cells; note the trailing slash.
root = "./dataset/train/"

In [None]:
# Same string with its last character dropped (here: the trailing "/").
root[:-1]

In [None]:
# `os` is otherwise only imported in a later cell; import it here so this cell
# also runs on a fresh kernel when the notebook is executed top to bottom.
import os

# List the entries of the directory named by `root`.
os.listdir(root)

In [None]:
import os

# Test

In [1]:
import requests
from SPARQLWrapper import SPARQLWrapper, JSON

In [5]:
def get_results(query, endpoint_url="https://query.wikidata.org/sparql"):
    """Send a SPARQL query to an endpoint and return the converted response.

    Args:
        query: SPARQL query text.
        endpoint_url: URL of the SPARQL endpoint the query is sent to.

    Returns:
        The value produced by ``convert()`` on the query result, with the
        return format set to ``JSON``.
    """
    client = SPARQLWrapper(endpoint_url)
    client.setReturnFormat(JSON)
    client.setQuery(query)
    response = client.query()
    return response.convert()

In [6]:
# Send `query` to the default endpoint of get_results().
# NOTE(review): `query` is assigned in the cell that appears after the recorded
# output (it was executed earlier, as In [3]); top-to-bottom execution on a
# fresh kernel would hit a NameError here.
# The recorded output is an EndPointInternalError: the endpoint answered with
# HTTP 500 and a java.util.concurrent.TimeoutException.
get_results(query)

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response. 

Response:
b'SPARQL-QUERY: queryStr=SELECT ?person (COUNT(*) AS ?cnt) WHERE {\n           hint:Query hint:optimizer "None"\n\n           {SELECT ?person WHERE {\n             ?person wdt:P31 wd:Q5.\n           }}.\n\n           ?person ?prop ?thing\n     } GROUP BY ?person\n     ORDER BY DESC(?cnt)\njava.util.concurrent.TimeoutException\n\tat java.util.concurrent.FutureTask.get(FutureTask.java:205)\n\tat com.bigdata.rdf.sail.webapp.BigdataServlet.submitApiTask(BigdataServlet.java:293)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doSparqlQuery(QueryServlet.java:679)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doGet(QueryServlet.java:290)\n\tat com.bigdata.rdf.sail.webapp.RESTServlet.doGet(RESTServlet.java:240)\n\tat com.bigdata.rdf.sail.webapp.MultiTenancyServlet.doGet(MultiTenancyServlet.java:271)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:687)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:865)\n\tat org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1655)\n\tat org.wikidata.query.rdf.blazegraph.throttling.ThrottlingFilter.doFilter(ThrottlingFilter.java:338)\n\tat org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1642)\n\tat ch.qos.logback.classic.helpers.MDCInsertingServletFilter.doFilter(MDCInsertingServletFilter.java:49)\n\tat org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1642)\n\tat org.wikidata.query.rdf.blazegraph.filters.ClientIPFilter.doFilter(ClientIPFilter.java:43)\n\tat org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1634)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:146)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)\n\tat 
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1340)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)\n\tat org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1242)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:220)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:503)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:364)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)\n\tat org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305)\n\tat org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)\n\tat org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)\n\tat 
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)\n\tat org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:765)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:683)\n\tat java.lang.Thread.run(Thread.java:748)\n'

In [3]:
# SPARQL text passed to get_results(query).
# For every ?person selected by the sub-query (instances of wd:Q5 via wdt:P31,
# i.e. humans on Wikidata), count all triples with that ?person as subject and
# sort the persons by that count in descending order.
# The hint line sets the query optimizer to "None" (presumably so the sub-query
# is evaluated in the written order - confirm against the endpoint's hint docs).
# NOTE(review): there is no LIMIT and the aggregation spans every matching
# person; the error output recorded in this section shows this exact query
# ending in a TimeoutException on the endpoint.
query = '''SELECT ?person (COUNT(*) AS ?cnt) WHERE {
           hint:Query hint:optimizer "None"

           {SELECT ?person WHERE {
             ?person wdt:P31 wd:Q5.
           }}.

           ?person ?prop ?thing
     } GROUP BY ?person
     ORDER BY DESC(?cnt)'''