In [None]:
import pandas as pd
import requests
import re
from SPARQLWrapper import SPARQLWrapper, JSON
from collections import defaultdict

In [None]:
# SPARQL endpoint URL (query.wikidata.org) handed to SPARQLWrapper by get_results().
endpoint_url = "https://query.wikidata.org/sparql"

In [None]:
# Load the two tab-separated tables (backslash is the escape character).
# Note that the files are decoded differently: people as UTF-8, connect as Latin-1.
people = pd.read_csv(
    './neurotree/people_cs.tsv',
    sep='\t',
    encoding='utf-8',
    escapechar='\\',
)
connect = pd.read_csv(
    './neurotree/connect_cs.tsv',
    sep='\t',
    encoding='latin-1',
    escapechar='\\',
)

# Build a searchable full name: join first/middle/last (missing parts become ''),
# drop periods (e.g. from initials) and collapse runs of spaces into one.
people['name'] = (
    (
        people['firstname'].fillna('')
        + " " + people['middlename'].fillna('')
        + " " + people['lastname'].fillna('')
    )
    .str.replace('.', '', regex=False)
    .str.replace(' +', ' ', regex=True)
)

In [None]:
def find_similar(names):
    """Find rows of the global ``people`` frame whose ``name`` contains any candidate.

    Each candidate is normalised the same way as ``people['name']`` (periods
    removed, runs of spaces collapsed, surrounding whitespace stripped) and is
    matched literally and case-insensitively as a substring.

    Parameters
    ----------
    names : iterable of str
        Candidate spellings of one person (label plus aliases).

    Returns
    -------
    str
        If exactly one row matches: the value of that row's first column, as a
        string. Otherwise: the number of matching rows, as a string ("0" when
        nothing matches or when no usable candidate was given).
    """
    global people

    patterns = []
    for raw in names:
        cleaned = re.sub(' +', ' ', raw.replace('.', '')).strip()
        # Blank candidates would add an empty alternative that matches every row.
        if cleaned:
            # Escape so characters such as '+', '(' or '?' are matched literally.
            patterns.append(re.escape(cleaned))
    if not patterns:
        return "0"

    mask = people['name'].str.contains(
        '|'.join(patterns), flags=re.IGNORECASE, regex=True, na=False
    )
    # Work with positions rather than index labels so a non-default index is safe.
    positions = mask.to_numpy().nonzero()[0]
    if len(positions) == 1:
        return str(people.iloc[positions[0], 0])
    return str(len(positions))

In [None]:
def getEntity_id(name):
    """Resolve a name to a Wikidata entity id via the ``wbsearchentities`` API.

    Parameters
    ----------
    name : str
        Search text sent to the API (English-language search).

    Returns
    -------
    str or None
        The id of the hit when the search returns exactly one result;
        ``None`` when there are zero or several hits, when the search text is
        blank, or when the reply carries no ``search`` list.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # A blank search makes the API return an error payload without 'search'.
    if not str(name).strip():
        return None

    Wikidata_api = "https://www.wikidata.org/w/api.php"
    params = {
        'action': 'wbsearchentities',
        'search': name,
        'language': 'en',
        'format': 'json',
        'props': ''
    }
    # A timeout keeps one stalled request from hanging a long batch run.
    res = requests.get(Wikidata_api, params=params, timeout=30)
    res.raise_for_status()

    # Parse the body once; tolerate replies that lack the 'search' key.
    hits = res.json().get('search', [])
    if len(hits) == 1:
        return hits[0]['id']
    return None

In [None]:
# Profile query for a single, hard-coded entity (wd:Q92741).
# Every property is OPTIONAL, so the result is one row per combination of the
# values that exist. Statement nodes (p:/ps:) are used where qualifiers
# (pq:, e.g. start/end/point in time) are read alongside the main value.
# The award date is read from the P585 (point in time) qualifier, matching the
# other point-in-time lookups in this query.
wikidata_query = """SELECT ?entityLabel ?entityAltLabel ?occupationLabel ?field_of_workLabel ?organizationLabel 
?start_time_organization ?end_time_organization ?awardLabel ?point_in_time_award ?doc_advisorLabel ?doc_studentLabel ?doc_thesisLabel ?studentLabel ?student_ofLabel ?notable_workLabel ?website ?dblp_id 
?academic_treeid ?acm_id ?employeeLabel ?start_time_employee ?end_time_employee ?educationLabel ?degree_educationLabel 
?start_time_education ?end_time_education ?academic_degreeLabel ?point_in_time_academic_degree 
?conffered_by_academic_degreeLabel
WHERE 
{ BIND(wd:Q92741 AS ?entity).
  optional{?entity wdt:P27 ?country.}
  optional{?entity wdt:P106 ?occupation.}
  optional{?entity wdt:P101 ?field_of_work.}
  optional{?entity p:P463 ?stmt4. 
          ?stmt4 ps:P463 ?organization.}
  optional{?stmt4 (pq:P580|pq:P585) ?start_time_organization.}
  optional{?stmt4 pq:P582 ?end_time_organization.}
  optional{?entity p:P166 ?stmt3.
            ?stmt3 ps:P166 ?award.}
  optional{?stmt3 pq:P585 ?point_in_time_award.}
  optional{?entity wdt:P184 ?doc_advisor.}
  optional{?entity wdt:P185 ?doc_student.}
  optional{?entity wdt:P1026 ?doc_thesis.}
  optional{?entity wdt:P802 ?student.}
  optional{?entity wdt:P1066 ?student_of.}
  optional{?entity wdt:P800 ?notable_work.}
  optional{?entity wdt:P856 ?website.}
  optional{?entity wdt:P2456 ?dblp_id.}
  optional{?entity wdt:P2381 ?academic_treeid.}
  optional{?entity wdt:P864 ?acm_id.}
  optional{?entity p:P108 ?stmt.
           ?stmt  ps:P108 ?employee.}
  optional{?stmt  pq:P580 ?start_time_employee.}
  optional{?stmt  pq:P582 ?end_time_employee.}
  optional{?entity p:P69 ?stmt1.
           ?stmt1  ps:P69 ?education.}
  optional{?stmt1  pq:P512 ?degree_education.}
  optional{?stmt1  pq:P580 ?start_time_education.}
  optional{?stmt1  pq:P582 ?end_time_education.}
  optional{?entity p:P512 ?stmt2.
           ?stmt2  ps:P512 ?academic_degree.}
  optional{?stmt2  pq:P585 ?point_in_time_academic_degree.}
  optional{?stmt2  pq:P1027 ?conffered_by_academic_degree.}
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

In [None]:
def get_results(endpoint_url, query):
    """Run a SPARQL query and return the converted JSON response.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint.
    query : str
        SPARQL query text.

    Returns
    -------
    The object produced by ``convert()`` on the JSON-format response; callers
    in this notebook read ``["results"]["bindings"]`` from it.
    """
    client = SPARQLWrapper(endpoint_url)
    client.setReturnFormat(JSON)
    client.setQuery(query)
    response = client.query()
    return response.convert()

In [None]:
# Run the entity profile query once; the cells below read
# q_result["results"]["bindings"].
q_result=get_results(endpoint_url,wikidata_query)

In [None]:
# Peek at the first binding row to see which variables came back.
# Raises IndexError if the query returned no rows.
q_result["results"]["bindings"][0]

In [None]:
# Collapse the query's result rows into one record of distinct values.
# Variables whose name contains one of the `stmt` markers belong to a
# statement with qualifiers; their values are kept together per row as a tuple
# stored under "<marker>Label". All other variables are collected individually.
ab = []  # defined here so the cell runs on a fresh kernel and is idempotent
dct = defaultdict(set)
stmt=['employee','education','organization','award','academic_degree']
for result in q_result["results"]["bindings"]:
    temp = defaultdict(list)
    for key in result:
        value = result[key]['value']
        # First marker contained in the variable name, decided per key.
        # (Previously a row-level flag was never reset, so every ungrouped
        # variable after the first grouped one was silently dropped.)
        group = next((marker for marker in stmt if marker in key), None)
        if group is None:
            dct[key].add(value)
        else:
            temp[group].append(value)
    for key in temp:
        dct[key+"Label"].add(tuple(temp[key]))
ab.append(dct)

In [None]:
# Variant of the aggregation above that only groups employment and education:
# each row contributes a (label, start date, end date) tuple, with the time part
# of the timestamps cut off at 'T'. Missing parts are simply omitted.
# The keys must match the SELECT variables of the query
# (start_time_employee, end_time_education, ...); the shortened names
# '*_emp' / '*_edu' never occur in the bindings.
a = []  # defined here so the cell runs on a fresh kernel and is idempotent
dct = defaultdict(set)
stmt=['employee','education','organization','award','academic_degree']


def _dated_entry(row, label_key, time_keys):
    """Return [label, date, ...] for the parts of one statement present in row."""
    parts = []
    if label_key in row:
        parts.append(row[label_key]['value'])
    parts.extend(row[k]['value'].split('T')[0] for k in time_keys if k in row)
    return parts


emp_times = ('start_time_employee', 'end_time_employee')
edu_times = ('start_time_education', 'end_time_education')
grouped_keys = {'employeeLabel', 'educationLabel', *emp_times, *edu_times}

for result in q_result["results"]["bindings"]:
    edu = _dated_entry(result, 'educationLabel', edu_times)
    emp = _dated_entry(result, 'employeeLabel', emp_times)
    for key in result:
        if key not in grouped_keys:
            dct[key].add(result[key]['value'])
    if len(edu) > 0:
        dct['educationLabel'].add(tuple(edu))
    if len(emp) > 0:
        dct['employeeLabel'].add(tuple(emp))
a.append(dct)

In [None]:
# For every person, resolve the name to a Wikidata entity, fetch that entity's
# doctoral advisors / "student of" targets (P184 | P1066), and try to map each
# supervisor back to a row of `people`.
# One output row is produced per (student, supervisor) pair.
names=people[['pid','name']]
stud_pid=[]
stud_wiki_id=[]
studLabel=[]
supLabel=[]
sup_wiki_id=[]
sup_pid=[]

# Template is loop-invariant; %s is replaced by the prefixed entity id.
supervisor_query_template = """PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT Distinct ?entityLabel ?supervisor ?supervisorLabel ?supervisorAltLabel
    WHERE
    {
      BIND(%s As ?entity)
      ?entity (wdt:P184|wdt:P1066) ?supervisor.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }"""

for pid, name in names.itertuples(index=False, name=None):
    # getEntity_id returns None unless the search has exactly one hit.
    res=getEntity_id(name)
    if res is None:
        continue
    keyword="wd"+":"+res
    query=supervisor_query_template%keyword
    results = get_results(endpoint_url, query)
    for result in results["results"]["bindings"]:
        # Candidate spellings of the supervisor: label plus comma-separated aliases.
        wiki_name=[result['supervisorLabel']['value']]
        alt_label=result.get('supervisorAltLabel')
        if alt_label is not None:
            wiki_name=wiki_name+alt_label['value'].split(",")
        stud_pid.append(pid)
        stud_wiki_id.append(keyword)
        studLabel.append(result['entityLabel']['value'])
        supLabel.append(result['supervisorLabel']['value'])
        sup_wiki_id.append(result['supervisor']['value'])
        # find_similar yields the matched id as a string when exactly one row
        # matches, otherwise the number of matching rows as a string.
        s_pid=find_similar(wiki_name)
        sup_pid.append(s_pid)
info=pd.DataFrame(list(zip(stud_pid, stud_wiki_id,studLabel,supLabel,sup_wiki_id,sup_pid))
                  ,columns =['stud_pid', 'stud_wiki_id','studLabel','supLabel','sup_wiki_id','sup_pid'])

In [None]:
def knowledge_graph(name, wikidataApi, getqueryResult, query, prefix, pid=None):
    """Look up one person's supervisors and append them to the result lists.

    Parameters
    ----------
    name : str
        Name passed to ``wikidataApi``.
    wikidataApi : callable
        ``name -> entity id or None``.
    getqueryResult : callable
        ``(endpoint_url, query) -> result``; the result is read as
        ``result["results"]["bindings"]``.
    query : str
        Query template containing one ``%s`` for the prefixed entity id.
    prefix : str
        Prefix put in front of the entity id, joined with ``":"``.
    pid : optional
        Identifier of the person recorded in ``stud_pid`` for every row
        (defaults to ``None``).

    Returns
    -------
    None
        Results are appended to the module-level lists ``stud_pid``,
        ``stud_wiki_id``, ``studLabel``, ``supLabel``, ``sup_wiki_id`` and
        ``sup_pid``. Nothing is appended when the name cannot be resolved.
    """
    entity_id=wikidataApi(name)
    if entity_id is None:
        return
    # Use the id resolved above (not leftover notebook globals).
    item=prefix+":"+entity_id
    results = getqueryResult(endpoint_url, query%item)
    for result in results["results"]["bindings"]:
        # Candidate spellings: label plus comma-separated aliases, if any.
        wiki_name=[result['supervisorLabel']['value']]
        alt_label=result.get('supervisorAltLabel')
        if alt_label is not None:
            wiki_name=wiki_name+alt_label['value'].split(",")
        stud_pid.append(pid)
        stud_wiki_id.append(item)
        studLabel.append(result['entityLabel']['value'])
        supLabel.append(result['supervisorLabel']['value'])
        sup_wiki_id.append(result['supervisor']['value'])
        sup_pid.append(find_similar(wiki_name))
# Assemble the six parallel result lists into one table, one row per
# (student, supervisor) pair. This statement sits at top level, outside
# knowledge_graph.
info = pd.DataFrame(
    list(zip(stud_pid, stud_wiki_id, studLabel, supLabel, sup_wiki_id, sup_pid)),
    columns=['stud_pid', 'stud_wiki_id', 'studLabel', 'supLabel', 'sup_wiki_id', 'sup_pid'],
)
    

In [None]:
# Display the student/supervisor table built above.
info

In [None]:
# Example MediaWiki API query parameters: look up the page properties
# (ppprop=wikibase_item) for the page titled "Les Misérables".
# Kept as a dict so it can be passed as `params=` to requests.get; the raw
# URL query string that was pasted here is not valid Python.
pageprops_params = {
    'action': 'query',
    'format': 'json',
    'prop': 'pageprops',
    'titles': 'Les Mis\u00e9rables',
    'formatversion': '2',
    'ppprop': 'wikibase_item',
}

In [None]:
# Start from an empty mapping.
temp = dict()

In [None]:
# `temp[]` is a syntax error. NOTE(review): the intent appears to be one empty
# bucket per statement group — confirm before relying on this.
temp = {a: [] for a in ['employee','education','organization','award','academic_degree']}

In [None]:
# Debug output for the most recent lookup. Relies on `name`, `result` and
# `wiki_name` still holding the values left behind by the supervisor loop.
print('keyword:{}'.format(name))
print('student:{}'.format(result['entityLabel']['value']))
print('supervisor_id:{}'.format(result['supervisor']['value']))
print('supervisor:{}'.format(wiki_name))
# find_similar expects a list of names; passing a bare string would make it
# iterate over single characters. Look each spelling up separately and keep
# every answer except "0" (no match). Materialised once as a list so it can be
# printed and reused.
pids=[match for match in (find_similar([n]) for n in wiki_name) if match != "0"]
print('pid:{}'.format(pids))

In [None]:
# Rows whose name contains "Good" (case-insensitive).
people.loc[people['name'].str.contains("Good", flags=re.IGNORECASE, regex=True)]

In [None]:
# pid values of every row whose name contains "David" (case-insensitive).
pid = people.loc[
    people['name'].str.contains("David", flags=re.IGNORECASE, regex=True),
    'pid',
]

In [None]:
# First matching pid; raises IndexError when the selection above is empty.
pid.values[0]

In [None]:
# Rows of connect whose pid2 equals the first matching pid.
connect.loc[connect['pid2'].eq(pid.values[0])]

In [None]:
# Spot check: the people row(s) with pid 2003.
people.loc[people['pid'].eq(2003)]

In [None]:
# Spot check: the people row(s) with pid 849.
people.loc[people['pid'].eq(849)]

In [None]:
# List the column names of the connect table.
connect.columns

In [None]:
# Evaluates to the .str accessor object itself (no method is called);
# NOTE(review): looks like leftover exploration — confirm it can be removed.
people['name'].str

In [None]:
# Collect, per entityLabel, the list of its institutionLabel values.
# NOTE(review): `query1` is not defined anywhere in the visible notebook, so this
# cell fails on a fresh kernel — presumably a DataFrame of query results; confirm.
a=query1.groupby('entityLabel')['institutionLabel'].apply(list)

In [None]:
# Ad-hoc check for one hard-coded entity (wd:Q7173587): print the entity label
# and, for each supervisor row, the raw alias binding (None when absent).
query = """PREFIX wd: <http://www.wikidata.org/entity/>
SELECT Distinct ?entityLabel ?supervisorLabel ?supervisorAltLabel
WHERE
{
  BIND(wd:Q7173587 AS ?entity)
  ?entity (wdt:P184|wdt:P1066) ?supervisor.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}"""
results = get_results(endpoint_url, query)
for result in results["results"]["bindings"]:
    # Two lines per row: the label, then the alias binding.
    print(
        result['entityLabel']['value'],
        '{}'.format(result.get('supervisorAltLabel')),
        sep='\n',
    )

In [None]:
# Lists are unhashable, so set.add(b) raises TypeError. Convert to a tuple
# first, as the aggregation cells do before adding grouped values to a set.
a = set()
b = [2, 3, 4]
a.add(tuple(b))