In [355]:
import rdflib

# create an empty Graph
g = rdflib.ConjunctiveGraph()

# parse a local RDF file by specifying the format
result = g.parse("Desktop/dhdk_epds/resources/artchives.nq", format='nquads')

query_results = g.query(
    """SELECT ?class (COUNT(?individual) AS ?tot)
    WHERE { ?individual a ?class .}
    GROUP BY ?class ?tot""")

for query_res in query_results:
    print(query_res[0], query_res["tot"]) # notice the two alternative ways to recall values in the tuple

http://www.wikidata.org/entity/Q5 24
http://www.wikidata.org/entity/Q9388534 25
http://www.wikidata.org/entity/Q31855 5


In [356]:
from rdflib import Namespace , Literal , URIRef
from rdflib.namespace import RDF , RDFS


# bind the uncommon namespaces
wd = Namespace("http://www.wikidata.org/entity/") # remember that a prefix matches a URI until the last slash (or hashtag #)
wdt = Namespace("http://www.wikidata.org/prop/direct/")
art = Namespace("https://w3id.org/artchives/")
rdfs = Namespace ("http://www.w3.org/2000/01/")

# Get the list of art historians in our graph "g"
arthistorians_list = set()

# iterate over the triples in the graph
for s,p,o in g.triples(( None, wdt.P170, None)):   # people "o" are the creator "wdt.P170" of a collection "s"
    if "wikidata.org/entity/" in str(o):           # look for the substring to filter wikidata entities only
        arthistorians_list.add('<' + str(o) + '>')     # remember to transform them in strings! 
    
print(arthistorians_list)


{'<http://www.wikidata.org/entity/Q60185>', '<http://www.wikidata.org/entity/Q1641821>', '<http://www.wikidata.org/entity/Q537874>', '<http://www.wikidata.org/entity/Q1089074>', '<http://www.wikidata.org/entity/Q1629748>', '<http://www.wikidata.org/entity/Q1712683>', '<http://www.wikidata.org/entity/Q19997512>', '<http://www.wikidata.org/entity/Q995470>', '<http://www.wikidata.org/entity/Q1271052>', '<http://www.wikidata.org/entity/Q18935222>', '<http://www.wikidata.org/entity/Q3051533>', '<http://www.wikidata.org/entity/Q457739>', '<http://www.wikidata.org/entity/Q6700132>', '<http://www.wikidata.org/entity/Q3057287>', '<http://www.wikidata.org/entity/Q41616785>', '<http://www.wikidata.org/entity/Q90407>', '<http://www.wikidata.org/entity/Q1715096>', '<http://www.wikidata.org/entity/Q55453618>', '<http://www.wikidata.org/entity/Q1373290>', '<http://www.wikidata.org/entity/Q2824734>', '<http://www.wikidata.org/entity/Q85761254>', '<http://www.wikidata.org/entity/Q61913691>', '<http://w

In [171]:
from SPARQLWrapper import SPARQLWrapper, JSON
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# get the endpoint API
wikidata_endpoint = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
# prepare the values to be queried
historians = ' '.join(arthistorians_list) # <uri1> <uri2> <uri3> ... <uriN>

# prepare the query
birthplace_query = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?historian ?birthplace ?birthplace_label
WHERE {
    VALUES ?historian {"""+historians+"""} . # look how we include a variable in a query string!
    ?historian wdt:P19 ?birthplace . 
    ?birthplace rdfs:label ?birthplace_label .
    FILTER (langMatches(lang(?birthplace_label), "EN"))
    } 
group by ?historian ?birthplace ?birthplace_label
"""

# set the endpoint 
sparql_wd = SPARQLWrapper(wikidata_endpoint)
# set the query
sparql_wd.setQuery(birthplace_query)
# set the returned format
sparql_wd.setReturnFormat(JSON)
# get the results
results = sparql_wd.query().convert()
duplicate = []
# manipulate the result
for result in results["results"]["bindings"]:
    historian_uri = result["historian"]["value"]
    if historian_uri not in duplicate: 
        duplicate.append(historian_uri)
        print("historian:", historian_uri)
        if "birthplace" in result: # some historians may have no birthplace recorded in Wikidata!
            birthplace = result["birthplace"]["value"]
            if "birthplace_label" in result: 
                birthplace_label = result["birthplace_label"]["value"]
                print("found:", birthplace, birthplace_label)
                if historian_uri not in duplicate: 
                    # only if both uri and label are found we add them to the graph
                    g.add(( URIRef(historian_uri) , URIRef(wdt.P19) , URIRef(birthplace) ))
                    g.add(( URIRef(birthplace) , RDFS.label , Literal(birthplace_label) ))
                
print (len(duplicate))

historian: http://www.wikidata.org/entity/Q88907
found: http://www.wikidata.org/entity/Q1726 Munich
historian: http://www.wikidata.org/entity/Q41616785
found: http://www.wikidata.org/entity/Q1971847 Nauheim
historian: http://www.wikidata.org/entity/Q995470
found: http://www.wikidata.org/entity/Q242478 Levoča
historian: http://www.wikidata.org/entity/Q457739
found: http://www.wikidata.org/entity/Q649 Moscow
historian: http://www.wikidata.org/entity/Q537874
found: http://www.wikidata.org/entity/Q84 London
historian: http://www.wikidata.org/entity/Q3051533
found: http://www.wikidata.org/entity/Q993164 Epsom
historian: http://www.wikidata.org/entity/Q90407
found: http://www.wikidata.org/entity/Q3075 Fürth
historian: http://www.wikidata.org/entity/Q18935222
found: http://www.wikidata.org/entity/Q64 Berlin
historian: http://www.wikidata.org/entity/Q1089074
found: http://www.wikidata.org/entity/Q220 Rome
historian: http://www.wikidata.org/entity/Q1296486
found: http://www.wikidata.org/entity/

In [357]:
# prepare the values to be queried
historians = ' '.join(arthistorians_list) # <uri1> <uri2> <uri3> ... <uriN>

# prepare the query
studyplace_query = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?historian ?birthplace ?birthplace_label ?coordinates1 ?eduplace ?eduplace_label ?coordinates2 
WHERE {
    VALUES ?historian {"""+historians+"""} . 
    optional {?historian wdt:P108 ?birthplace . ?birthplace rdfs:label ?birthplace_label .
    FILTER (langMatches(lang(?birthplace_label), "EN")) optional {?birthplace wdt:P625 ?coordinates1}}
    optional {?historian wdt:P69 ?eduplace . 
    ?eduplace rdfs:label ?eduplace_label .
    FILTER (langMatches(lang(?eduplace_label), "EN")) optional {?eduplace wdt:P625 ?coordinates2 }}
    } 
    
"""

# set the endpoint 
sparql_wd = SPARQLWrapper(wikidata_endpoint)
# set the query
sparql_wd.setQuery(studyplace_query)
# set the returned format
sparql_wd.setReturnFormat(JSON)
# get the results
results = sparql_wd.query().convert()
duplicate = []
# manipulate the result
for result in results["results"]["bindings"]:
    print(result)
    historian_uri = result["historian"]["value"]
    #print("historian:", historian_uri)
    if "birthplace" in result: 
        birthplace = result["birthplace"]["value"]
        if "birthplace_label" in result: 
            birthplace_label = result["birthplace_label"]["value"]
            #print("work:", birthplace, birthplace_label)

                    # only if both uri and label are found we add them to 
            g.add(( URIRef(historian_uri) , URIRef(wdt.P108) , URIRef(birthplace) ))
            g.add(( URIRef(birthplace) , RDFS.label , Literal(birthplace_label) ))
            if "coordinates1" in result:
                work_coord = result["coordinates1"]["value"][6:-1].split(" ")
                g.add(( URIRef(birthplace) , URIRef(wdt.P625) , Literal(work_coord) ))
    if "eduplace" in result: 
        eduplace = result["eduplace"]["value"]
        if "eduplace_label" in result: 
            eduplace_label = result["eduplace_label"]["value"]
            #print("education:", eduplace, eduplace_label)
                    # only if both uri and label are found we add them to the graph
            g.add(( URIRef(historian_uri) , URIRef(wdt.P69) , URIRef(eduplace) ))
            g.add(( URIRef(eduplace) , RDFS.label , Literal(eduplace_label) ))
            if "coordinates2" in result:
                eduplace_coord = result["coordinates2"]["value"][6:-1].split(" ")
                g.add(( URIRef(eduplace) , URIRef(wdt.P625) , Literal(eduplace_coord) ))


{'historian': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q19997512'}, 'eduplace': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q13371'}, 'eduplace_label': {'xml:lang': 'en', 'type': 'literal', 'value': 'Harvard University'}, 'coordinates2': {'datatype': 'http://www.opengis.net/ont/geosparql#wktLiteral', 'type': 'literal', 'value': 'Point(-71.116943888 42.374443888)'}}
{'historian': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q19997512'}, 'eduplace': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q13371'}, 'eduplace_label': {'xml:lang': 'en-ca', 'type': 'literal', 'value': 'Harvard University'}, 'coordinates2': {'datatype': 'http://www.opengis.net/ont/geosparql#wktLiteral', 'type': 'literal', 'value': 'Point(-71.116943888 42.374443888)'}}
{'historian': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q19997512'}, 'eduplace': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q13371'}, 'eduplace_label': {'xml:lang': 'en-gb', 

Tirare fuori lista di p921 come lista storici - valutare come mantenere il collegamento fra storico e p921 - e effettuare check su wikidata per verificare quali sono riconducibili a città, università, e altri luoghi di lavoro

In [358]:


prova = {}
final = {}
for hist, prop, eduplace in g.triples((None, wdt.P69, None)):
    for eduplace, hasname, eduplacename in g.triples((eduplace, RDFS.label, None)):
        for eduplace, hasCoordinates, coord in g.triples((eduplace, wdt.P625, None)):
            for s, p, name in g.triples(( hist, RDFS.label, None)):   
                if "wikidata.org/entity/" in str(s):  
                    key = (eduplacename.strip(), "<" + eduplace.strip() +">", float(coord.split(" ")[1][1:-2]), float(coord.split(" ")[0][2:-2]))
                    value = tuple(["<" + hist.strip() +">", name.strip()])
                    if key not in prova.keys():
                        prova[key] = set([value])
                    else:
                        prova[key].update([value])


                    
for hist, prop, eduplace in g.triples((None, wdt.P108, None)):
    for eduplace, hasname, eduplacename in g.triples((eduplace, RDFS.label, None)):
        for eduplace, hasCoordinates, coord in g.triples((eduplace, wdt.P625, None)):
            for s, p, name in g.triples(( hist, RDFS.label, None)):   
                if "wikidata.org/entity/" in str(s):  
                    key = (eduplacename.strip(), "<" + eduplace.strip() +">", float(coord.split(" ")[1][1:-2]), float(coord.split(" ")[0][2:-2]))
                    print(key)
                    value = tuple(["<" + hist.strip() +">", name.strip()])
                    if key not in prova.keys():
                        prova[key] = set([value])
                    else:
                        prova[key].update([value])


for k, v in prova.items():
    for el in v:
        if k not in final.keys():
            final[k] = [el]
        else:
            final[k].append(el)




('University of Oxford', '<http://www.wikidata.org/entity/Q34433>', 51.755, -1.255)
('University of Oxford', '<http://www.wikidata.org/entity/Q34433>', 51.755, -1.255)
('University of Oxford', '<http://www.wikidata.org/entity/Q34433>', 51.755, -1.255)
('University of Cambridge', '<http://www.wikidata.org/entity/Q35794>', 52.205277777, 0.117222222)
('University of Cambridge', '<http://www.wikidata.org/entity/Q35794>', 52.205277777, 0.117222222)
('University of Cambridge', '<http://www.wikidata.org/entity/Q35794>', 52.205277777, 0.117222222)
('Vassar College', '<http://www.wikidata.org/entity/Q2093794>', 41.686866666, -73.895188888)
('Vassar College', '<http://www.wikidata.org/entity/Q2093794>', 41.686866666, -73.895188888)
('Vassar College', '<http://www.wikidata.org/entity/Q2093794>', 41.686866666, -73.895188888)
('Vassar College', '<http://www.wikidata.org/entity/Q2093794>', 41.686866666, -73.895188888)
('Vassar College', '<http://www.wikidata.org/entity/Q2093794>', 41.686866666, -73.

In [359]:
import csv

# prepare the csv file
with open('periods_count.csv', mode='w') as my_file:
    my_writer = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    # write the column names
    my_writer.writerow(['place', 'lat', 'long'])
    
    # access the list of tuples of the query results
    for res in prova:
        # write in the csv
        my_writer.writerow([res[0], res[1], res[2]])

In [360]:
import pandas as pd

# parse the csv into a dataframe
df = pd.read_csv("periods_count.csv")
# print the first 5 rows
df.head()

Unnamed: 0,place,lat,long
0,University of Freiburg,<http://www.wikidata.org/entity/Q153987>,47.994167
1,Downside School,<http://www.wikidata.org/entity/Q5303281>,51.255
2,Sapienza University of Rome,<http://www.wikidata.org/entity/Q209344>,41.903333
3,Humboldt University of Berlin,<http://www.wikidata.org/entity/Q152087>,52.518056
4,New York University,<http://www.wikidata.org/entity/Q49210>,40.73


In [361]:
g.serialize(destination='Desktop/dhdk_epds/resources/artchives2.nq', format='nquads')
result = g.parse("Desktop/dhdk_epds/resources/artchives2.nq", format='nquads')



In [366]:
related = {}
for s,p,o in g.triples(( None, wdt.P170, None)):   # people "o" are the creator "wdt.P170" of a collection "s"
    if "wikidata.org/entity/" in str(o):           # look for the substring to filter wikidata entities only
        for hist, prop, obj in g.triples((o, wdt.P921, None)):     
            for subj, pr, name in g.triples(( hist, RDFS.label, None)):   
                key = tuple(['<' + str(hist) + '>', name.strip()])
                value = '<' + str(obj) + '>'
                if key not in related.keys():
                    related[key] = [value]
                else:
                    related[key].append(value)
    
for k, v in related.items():
    print(k, v)



('<http://www.wikidata.org/entity/Q19997512>', 'Everett Fahy') ['<http://www.wikidata.org/entity/Q213439>', '<http://www.wikidata.org/entity/Q213439>', '<http://www.wikidata.org/entity/Q537874>', '<http://www.wikidata.org/entity/Q537874>', '<http://www.wikidata.org/entity/Q38>', '<http://www.wikidata.org/entity/Q38>', '<http://www.wikidata.org/entity/Q49210>', '<http://www.wikidata.org/entity/Q49210>', '<http://www.wikidata.org/entity/Q3558578>', '<http://www.wikidata.org/entity/Q3558578>', '<http://www.wikidata.org/entity/Q1089074>', '<http://www.wikidata.org/entity/Q1089074>', '<http://www.wikidata.org/entity/Q13371>', '<http://www.wikidata.org/entity/Q13371>', '<http://www.wikidata.org/entity/Q2044>', '<http://www.wikidata.org/entity/Q2044>', '<http://www.wikidata.org/entity/Q191423>', '<http://www.wikidata.org/entity/Q191423>', '<http://www.wikidata.org/entity/Q682827>', '<http://www.wikidata.org/entity/Q682827>', '<http://www.wikidata.org/entity/Q1345>', '<http://www.wikidata.org/

In [376]:
related_dict = {}
related_dict1= {}
for k, v in related.items():   
    relatedlist = ' '.join(v)


    query_results = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT DISTINCT ?place ?place_label ?coord
        WHERE {
            VALUES ?place {"""+relatedlist+"""} . 
            ?place rdfs:label ?place_label . 
            FILTER (langMatches(lang(?place_label), "EN")) . ?place wdt:P625 ?coord
            } 

        """

    # set the endpoint 
    sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
    sparql_wd.setQuery(query_results)
    # set the returned format
    sparql_wd.setReturnFormat(JSON)
    # get the results
    results = sparql_wd.query().convert()

    for result in results["results"]["bindings"]:
        #print(result)
        place = '<' + str(result["place"]["value"]) + '>'
        place_name = str(result["place_label"]["value"]) 
        coord = result["coord"]["value"][6:-1].split(" ")
        key = tuple([place_name, place, float(coord[1]), float(coord[0])])
        for value in v:
            if value == str(key[1]):
                if key not in prova.keys():
                    prova[key] = set([k])
                else: 
                    prova[key].update([k])
                

                
for k, v in prova.items():
    for el in v:
        if k not in related_dict1.keys():
            related_dict1[k] = [el]
        else:
            related_dict1[k].append(el)


for k, v in related_dict1.items():       
    print(k, v)


('University of Freiburg', '<http://www.wikidata.org/entity/Q153987>', 47.994166666, 7.846944444) [('<http://www.wikidata.org/entity/Q18935222>', 'Werner Cohn'), ('<http://www.wikidata.org/entity/Q1712683>', 'Julius S. Held'), ('<http://www.wikidata.org/entity/Q1629748>', 'Kurt Badt')]
('Downside School', '<http://www.wikidata.org/entity/Q5303281>', 51.255, -2.495) [('<http://www.wikidata.org/entity/Q537874>', 'John Pope-Hennessy')]
('Sapienza University of Rome', '<http://www.wikidata.org/entity/Q209344>', 41.903333333, 12.515833333) [('<http://www.wikidata.org/entity/Q2824734>', 'Adolfo Venturi'), ('<http://www.wikidata.org/entity/Q1089074>', 'Federico Zeri')]
('Humboldt University of Berlin', '<http://www.wikidata.org/entity/Q152087>', 52.518055555, 13.393333333) [('<http://www.wikidata.org/entity/Q18935222>', 'Werner Cohn'), ('<http://www.wikidata.org/entity/Q1715096>', 'Ulrich Middeldorf'), ('<http://www.wikidata.org/entity/Q60185>', 'Aby Warburg'), ('<http://www.wikidata.org/enti

In [375]:
from ipywidgets import HTML

from ipyleaflet import Map, Marker, Popup, LayersControl

center = (41.080684, -30.683374)


m = Map(center=center, zoom=3, close_popup_on_click=False)



for k,v in related_dict1.items():
    m.add_layer(marker)
    marker = Marker(location=(k[2], k[3]))
    message2 = HTML()
    marker.popup = message2
    message2.description = ""
    namelist = str()
    for value in v:
        namelist= namelist + "<a href='http://artchives.fondazionezeri.unibo.it/historian-" + value[0][32:-1] + "'>" + value[1] + "</a>" + ". "
    message2.value = "<b>" + k[0] + "</b>" + "<br>"  + namelist





m

Map(center=[41.080684, -30.683374], close_popup_on_click=False, controls=(ZoomControl(options=['position', 'zo…

In [390]:
hist_dict = {}
hist_list = []
for k, v in related_dict1.items():
    for value in v:
        if value not in hist_list:
            hist_list.append(value)
    for el in hist_list:
        if el in v:
            if el not in hist_dict.keys():
                hist_dict[el] = [k]
            else:
                hist_dict[el].append(k)
        

for k, v in hist_dict.items():            
    print(k, v)

('<http://www.wikidata.org/entity/Q18935222>', 'Werner Cohn') [('University of Freiburg', '<http://www.wikidata.org/entity/Q153987>', 47.994166666, 7.846944444), ('Humboldt University of Berlin', '<http://www.wikidata.org/entity/Q152087>', 52.518055555, 13.393333333), ('New York University', '<http://www.wikidata.org/entity/Q49210>', 40.73, -73.995), ('Florence', '<http://www.wikidata.org/entity/Q2044>', 43.771388888, 11.254166666), ('University of Strasbourg', '<http://www.wikidata.org/entity/Q157575>', 48.580277777, 7.764444444), ('Assisi', '<http://www.wikidata.org/entity/Q20103>', 43.07, 12.6175), ('Uffizi', '<http://www.wikidata.org/entity/Q51252>', 43.768438888, 11.2559), ('Uffizi', '<http://www.wikidata.org/entity/Q51252>', 43.7684, 11.2559), ('Uffizi', '<http://www.wikidata.org/entity/Q51252>', 43.768997, 11.255698)]
('<http://www.wikidata.org/entity/Q1712683>', 'Julius S. Held') [('University of Freiburg', '<http://www.wikidata.org/entity/Q153987>', 47.994166666, 7.846944444),

In [419]:
from ipyleaflet import LayerGroup
center = (41.080684, -30.683374)


m = Map(center=center, zoom=3, close_popup_on_click=False)
control = LayersControl(position='topright')
m.add_control(control)

for k,v in hist_dict.items():
    for value in v:
        marker = Marker(location=(value[2], value[3]), name=k[1])
        m.add_layer(marker)
        message2 = HTML()
        marker.popup = message2
        message2.description = ""
        message2.value ="<b>" + "<a href='http://artchives.fondazionezeri.unibo.it/historian-" + k[0][32:-1] +"'>" + k[1] + "</a>" + "</b>" + "<br>"  + value[0]
 





m

Map(center=[41.080684, -30.683374], close_popup_on_click=False, controls=(ZoomControl(options=['position', 'zo…