In [1]:
#! pip install rdflib
import rdflib
from rdflib import Namespace , Literal , URIRef
from rdflib.namespace import RDF , RDFS

# create an empty Graph
g = rdflib.ConjunctiveGraph()

# parse a local RDF file by specifying the format
result = g.parse("Desktop/dhdk_epds/resources/artchives.nq", format='nquads')

# bind the uncommon namespaces
wd = Namespace("http://www.wikidata.org/entity/") # remember that a prefix matches a URI until the last slash (or hashtag #)
wdt = Namespace("http://www.wikidata.org/prop/direct/")
art = Namespace("https://w3id.org/artchives/")
rdfs = Namespace ("http://www.w3.org/2000/01/")

# Get the list of art historians in our graph "g"
arthistorians_list = set()

# iterate over the triples in the graph
for s,p,o in g.triples(( None, wdt.P170, None)):   # people "o" are the creator "wdt.P170" of a collection "s"
    if "wikidata.org/entity/" in str(o):           # look for the substring to filter wikidata entities only
        arthistorians_list.add('<' + str(o) + '>')     # remember to transform them in strings! 
    
#print(arthistorians_list)


In [2]:
#!pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# get the endpoint API
wikidata_endpoint = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"

# prepare the values to be queried
historians = ' '.join(arthistorians_list) # <uri1> <uri2> <uri3> ... <uriN>
#print(historians)
# prepare the query: for each historian in ARTchives check in wikidata if there are work or education places.
formationplace_query = """ 
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?historian ?workplace ?workplace_label ?coordinates1 ?eduplace ?eduplace_label ?coordinates2 
WHERE {
    VALUES ?historian {"""+historians+"""} . 
    optional {?historian wdt:P108 ?workplace . ?workplace rdfs:label ?workplace_label .
    FILTER (langMatches(lang(?workplace_label), "EN")) optional {?workplace wdt:P625 ?coordinates1}}
    optional {?historian wdt:P69 ?eduplace . 
    ?eduplace rdfs:label ?eduplace_label .
    FILTER (langMatches(lang(?eduplace_label), "EN")) optional {?eduplace wdt:P625 ?coordinates2 }}
    } 
    
"""

# set the endpoint 
sparql_wd = SPARQLWrapper(wikidata_endpoint)
# set the query
sparql_wd.setQuery(formationplace_query)
# set the returned format
sparql_wd.setReturnFormat(JSON)
# get the results
results = sparql_wd.query().convert()
duplicate = []
# manipulate the result
for result in results["results"]["bindings"]:
    #print(result)
    historian_uri = result["historian"]["value"]
    #print("historian:", historian_uri)
    if "workplace" in result: 
        workplace = result["workplace"]["value"]
        if "workplace_label" in result: 
            workplace_label = result["workplace_label"]["value"]
            #print("work:", workplace, workplace_label)

                    # only if both uri and label are found we add them to 
            g.add(( URIRef(historian_uri) , URIRef(wdt.P108) , URIRef(workplace) ))
            g.add(( URIRef(workplace) , RDFS.label , Literal(workplace_label) ))
            if "coordinates1" in result:
                work_coord = result["coordinates1"]["value"][6:-1].split(" ")
                g.add(( URIRef(workplace) , URIRef(wdt.P625) , Literal(work_coord) ))
    
    if "eduplace" in result: 
        eduplace = result["eduplace"]["value"]
        if "eduplace_label" in result: 
            eduplace_label = result["eduplace_label"]["value"]
            #print("education:", eduplace, eduplace_label)
                    # only if both uri and label are found we add them to the graph
            g.add(( URIRef(historian_uri) , URIRef(wdt.P69) , URIRef(eduplace) ))
            g.add(( URIRef(eduplace) , RDFS.label , Literal(eduplace_label) ))
            if "coordinates2" in result:
                eduplace_coord = result["coordinates2"]["value"][6:-1].split(" ")
                g.add(( URIRef(eduplace) , URIRef(wdt.P625) , Literal(eduplace_coord) ))


In [3]:
firstdict = {}
final = {}
for hist, prop, place in g.triples((None, wdt.P69, None)):
    for eduplace, hasname, eduplacename in g.triples((place, RDFS.label, None)):
        for edup, hascoordinates, coord in g.triples((place, wdt.P625, None)):
            for historian, p, name in g.triples(( hist, RDFS.label, None)):
                if "wikidata.org/entity/" in str(historian):  
                    key = (eduplacename.strip(), "<" + place.strip() +">", float(coord.split(" ")[1][1:-2]), float(coord.split(" ")[0][2:-2]))
                    value = tuple(["<" + hist.strip() +">", name.strip()])
                    if key not in firstdict.keys():
                        firstdict[key] = set([value])
                    else:
                        firstdict[key].update([value])


                    
for hist, prop, place in g.triples((None, wdt.P108, None)):
    for place, hasname, workplacename in g.triples((place, RDFS.label, None)):
        for place, hasCoordinates, coord in g.triples((place, wdt.P625, None)):
            for historian, p, name in g.triples(( hist, RDFS.label, None)):   
                if "wikidata.org/entity/" in str(historian):  
                    key = (workplacename.strip(), "<" + place.strip() +">", float(coord.split(" ")[1][1:-2]), float(coord.split(" ")[0][2:-2]))
                    value = tuple(["<" + hist.strip() +">", name.strip()])
                    if key not in firstdict.keys():
                        firstdict[key] = set([value])
                    else:
                        firstdict[key].update([value])


for k, v in firstdict.items(): #creation of a dictionary that has as key a tuple with the info for a place and as values a list of tuples of art historians connected to that place.
    for el in v:
        if k not in final.keys():
            final[k] = [el]
        else:
            final[k].append(el)




In [4]:
#! pip import csv
import csv

# prepare the csv file
with open('formationplaces.csv', mode='w') as my_file:
    my_writer = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    # write the column names
    my_writer.writerow(['place', 'place_uri','lat', 'long'])
    
    # access the list of tuples of the query results
    for res in final:
        # write in the csv
        my_writer.writerow([res[0], res[1], res[2], res[3]])

In [5]:
#! pip import pandas
import pandas as pd

# parse the csv into a dataframe
df = pd.read_csv("formationplaces.csv")
# print the first 5 rows
df.head()

Unnamed: 0,place,place_uri,lat,long
0,New College,<http://www.wikidata.org/entity/Q1376987>,51.754167,-1.251667
1,Ludwig Maximilian University of Munich,<http://www.wikidata.org/entity/Q55044>,48.150833,11.580278
2,Slade School of Fine Art,<http://www.wikidata.org/entity/Q1399299>,51.525,-0.1344
3,Vassar College,<http://www.wikidata.org/entity/Q2093794>,41.686867,-73.895189
4,New York University,<http://www.wikidata.org/entity/Q49210>,40.73,-73.995


In [6]:
g.serialize(destination='Desktop/dhdk_epds/resources/artchives2.nq', format='nquads')
result = g.parse("Desktop/dhdk_epds/resources/artchives2.nq", format='nquads')



In [7]:
related = {} #dictionary to collect the main subjects of an art historian -p921- present in ARTchives data 
for s,p,o in g.triples(( None, wdt.P170, None)):   
    if "wikidata.org/entity/" in str(o):           
        for hist, prop, obj in g.triples((o, wdt.P921, None)):     
            for subj, pr, name in g.triples(( hist, RDFS.label, None)):   
                key = tuple(['<' + str(hist) + '>', name.strip()])
                value = '<' + str(obj) + '>'
                if key not in related.keys(): #the dict has as keys the historians and as values the related list of main subjects.
                    related[key] = [value]
                else:
                    related[key].append(value)
    
for k, v in related.items():
    print(k, v)



('<http://www.wikidata.org/entity/Q55453618>', 'Stefano Tumidei') ['<http://www.wikidata.org/entity/Q160538>', '<http://www.wikidata.org/entity/Q314579>', '<http://www.wikidata.org/entity/Q13367>', '<http://www.wikidata.org/entity/Q1361667>', '<http://www.wikidata.org/entity/Q1089074>', '<http://www.wikidata.org/entity/Q2044>', '<http://www.wikidata.org/entity/Q220>', '<http://www.wikidata.org/entity/Q1891>', '<http://www.wikidata.org/entity/Q1263>']
('<http://www.wikidata.org/entity/Q1715096>', 'Ulrich Middeldorf') ['<http://www.wikidata.org/entity/Q7787>', '<http://www.wikidata.org/entity/Q7787>', '<http://www.wikidata.org/entity/Q714972>', '<http://www.wikidata.org/entity/Q714972>', '<http://www.wikidata.org/entity/Q70804>', '<http://www.wikidata.org/entity/Q70804>', '<http://www.wikidata.org/entity/Q317053>', '<http://www.wikidata.org/entity/Q317053>', '<http://www.wikidata.org/entity/Q1361667>', '<http://www.wikidata.org/entity/Q1361667>', '<http://www.wikidata.org/entity/Q55044>'

In [8]:
related_dict= {}
prova = {}
for k, v in related.items():  #for each art historian I extract those main subjects which are defined as places in wikidatata 
    relatedlist = ' '.join(v)


    query_results = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT DISTINCT ?place ?place_label ?coord
        WHERE {
            VALUES ?place {"""+relatedlist+"""} . 
            ?place rdfs:label ?place_label . 
            FILTER (langMatches(lang(?place_label), "EN")) . ?place wdt:P625 ?coord
            } 

        """

    # set the endpoint 
    sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
    sparql_wd.setQuery(query_results)
    # set the returned format
    sparql_wd.setReturnFormat(JSON)
    # get the results
    results = sparql_wd.query().convert()

    for result in results["results"]["bindings"]:
        #print(result)
        place = '<' + str(result["place"]["value"]) + '>'
        place_name = str(result["place_label"]["value"]) 
        coord = result["coord"]["value"][6:-1].split(" ")
        key = tuple([place_name, place, float(coord[1]), float(coord[0])])
        for value in v:
            if value == str(key[1]):
                if key not in prova.keys():
                    prova[key] = set([k])
                else: 
                    prova[key].update([k])
                

                
for k, v in prova.items():
    for el in v:
        if k not in related_dict.keys():
            related_dict[k] = [el]
        else:
            related_dict[k].append(el)


for k, v in related_dict.items():       
    print(k, v)


('Rome', '<http://www.wikidata.org/entity/Q220>', 41.893055555, 12.482777777) [('<http://www.wikidata.org/entity/Q1641821>', 'Otto Lehmann-Brockhaus'), ('<http://www.wikidata.org/entity/Q61913691>', 'Luisa Vertova'), ('<http://www.wikidata.org/entity/Q90407>', 'Richard Krautheimer'), ('<http://www.wikidata.org/entity/Q3057287>', 'Ernst Steinmann'), ('<http://www.wikidata.org/entity/Q55453618>', 'Stefano Tumidei'), ('<http://www.wikidata.org/entity/Q457739>', 'Leo Steinberg'), ('<http://www.wikidata.org/entity/Q1089074>', 'Federico Zeri'), ('<http://www.wikidata.org/entity/Q2824734>', 'Adolfo Venturi'), ('<http://www.wikidata.org/entity/Q85761254>', 'Julian Kliemann')]
('Emilia-Romagna', '<http://www.wikidata.org/entity/Q1263>', 44.75, 11.0) [('<http://www.wikidata.org/entity/Q55453618>', 'Stefano Tumidei')]
('Bologna', '<http://www.wikidata.org/entity/Q1891>', 44.493888888, 11.342777777) [('<http://www.wikidata.org/entity/Q55453618>', 'Stefano Tumidei'), ('<http://www.wikidata.org/enti

In [9]:
#!pip importa ipywidgets
from ipywidgets import HTML

from ipyleaflet import Map, Marker, Popup, LayersControl

center = (41.080684, -30.683374)


m = Map(center=center, zoom=3, close_popup_on_click=False)



for k,v in related_dict.items():
    marker = Marker(location=(k[2], k[3]))
    m.add_layer(marker)
    message = HTML()
    marker.popup = message
    message.description = ""
    namelist = str()
    for value in v:
        namelist= namelist + "<a href='http://artchives.fondazionezeri.unibo.it/historian-" + value[0][32:-1] + "'>" + value[1] + "</a>" + ". "
    message.value = "<b>" + k[0] + "</b>" + "<br>"  + namelist





m

Map(center=[41.080684, -30.683374], close_popup_on_click=False, controls=(ZoomControl(options=['position', 'zo…

In [10]:
hist_dict = {} #dict that has as keys art historians and as values the list of related places. 
hist_list = []
for k, v in related_dict.items():
    for value in v:
        if value not in hist_list:
            hist_list.append(value)
    for el in hist_list:
        if el in v:
            if el not in hist_dict.keys():
                hist_dict[el] = [k]
            else:
                hist_dict[el].append(k)
        

for k, v in hist_dict.items():            
    print(k, v)

('<http://www.wikidata.org/entity/Q1641821>', 'Otto Lehmann-Brockhaus') [('Rome', '<http://www.wikidata.org/entity/Q220>', 41.893055555, 12.482777777), ('Ludwig Maximilian University of Munich', '<http://www.wikidata.org/entity/Q55044>', 48.150833333, 11.580277777), ('Göttingen', '<http://www.wikidata.org/entity/Q3033>', 51.533888888, 9.935555555), ('Unna', '<http://www.wikidata.org/entity/Q3949>', 51.534722222, 7.688888888), ('University of Marburg', '<http://www.wikidata.org/entity/Q155354>', 50.810833333, 8.773611111), ('Zentralinstitut für Kunstgeschichte', '<http://www.wikidata.org/entity/Q190617>', 48.1445, 11.5665), ('University of Innsbruck', '<http://www.wikidata.org/entity/Q875788>', 47.263055555, 11.383888888)]
('<http://www.wikidata.org/entity/Q61913691>', 'Luisa Vertova') [('Rome', '<http://www.wikidata.org/entity/Q220>', 41.893055555, 12.482777777), ('Florence', '<http://www.wikidata.org/entity/Q2044>', 43.771388888, 11.254166666), ('Harvard University Center for Italian 

In [11]:
from ipyleaflet import LayerGroup
center = (41.080684, -30.683374)


m = Map(center=center, zoom=3, close_popup_on_click=False)
control = LayersControl(position='topright')
m.add_control(control)

for k,v in hist_dict.items():
    layer_group = LayerGroup(layers=(), name=k[1]) #creating different layers for different historians
    m.add_layer(layer_group)
    for value in v:
        marker = Marker(location=(value[2], value[3]))
        message = HTML()
        marker.popup = message
        message.description = ""
        message.value ="<b>" + "<a href='http://artchives.fondazionezeri.unibo.it/historian-" + k[0][32:-1] +"'>" + k[1] + "</a>" + "</b>" + "<br>"  + value[0]
        layer_group.add_layer(marker)





m

Map(center=[41.080684, -30.683374], close_popup_on_click=False, controls=(ZoomControl(options=['position', 'zo…

In [12]:
people = []
for k, v in related.items():   
    relatedlist = ' '.join(v)
    #print(relatedlist)
    query_res = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX wd: <http://www.wikidata.org/entity/>
        SELECT DISTINCT ?person ?person_label ?occupation
        WHERE {
            VALUES ?person {"""+relatedlist+"""} . 
            ?person wdt:P31 wd:Q5; rdfs:label ?person_label . 
            VALUES ?occupation {wd:Q1792450 wd:Q201788 wd:Q36180 wd:Q4164507 wd:Q1126160 wd:Q10732476} . 
            ?person wdt:P106 ?occupation .
            FILTER (langMatches(lang(?person_label), "EN") ) 
            } 

        """

    # set the endpoint 
    sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
    sparql_wd.setQuery(query_res)
    # set the returned format
    sparql_wd.setReturnFormat(JSON)
    # get the results
    results = sparql_wd.query().convert()

    for result in results["results"]["bindings"]:
        #print(result)
        person = '<' + str(result["person"]["value"]) + '>'
        person_name = str(result["person_label"]["value"])
        key = tuple([person_name, person])
        for value in v:
            if value == str(key[1]):
                tupla = tuple([k[1], k[0], person_name, person, 2])
                if tupla not in people:
                    people.append(tupla)

            
#print(people)


In [13]:
coll_related = {}
for coll,p,hist in g.triples(( None, wdt.P170, None)):
    for historian, proper, hist_name in g.triples((hist, RDFS.label, None)):     # people "o" are the creator "wdt.P170" of a collection "s"
        for collection, pr, coll_name in g.triples(( coll, RDFS.label, None)):   
            for col, prop, content in g.triples((coll, wdt.P921, None)):     
                for cont, pro, content_label in g.triples(( content, RDFS.label, None)): 
                    if "wikidata.org/entity/" in str(content):    
                        key = tuple([hist_name.strip(), '<' + str(hist) + '>'])
                        value = '<' + str(content) + '>'
                        if key not in coll_related.keys():
                            coll_related[key] = set([value])
                        else:
                            coll_related[key].add(value)
    


  
for k, v in coll_related.items():
    print( k, v)
    

#print(collrelatedlist)
 


('Stefano Tumidei', '<http://www.wikidata.org/entity/Q55453618>') {'<http://www.wikidata.org/entity/Q21543541>', '<http://www.wikidata.org/entity/Q7018>', '<http://www.wikidata.org/entity/Q912991>', '<http://www.wikidata.org/entity/Q134194>', '<http://www.wikidata.org/entity/Q166118>', '<http://www.wikidata.org/entity/Q14378>', '<http://www.wikidata.org/entity/Q3762068>', '<http://www.wikidata.org/entity/Q9134>', '<http://www.wikidata.org/entity/Q3779126>', '<http://www.wikidata.org/entity/Q3615689>', '<http://www.wikidata.org/entity/Q7015>', '<http://www.wikidata.org/entity/Q61481008>', '<http://www.wikidata.org/entity/Q23619996>', '<http://www.wikidata.org/entity/Q371979>', '<http://www.wikidata.org/entity/Q37853>', '<http://www.wikidata.org/entity/Q52905023>', '<http://www.wikidata.org/entity/Q134307>', '<http://www.wikidata.org/entity/Q891532>', '<http://www.wikidata.org/entity/Q1111180>', '<http://www.wikidata.org/entity/Q4692>', '<http://www.wikidata.org/entity/Q7017>', '<http://

In [14]:
for k, v in coll_related.items(): 
    coll_list = ' '.join(v)
    query_res = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX wd: <http://www.wikidata.org/entity/>
        SELECT DISTINCT ?person ?person_label ?occupation
        WHERE {
            VALUES ?person {"""+coll_list+"""} . 
            ?person wdt:P31 wd:Q5; rdfs:label ?person_label .
            VALUES ?occupation {wd:Q1792450 wd:Q201788 wd:Q36180 wd:Q4164507 wd:Q1126160 wd:Q10732476} . 
            ?person wdt:P106 ?occupation .
            FILTER (langMatches(lang(?person_label), "EN")) . 
            } 

        """

    # set the endpoint 
    sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
    sparql_wd.setQuery(query_res)
    # set the returned format
    sparql_wd.setReturnFormat(JSON)
    # get the results
    results = sparql_wd.query().convert()

    for result in results["results"]["bindings"]:
        person = '<' + str(result["person"]["value"]) + '>'
        person_name = str(result["person_label"]["value"]) 
        key = tuple([person_name, person])
        #print(death)
        for value in v:
            if value == str(key[1]):
                tupla = tuple([k[0], k[1], person_name, person, 3])
                if tupla not in people:
                    people.append(tupla)

for el in people:           
    print(el)

('Stefano Tumidei', '<http://www.wikidata.org/entity/Q55453618>', 'Federico Zeri', '<http://www.wikidata.org/entity/Q1089074>', 2)
('Stefano Tumidei', '<http://www.wikidata.org/entity/Q55453618>', 'Roberto Longhi', '<http://www.wikidata.org/entity/Q1361667>', 2)
('Ulrich Middeldorf', '<http://www.wikidata.org/entity/Q1715096>', 'Roberto Longhi', '<http://www.wikidata.org/entity/Q1361667>', 2)
('Ulrich Middeldorf', '<http://www.wikidata.org/entity/Q1715096>', 'Adolph Goldschmidt', '<http://www.wikidata.org/entity/Q70804>', 2)
('Ulrich Middeldorf', '<http://www.wikidata.org/entity/Q1715096>', 'Heinrich Wölfflin', '<http://www.wikidata.org/entity/Q123466>', 2)
('John Pope-Hennessy', '<http://www.wikidata.org/entity/Q537874>', 'Donatello', '<http://www.wikidata.org/entity/Q37562>', 2)
('John Pope-Hennessy', '<http://www.wikidata.org/entity/Q537874>', 'John Pope-Hennessy', '<http://www.wikidata.org/entity/Q537874>', 2)
('Leo Steinberg', '<http://www.wikidata.org/entity/Q457739>', 'Erwin Pan

In [15]:
wdpeople_query = """ 
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?historian ?student ?student_label ?master ?master_label ?influencer ?influencer_label ?signper ?signper_label
WHERE {
    VALUES ?historian {"""+historians+"""} . 
    optional {?historian wdt:P802 ?student . ?student rdfs:label ?student_label .
    FILTER (langMatches(lang(?student_label), "EN"))}
    optional {?historian wdt:P1066 ?master . 
    ?master rdfs:label ?master_label .
    FILTER (langMatches(lang(?master_label), "EN"))}
    optional {?historian wdt:P737 ?influencer . 
    ?influencer rdfs:label ?influencer_label .
    FILTER (langMatches(lang(?influencer_label), "EN"))} 
    optional {?historian wdt:P3342 ?signper . 
    ?signper rdfs:label ?signper_label .
    FILTER (langMatches(lang(?signper_label), "EN"))}
    } 
"""

sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
sparql_wd.setQuery(wdpeople_query)
    # set the returned format
sparql_wd.setReturnFormat(JSON)
    # get the results
results = sparql_wd.query().convert()

for result in results["results"]["bindings"]:
    #print(result)
    historian = result["historian"]["value"]
    if "influencer" in result: 
        influencer = result["influencer"]["value"]
        #print(influencer)
        if "influencer_label" in result: 
            influencer_label = result["influencer_label"]["value"]
            #print("influencer:", influencer, influencer_label)

                    # only if both uri and label are found we add them to 
            g.add(( URIRef(historian) , URIRef(wdt.P737) , URIRef(influencer) ))
            g.add(( URIRef(influencer) , RDFS.label , Literal(influencer_label) ))
           
    if "student" in result: 
        student = result["student"]["value"]
        print(historian)
        if "student_label" in result: 
            student_label = result["student_label"]["value"]
            print("historian:", historian, "student:", student, student_label)

                        # only if both uri and label are found we add them to 
            g.add(( URIRef(historian) , URIRef(wdt.P802) , URIRef(student) ))
            g.add(( URIRef(student) , RDFS.label , Literal(student_label) )) 
    
    if "master" in result: 
        master = result["master"]["value"]
        if "master_label" in result: 
            master_label = result["master_label"]["value"]
            #print("historian:", historian, "master:", master_label)

                        # only if both uri and label are found we add them to 
            g.add(( URIRef(historian) , URIRef(wdt.P1066) , URIRef(master) ))
            g.add(( URIRef(master) , RDFS.label , Literal(master_label) ))
    
    if "signper" in result: 
        signper = result["signper"]["value"]
        if "signper_label" in result: 
            signper_label = result["signper_label"]["value"]
            #print("signper:", signper, signper_label)

                        # only if both uri and label are found we add them to 
            g.add(( URIRef(historian) , URIRef(wdt.P3342) , URIRef(signper) ))
            g.add(( URIRef(signper) , RDFS.label , Literal(signper_label) ))   

http://www.wikidata.org/entity/Q2824734
historian: http://www.wikidata.org/entity/Q2824734 student: http://www.wikidata.org/entity/Q3659000 Carlo Anti
http://www.wikidata.org/entity/Q2824734
historian: http://www.wikidata.org/entity/Q2824734 student: http://www.wikidata.org/entity/Q19301518 Géza de Fràncovich
http://www.wikidata.org/entity/Q2824734
historian: http://www.wikidata.org/entity/Q2824734 student: http://www.wikidata.org/entity/Q253864 Pietro Toesca
http://www.wikidata.org/entity/Q2824734
historian: http://www.wikidata.org/entity/Q2824734 student: http://www.wikidata.org/entity/Q253864 Pietro Toesca
http://www.wikidata.org/entity/Q2824734
historian: http://www.wikidata.org/entity/Q2824734 student: http://www.wikidata.org/entity/Q253864 Pietro Toesca


In [16]:
for hist, prop, influencer in g.triples((None, wdt.P737, None)):
    print(hist)
    for influencer, hasname, influencername in g.triples((influencer, RDFS.label, None)):
        for historian, p, name in g.triples(( hist, RDFS.label, None)):
            if "wikidata.org/entity/" in str(hist):  
                tuplawd = tuple([name.strip(), "<" + hist.strip() +">", influencername.strip(), "<" + influencer.strip() +">", 1])
                if tuplawd not in people:
                    people.append(tuplawd)
                
for hist, prop, student in g.triples((None, wdt.P802, None)):
    #print(hist)
    for historian, p, name in g.triples(( hist, RDFS.label, None)):
        #print(name)
        for student, hasname, studentname in g.triples((student, RDFS.label, None)):
            if "wikidata.org/entity/" in str(hist):  
                tuplawd = tuple([name.strip(), "<" + hist.strip() +">", studentname.strip(), "<" + student.strip() +">", 1])
                #print(tuplawd)
                if tuplawd not in people:
                    people.append(tuplawd)

for hist, prop, master in g.triples((None, wdt.P1066, None)):
    #print(hist)
    for historian, p, name in g.triples(( hist, RDFS.label, None)):
        for master, hasname, mastername in g.triples((master, RDFS.label, None)):
            if "wikidata.org/entity/" in str(hist):  
                tuplawd = tuple([name.strip(), "<" + hist.strip() +">", mastername.strip(), "<" + master.strip() +">", 1])
                #print(tuplawd)
                if tuplawd not in people:
                    people.append(tuplawd)
                
                
for hist, prop, signper in g.triples((None, wdt.P3342, None)):
    #print(hist)
    for historian, p, name in g.triples(( hist, RDFS.label, None)):
        for signper, hasname, signpername in g.triples((signper, RDFS.label, None)):
            if "wikidata.org/entity/" in str(hist):  
                tuplawd = tuple([name.strip(), "<" + hist.strip() +">", signpername.strip(), "<" + signper.strip() +">", 1])
                #print(tuplawd)
                if tuplawd not in people:
                    people.append(tuplawd)
                


In [17]:
with open('people.csv', mode='w') as my_file:
    my_writer = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    # write the column names
    my_writer.writerow(['art_hist', 'art_hist_uri','person', 'person_uri', 'weight'])
    
    # access the list of tuples of the query results
    for res in people:
        # write in the csv
        my_writer.writerow([res[0], res[1], res[2], res[3], res[4]])

In [18]:
# parse the csv into a dataframe
df = pd.read_csv("people.csv")
# print the first 5 rows
df.head()

Unnamed: 0,art_hist,art_hist_uri,person,person_uri,weight
0,Stefano Tumidei,<http://www.wikidata.org/entity/Q55453618>,Federico Zeri,<http://www.wikidata.org/entity/Q1089074>,2
1,Stefano Tumidei,<http://www.wikidata.org/entity/Q55453618>,Roberto Longhi,<http://www.wikidata.org/entity/Q1361667>,2
2,Ulrich Middeldorf,<http://www.wikidata.org/entity/Q1715096>,Roberto Longhi,<http://www.wikidata.org/entity/Q1361667>,2
3,Ulrich Middeldorf,<http://www.wikidata.org/entity/Q1715096>,Adolph Goldschmidt,<http://www.wikidata.org/entity/Q70804>,2
4,Ulrich Middeldorf,<http://www.wikidata.org/entity/Q1715096>,Heinrich Wölfflin,<http://www.wikidata.org/entity/Q123466>,2


In [19]:
from pyvis import network as net


people_net = net.Network(height="750px", width="100%", bgcolor="white", font_color="#1cae81", notebook="True")

# set the physics layout of the network
people_net.barnes_hut()
people_data = pd.read_csv("people.csv")

sources = people_data['art_hist']
targets = people_data['person']
weights = people_data['weight']
uri = people_data['art_hist_uri']

edge_data = zip(sources, targets, weights)

for e in edge_data:
    #print(e)
    src = e[0]
    dst = e[1]
    w = e[2]


    people_net.add_node(src, src, title=src, color= "#1cae81")
    people_net.add_node(dst, dst, title=dst, color= "#1cae81")
    if w == 1:
        people_net.add_edge(src, dst, value=w, color="black")
    elif w ==2:
        people_net.add_edge(src, dst, value=w, color="gainsboro")
    elif w ==3:
        people_net.add_edge(src, dst, value=w, color="#1cae81")

neighbor_map = people_net.get_adj_list()

# add neighbor data to node hover data
for node in people_net.nodes:
    #print(node)
    node["title"] += " relations:<br>" + "<br>".join(neighbor_map[node["id"]])
    node["value"] = len(neighbor_map[node["id"]])   
    node["label"] = node["id"] 

people_net.show("people.html")

In [20]:

# sparql

query_periods = g.query(
    """PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    SELECT ?hist_label ?hist ?period (SAMPLE(?label) AS ?period_label) 
    WHERE {?coll <https://w3id.org/artchives/hasSubjectPeriod> ?period ; rdfs:label ?coll_label 
    . ?coll wdt:P170 ?hist . ?hist rdfs:label ?hist_label .
    ?period rdfs:label ?label . 
    }
    GROUP BY ?period ?label ?hist ?hist_label 
    ORDER BY ?period""")

periods = set()
period_dict = {}
for result in query_periods:
    #print(result)
    hist = result[0].strip()
    period = result[2].strip()
    #print(period)
    if hist not in period_dict.keys():
        period_dict[hist] = set([period])
    else:
        period_dict[hist].add(period)
    

for k,v in period_dict.items():
    for value in v:
        periods.add('<' + str(value) + '>') 
print(periods)

{'<http://www.wikidata.org/entity/Q211884>', '<http://www.wikidata.org/entity/Q7015>', '<http://www.wikidata.org/entity/Q131808>', '<http://www.wikidata.org/entity/Q7017>', '<http://www.wikidata.org/entity/Q4692>', '<http://www.wikidata.org/entity/Q7016>', '<http://www.wikidata.org/entity/Q7018>', '<http://www.wikidata.org/entity/Q37068>', '<http://www.wikidata.org/entity/Q38166>', '<http://www.wikidata.org/entity/Q7034>', '<http://www.wikidata.org/entity/Q429811>', '<http://www.wikidata.org/entity/Q14378>', '<http://www.wikidata.org/entity/Q6955>', '<http://www.wikidata.org/entity/Q37853>', '<http://www.wikidata.org/entity/Q8683>', '<http://www.wikidata.org/entity/Q12554>'}


In [21]:
period_list = ' '.join(periods)
period_res = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX wd: <http://www.wikidata.org/entity/>
        SELECT DISTINCT ?period ?period_label ?startdate ?enddate
        WHERE {
            VALUES ?period {"""+period_list+"""} . 
            ?period wdt:P580 ?startdate ; wdt:P582 ?enddate; rdfs:label ?period_label .
            FILTER (langMatches(lang(?period_label), "EN"))
            
            } 
        """
sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
sparql_wd.setQuery(period_res)
    # set the returned format
sparql_wd.setReturnFormat(JSON)
    # get the results
results = sparql_wd.query().convert()

period_dict = {}
for result in results["results"]["bindings"]:
    period = '<' + str(result["period"]["value"]) + '>'
    period_label = result["period_label"]["value"]
    start = int(result["startdate"]["value"][0:4])
    end = int(result["enddate"]["value"][0:4])
    key = tuple([period, period_label])
    years = tuple([start, end])
    if key not in period_dict.keys():
        period_dict[key] = years

for k, v in period_dict.items():
    print(k,v)


('<http://www.wikidata.org/entity/Q6955>', '19th century') (1801, 1900)
('<http://www.wikidata.org/entity/Q4692>', 'Renaissance') (1400, 1650)
('<http://www.wikidata.org/entity/Q7015>', '18th century') (1701, 1800)
('<http://www.wikidata.org/entity/Q7016>', '17th century') (1601, 1700)
('<http://www.wikidata.org/entity/Q7034>', '14th century') (1301, 1400)
('<http://www.wikidata.org/entity/Q7018>', '15th century') (1401, 1500)
('<http://www.wikidata.org/entity/Q12554>', 'Middle Ages') (476, 1500)
('<http://www.wikidata.org/entity/Q7017>', '16th century') (1501, 1600)
('<http://www.wikidata.org/entity/Q8683>', 'Cold War') (1947, 1991)
('<http://www.wikidata.org/entity/Q14378>', 'neoclassicism') (1760, 1830)
('<http://www.wikidata.org/entity/Q14378>', 'Neoclassicism') (1760, 1830)
('<http://www.wikidata.org/entity/Q37853>', 'Baroque') (1590, 1750)
('<http://www.wikidata.org/entity/Q37068>', 'Romanticism') (1800, 1900)


In [22]:
artist_rel = []
artist_dict = {}
artist_set = set()
for k, v in coll_related.items(): 
    coll_list = ' '.join(v)
    query_res = """
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX wd: <http://www.wikidata.org/entity/>
        SELECT DISTINCT ?person ?person_label ?occupation ?birthdate ?deathdate
        WHERE {
            VALUES ?person {"""+coll_list+"""} . 
            ?person wdt:P31 wd:Q5; rdfs:label ?person_label .
            VALUES ?occupation {wd:Q1281618 wd:Q42973 wd:Q483501 wd:Q1028181 wd:Q329439} . 
            ?person wdt:P106 ?occupation ; wdt:P569 ?birthdate ; wdt:P570 ?deathdate.
            FILTER (langMatches(lang(?person_label), "EN")) . 
            } 

        """

    # set the endpoint 
    sparql_wd = SPARQLWrapper(wikidata_endpoint)
    # set the query
    sparql_wd.setQuery(query_res)
    # set the returned format
    sparql_wd.setReturnFormat(JSON)
    # get the results
    results = sparql_wd.query().convert()

    for result in results["results"]["bindings"]:
        #print(result)
        artist = '<' + str(result["person"]["value"]) + '>'
        artist_name = str(result["person_label"]["value"]) 
        death = str(result["deathdate"]["value"]) 
        birth = str(result["birthdate"]["value"])
        key = tuple([artist, artist_name, int(birth[0:4]), int(death[0:4])])
        for value in v:
            #print(value)
            if value == artist:
                if key not in artist_dict.keys():      
                    artist_dict[key] = set([k[0]])
                else:
                    artist_dict[key].add(k[0])
                artist_set.add(artist)
                
for k, v in artist_dict.items():
    print(k, v)
    

('<http://www.wikidata.org/entity/Q160538>', 'Gian Lorenzo Bernini', 1598, 1680) {'Richard Krautheimer', 'Wolfgang Lotz', 'Stefano Tumidei', 'Federico Zeri'}
('<http://www.wikidata.org/entity/Q336798>', 'Alessandro Algardi', 1598, 1654) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q912991>', 'Marco Palmezzano', 1460, 1539) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q3762068>', 'Giacomo De Maria', 1762, 1838) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q1111180>', 'Pietro Bracci', 1700, 1773) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q23619996>', 'Antonio Trentanove', 1745, 1812) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q371979>', 'Pierre Puget', 1620, 1694) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q314579>', 'Melozzo da Forlì', 1438, 1494) {'Stefano Tumidei'}
('<http://www.wikidata.org/entity/Q5592>', 'Michelangelo', 1475, 1564) {'John Pope-Hennessy', 'Ernst Steinmann', 'Kornél Fabriczy'}
('<http://www.wikidata.org/ent

In [23]:
for k, v in artist_dict.items():
    for value in v:
        tupla = tuple([k[1], value, 1])
        if tupla not in artist_rel:
            artist_rel.append(tupla)
        for el in v:
            if value != el:
                tupla = tuple([el, value, 1])
                tuplabis = tuple([value, el, 1])
                if tuplabis not in artist_rel:
                    artist_rel.append(tupla)
    
print(artist_rel)

[('Gian Lorenzo Bernini', 'Richard Krautheimer', 1), ('Wolfgang Lotz', 'Richard Krautheimer', 1), ('Stefano Tumidei', 'Richard Krautheimer', 1), ('Federico Zeri', 'Richard Krautheimer', 1), ('Gian Lorenzo Bernini', 'Wolfgang Lotz', 1), ('Stefano Tumidei', 'Wolfgang Lotz', 1), ('Federico Zeri', 'Wolfgang Lotz', 1), ('Gian Lorenzo Bernini', 'Stefano Tumidei', 1), ('Federico Zeri', 'Stefano Tumidei', 1), ('Gian Lorenzo Bernini', 'Federico Zeri', 1), ('Alessandro Algardi', 'Stefano Tumidei', 1), ('Marco Palmezzano', 'Stefano Tumidei', 1), ('Giacomo De Maria', 'Stefano Tumidei', 1), ('Pietro Bracci', 'Stefano Tumidei', 1), ('Antonio Trentanove', 'Stefano Tumidei', 1), ('Pierre Puget', 'Stefano Tumidei', 1), ('Melozzo da Forlì', 'Stefano Tumidei', 1), ('Michelangelo', 'John Pope-Hennessy', 1), ('Ernst Steinmann', 'John Pope-Hennessy', 1), ('Kornél Fabriczy', 'John Pope-Hennessy', 1), ('Michelangelo', 'Ernst Steinmann', 1), ('Kornél Fabriczy', 'Ernst Steinmann', 1), ('Michelangelo', 'Kornél F

In [24]:
from collections import Counter

count_dict = Counter(artist_rel)
artist_rel_final = []

for k, v in count_dict.items():
    if v != 1:
        x = list(k)
        x[2] = v
        k = tuple(x)
    artist_rel_final.append(k)
    
print(artist_rel_final)

[('Gian Lorenzo Bernini', 'Richard Krautheimer', 1), ('Wolfgang Lotz', 'Richard Krautheimer', 1), ('Stefano Tumidei', 'Richard Krautheimer', 1), ('Federico Zeri', 'Richard Krautheimer', 1), ('Gian Lorenzo Bernini', 'Wolfgang Lotz', 1), ('Stefano Tumidei', 'Wolfgang Lotz', 1), ('Federico Zeri', 'Wolfgang Lotz', 1), ('Gian Lorenzo Bernini', 'Stefano Tumidei', 1), ('Federico Zeri', 'Stefano Tumidei', 1), ('Gian Lorenzo Bernini', 'Federico Zeri', 1), ('Alessandro Algardi', 'Stefano Tumidei', 1), ('Marco Palmezzano', 'Stefano Tumidei', 1), ('Giacomo De Maria', 'Stefano Tumidei', 1), ('Pietro Bracci', 'Stefano Tumidei', 1), ('Antonio Trentanove', 'Stefano Tumidei', 1), ('Pierre Puget', 'Stefano Tumidei', 1), ('Melozzo da Forlì', 'Stefano Tumidei', 1), ('Michelangelo', 'John Pope-Hennessy', 1), ('Ernst Steinmann', 'John Pope-Hennessy', 2), ('Kornél Fabriczy', 'John Pope-Hennessy', 1), ('Michelangelo', 'Ernst Steinmann', 1), ('Kornél Fabriczy', 'Ernst Steinmann', 1), ('Michelangelo', 'Kornél F

In [25]:
for k, v in coll_related.items():
    #print(k, v)
    for key, value in period_dict.items():
        for artist, hist in artist_dict.items():
            if key[0] in v and artist[0] in v:
                if (value[0] <= artist[2] and value[1] >= artist[3]) or (value[0] >= artist[2] and value[1] <= artist[3]) or (value[0] >= artist[2] and (value[1] + 15) >= artist[3] and artist[2] >= value[1]) or (value[0] <= artist[2] and (value[1] - 15) >= artist[2] and value[1] <= artist[3]):
                    tupla = tuple([k[0], artist[1], 2])
                    #print(tupla)
                    tupla_remove1 = tuple([k[0], artist[1], 1])
                    tupla_remove2 = tuple([artist[1], k[0], 1])
                    if tupla_remove1 in artist_rel_final: 
                        artist_rel_final.remove(tupla_remove1)
                    elif tupla_remove2 in artist_rel_final: 
                        artist_rel_final.remove(tupla_remove2)
                    artist_rel_final.append(tupla)
                        
print(artist_rel_final)                 

[('Wolfgang Lotz', 'Richard Krautheimer', 1), ('Stefano Tumidei', 'Richard Krautheimer', 1), ('Federico Zeri', 'Richard Krautheimer', 1), ('Stefano Tumidei', 'Wolfgang Lotz', 1), ('Federico Zeri', 'Wolfgang Lotz', 1), ('Federico Zeri', 'Stefano Tumidei', 1), ('Ernst Steinmann', 'John Pope-Hennessy', 2), ('Kornél Fabriczy', 'John Pope-Hennessy', 1), ('Kornél Fabriczy', 'Ernst Steinmann', 1), ('Everett Fahy', 'John Pope-Hennessy', 1), ('Everett Fahy', 'Ernst Steinmann', 1), ('Donatello', 'John Pope-Hennessy', 1), ('Federico Zeri', 'John Pope-Hennessy', 1), ('Cecil Beaton', 'John Pope-Hennessy', 1), ('Leo Steinberg', 'Richard Krautheimer', 1), ('Auguste Rodin', 'Leo Steinberg', 1), ('Gustav Ludwig', 'Luisa Vertova', 1), ('Leo Steinberg', 'Luisa Vertova', 1), ('Fritz Heinemann', 'Luisa Vertova', 1), ('Titian', 'Gustav Ludwig', 1), ('Leo Steinberg', 'Gustav Ludwig', 1), ('Fritz Heinemann', 'Gustav Ludwig', 3), ('Fritz Heinemann', 'Leo Steinberg', 1), ('Luca Beltrami', 'Ernst Steinmann', 1),

In [26]:
with open('artists.csv', mode='w') as my_file:
    my_writer = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    # write the column names
    my_writer.writerow(['art_hist','artist','weight'])
    
    # access the list of tuples of the query results
    for res in artist_rel_final:
        # write in the csv
        my_writer.writerow([res[0], res[1], res[2]])

In [27]:
# parse the csv into a dataframe
df = pd.read_csv("artists.csv")
# print the first 5 rows
df.head()

Unnamed: 0,art_hist,artist,weight
0,Wolfgang Lotz,Richard Krautheimer,1
1,Stefano Tumidei,Richard Krautheimer,1
2,Federico Zeri,Richard Krautheimer,1
3,Stefano Tumidei,Wolfgang Lotz,1
4,Federico Zeri,Wolfgang Lotz,1


In [29]:
people_net = net.Network(height="750px", width="100%", bgcolor="white", font_color="#1cae81", notebook="True")

# set the physics layout of the network
people_net.barnes_hut()
people_data = pd.read_csv("artists.csv")

sources = people_data['art_hist']
targets = people_data['artist']
weights = people_data['weight']


edge_data = zip(sources, targets, weights)

for e in edge_data:
    #print(e)
    src = e[0]
    dst = e[1]
    w = e[2]


    people_net.add_node(src, src, title=src, color= "#1cae81")
    people_net.add_node(dst, dst, title=dst, color= "#1cae81")
    if w == 1:
        people_net.add_edge(src, dst, value=w, color="grey")
    elif w == 2:
        people_net.add_edge(src, dst, value=w, color="#1cae81")
    elif w == 3:
        people_net.add_edge(src, dst, value=w, color="black")
    
        

neighbor_map = people_net.get_adj_list()

# add neighbor data to node hover data
for node in people_net.nodes:
    #print(node)
    node["title"] += " relations:<br>" + "<br>".join(neighbor_map[node["id"]])
    node["value"] = len(neighbor_map[node["id"]])   
    node["label"] = node["id"] 

people_net.show("people.html")