## To Do
Dictionaries that need multiple reconciliation: 
1. people (done)

2. places: 
- cities
- countries
- institutions

3. Level1: 
- named obj
- quality
- expressional
- actions

4. Level2: 
- characters (done)
- named obj
- places
- events
- personifications
- allegories
- invenzioni

5. Level3
- concepts

6. Simulation

## Imports

In [1]:
# !pip install requests
import requests

#!pip install SPARQLWrapper

from SPARQLWrapper import SPARQLWrapper, JSON
import ssl

#!pip install fuzzywuzzy
import difflib
# !pip install python-Levenshtein
# !pip install thefuzz
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# from thefuzz import fuzz
# from thefuzz import process
import json
import csv
import time #



In [2]:
#!pip install rdflib
from rdflib import URIRef, BNode, Literal, Graph
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace, Literal, XSD

In [3]:
# endpoints

sparql_getty = "http://vocab.getty.edu/sparql"
wikidata_endpoint = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
iconclass_api = ""

## Ancillary functions

In [9]:
# ancillary functions


def sparql_query_setting(query, endpoint):# set the endpoint 
  sparql = SPARQLWrapper(endpoint)
  # set the query
  sparql.setQuery(query)
  # set the returned format
  sparql.setReturnFormat(JSON)
  # get the results
  results = sparql.query().convert()
  return results



def wikidata_reconciliation(r, endpoint, q_class=None): # specify the class of which the individual should be instance of
# r = json results of the get request 
  # double check if the entity belongs to the right class
  if 'search' in r and len(r['search']) >= 1:
      if q_class: # if a class is given, it checkes if the wikidata entity belongs to the correct class. 
        query_string = """ASK {<"""+r['search'][0]['concepturi']+"""> wdt:P31 wd:"""+q_class+""". }"""
        res = sparql_query_setting(query_string, endpoint)
        # print("\nRES", query, query_string, res)
        if res["boolean"] == True: # add a timeout

          return [ r['search'][0]['concepturi'] , 'class_match']
        else:
          return [ r['search'][0]['concepturi'] , 'no_class_match']
      else:
        return [ r['search'][0]['concepturi'] , 'no_class_given']
  else:
      return 'not matched'

# having a dictionary containing q terms, query wd for asking its related terms in external vocabulary, as specified by the query.
def alignments_through_wd(dictionary, query, variable_list, endpoint, url_string): 
  all_items = dictionary.items() # ordered dict of tuples
  for item in all_items: # tuple with key-value pairs
    if type(item[1]) == list: 
      for el in item[1]: # item[1]
        if el.startswith("http://www.wikidata.org/entity/"): # in this way, we also filter automatically also the " rec" keys
          print(el, item[0])
          cleaned_el = el.replace("\"","")
          final_query = query.replace("toBeReplaced", cleaned_el)
          res = sparql_query_setting(final_query, endpoint)
          for result in res["results"]["bindings"]:
            for var in variable_list: 
              aligned = url_string+result[var]["value"]
              print(aligned)
              if aligned not in dictionary[item[0]]: 
                dictionary[item[0]].append(aligned)
  return(dictionary)
    
# function already present in the main script - remove/add terms from a dictionary
def remove_add_terms(dictionary, pair_list, instruction_string): # dictionary to be updated, list containing tuples with key-link to be removed
  for pair in pair_list: # pair[0] = name, pair[1] = value
   # print(pair)
    if instruction_string == "remove": 
      if pair[1] in dictionary[pair[0]]:
        dictionary[pair[0]].remove(pair[1])
    elif instruction_string == "add": 
      # if dictionary[pair[0]][0].startswith("http://icondataset.org/") and len(dictionary[pair[0]]) == 1: # we double check that the term is really without an alignment
      if len(dictionary[pair[0]]) <=1:
        dictionary[pair[0]] = [pair[1]]
  return dictionary

# mean of the fuzzywuzzy parameters for word similarity
def fuzz_ratio_mean(name, term):
  ratio1 = fuzz.ratio(name, term)
  ratio2 = fuzz.partial_ratio(name, term)
  ratio3 = fuzz.token_sort_ratio(name, term)
  ratio4 = fuzz.WRatio(name, term)
  mean = (ratio1+ratio2+ratio3+ratio4)/4
  return mean

# open dictionaries stored in json
def open_json(json_file): 
  with open(json_file, mode='r', encoding="utf-8") as jsonfile:
    dictName = json.load(jsonfile)
    return dictName

# save a dictionary in json
def store_in_json(file_name, dictName): 
  with open(file_name, mode='w', encoding="utf-8") as jsonfile:
    json.dump(dictName, jsonfile)


# stop words and others removal. After this passage: ready to reconcile

def rumor_removal(dictionary):
  for item in dictionary.items(): 
    if type(item[1]) == list: 
      for i in range(len(item[1])- 1, -1, -1): # len - 1 = last postition; start -1 = 0; move of -1
        if item[1][i].startswith("http") is False:
          del item[1][i]
  return(dictionary)

# store in csv

def store_csv(file_name, first_line, list_of_lists): 
    with open(file_name, mode='w', newline='', encoding='UTF-8') as my_file:
        file = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        file.writerow(first_line) 
        for l in list_of_lists: 
            file.writerow(l) 
    return(file_name)

# store in csv
# input: list of tuples, where tup[0] is the name and tup[1] is the reconciled link
# store in a json where the first column is the name and the second column is a concat of all the values having that key, with " @ " as separator
def from_tup_to_csv(list_of_tup, file_name, first_line): 
    list_of_lists = []
    tot_dict = {}
    for tup in list_of_tup: 
        if tup[0] not in tot_dict: 
            tot_dict[tup[0]] = []
        tot_dict[tup[0]].append(tup[1])

    for item in tot_dict.items(): 
        list_of_lists.append([item[0], ' @ '.join(item[1])])
        
    with open(file_name, mode='w', newline='') as my_file:
        file = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        file.writerow(first_line) 
        for l in list_of_lists: 
            file.writerow(l) 
    return(file_name) 

import csv

# from dict to csv: 
def store_dict_in_csv(file_name, first_line, dictionary):
    list_of_lists = []
    for tup in dictionary.items(): 
        if tup[0].endswith(" rec"): 
            col1 = tup[0].replace(" rec", "")
            if type(tup[1])== list:
                col2 = ' @ '.join(tup[1])
            else: 
                col2 = tup[1]
            list_of_lists.append([col1, col2])
    final = store_csv(file_name, first_line, list_of_lists)
    return final
    

# Word filtering
* detetection of plurals: words of the same tipology with a high rank of similarity will be standardized in the format word(plural end of word)
* detection of synonyms through wordnet

dictionaries will be updated with the new form, the correspondence will be stored in a list of tuples. 

## Detection of plurals

**Method**: <br>
1. For each interested vocabulary, create 2 lists containing all the keys. With a for loop check all the words similarities through the fuzzywuzzy means algorithm
2. identify the difference between similar forms. if the difference is equal to "s", "es", "ies", the words are considered singular/plural. Print the others: if there are plurals, add them manually to the list of tuples afer passage 3.  
3. store the result in a list of tuples (at the beginning inserted as sets to avoid repetitions) where tup[0] = singular, tup[1] = plural, tup[2] = neutral form in "name(s)" type
4. substitute the forms with singular/plural in the dictionary with the new neutral form, adding, if present, the values of the previous keys

In [170]:
# dictionary opening
naturalElementDict = open_json("baseDictionaries/naturalElementDict.json")
characterDict = open_json("Final/characterAlignedFinal.json")

In [119]:
# function that detects plurals, 1) store them in a list of tuples in the format tup[0] = singular, tup [1] = plural, tup[2] = neutral form with (s)
# and 2) update the given dictionary, appending the previous values to the new key in the form "name(s)" and deleting the plural/singular forms
# suggestion: since the dictionary is directly modified, make a copy of it before running
# last point to be solved: shall we modify the link corresponding to the new form? eg. take only the singular, appending it only the first time? in any case the singular form is the first appended value
def detect_plurals(dictionary, plurals_list):
    keys = list(dictionary.keys())
    keys2 = []
    keys3 = []
    doubles = []
    final_dict = {}
    rec_words = list(filter(lambda x: 'rec' in x, keys)) # we select the keys for the recociled terms, to filter them out
    for term in keys: 
        if term not in rec_words:
            keys2.append(term) 
            keys3.append(term) # we create a duplicate list to copare terms to find similarities

    for key in keys3: 
        for key2 in keys2: 
            if key !=key2:
                if fuzz_ratio_mean(key, key2) > 85: # the plural "es" is 87
                    if set((key,key2)) not in doubles:
                        doubles.append(set((key,key2)))
    for s in doubles: 
        couple = tuple(s)
        if len(couple[0])> len(couple[1]): # as it is more frequent, we decide that the longer term is the plural form
            base_term = couple[1]
            plural_term = couple[0]
        elif len(couple[0])< len(couple[1]): 
            base_term = couple[0]
            plural_term = couple[1]
        difference = plural_term.replace(base_term, "")
        pl_addition = "("+str(difference)+")"
       # final_form = str(base_term)+pl_addition
        if difference == "s" or difference =="e" or difference =="es": # exeptions: woman(en), man(en), latin plurals (sacophagus. -i)
            plurals_list.append((base_term, plural_term))
        else: 
            print("not included in plurals: ", final_form)
                            
    for tup in plurals_list: 
        for term in dictionary.copy(): 
            if term == tup[0] or term == tup[1]: 
                if tup[2] not in dictionary: 
                    dictionary[tup[2]] = [dictionary[term]] # we assign the original value to the new key
                    dictionary[tup[2]+ " rec"] = dictionary[term+" rec"]
                else: 
                    if dictionary[term] not in dictionary[tup[2]]:
                        print(term, tup[2], dictionary[tup[2]])
                        dictionary[tup[2]].append(dictionary[term]) # in this case, the base dictionary have a str as value
                    for r in dictionary[term+ " rec"]: 
                        if r not in dictionary[tup[2]+ " rec"]: # the reconciliation may find the same term
                            dictionary[tup[2]+ " rec"].append(r)
                del dictionary[term]
                del dictionary[term+ " rec"]
                #if tup[2] not in final_dict: 
                 #   final_dict[tup[2]] = [dictionary[term]] # the base dictionary shoud already have list as a value
                #    final_dict[tup[2]+ " rec"] = dictionary[term+" rec"]
                #else: 
                #    if dictionary[term] not in final_dict[tup[2]]:
                 #       final_dict[tup[2]].append(dictionary[term]) # in this case, the base dictionary have a str as value
                 #   for r in dictionary[term+ " rec"]: 
                 #       if r not in final_dict[tup[2]+ " rec"]: # the reconciliation may find the same term
                 #           final_dict[tup[2]+ " rec"].append(r)
                 # we replace the singular/plural form with the new one to be reconciled
          #  else: 
           #     if not term == tup[0]+" rec" or term == tup[1]+" rec":
             #       final_dict[term] = dictionary[term] # we copy the items that have no plural
  #  print(plurals_list)
    print(dictionary)
    return dictionary

# test
d = {"cicci": "link", "cicci rec":["extLink"], "ciccis": "links", "ciccis rec": ["extLiks"]}
plu = []

dictionary = detect_plurals(d, plu)

ciccis cicci(s) ['link']
{'cicci(s)': ['link', 'links'], 'cicci(s) rec': ['extLink', 'extLiks']}


In [124]:
not_fitted_plurals = []
not_fitted_plurals.append(("sarcophagus", "sarcophagi", "sarcophagus(i)"))
not_fitted_plurals.append(("woman", "women", "woman(women)"))
not_fitted_plurals.append(("man", "men", "man(men)"))
not_fitted_plurals.append(("ox", "oxen", "ox(oxen)"))
not_fitted_plurals.append(("wood", "woods", "wood(s)"))
not_fitted_plurals.append(("kid", "kids", "kid(s)"))
not_fitted_plurals.append(("child", "childdren", "child(dren)"))
not_fitted_plurals.append(("savage", "savages", "savage(s)"))

final = detect_plurals(naturalElementDict2, not_fitted_plurals)

not included in plurals:  acantus plant(acanthus plant)
not included in plurals:  lance(ba)
not included in plurals:  bear(d)
not included in plurals:  ox(b)
not included in plurals:  cart(chart)
not included in plurals:  chariot(eer)
not included in plurals:  child(ren)
not included in plurals:  child(ren)
not included in plurals:  cock(clock)
not included in plurals:  convex lines(concave lines)
not included in plurals:  lowest zone of the tomb(highest zone of the tomb)
not included in plurals:  man face(wo)
not included in plurals:  sarcophagi(sarcophagus)
not included in plurals:  sea(t)
not included in plurals:  shadowed face(shadowened face)
not included in plurals:  statue of a man(statue of a woman)
not included in plurals:  human figure with animal legs(human figures with animal legs)
sarcophagus sarcophagus(i) ['http://icondataset.org/sarcophagi']
women woman(women) ['http://icondataset.org/woman']
men man(men) ['http://icondataset.org/man']
oxen ox(oxen) ['http://icondataset

In [128]:
print(naturalElementDict2["man(men)"])

['http://icondataset.org/man', 'http://icondataset.org/men']


In [178]:
print(fuzz_ratio_mean("child(dren)", "child")) # fuzz ratio should be >75

79.75


In [72]:
# plurals of natural elements
#plurals_list = []
# print(characterDict3)
#characterDict4 = detect_plurals(characterDict3, plurals_list)

KeyError: 'Angels rec'

## Detection of synonyms through wordnet
* there's a pyhon library for it: https://github.com/goodmami/wn usage documentation for another library: https://www.nltk.org/howto/wordnet.html 
* api documentation: https://wordnet.princeton.edu/related-projects 
* **a tutorial for ntlk wordnet usage**: https://pythonprogramming.net/wordnet-nltk-tutorial/ 
* may be useful for alignment approaches: https://aclanthology.org/2020.lrec-1.597.pdf

* Synset: a set of synonyms that share a common meaning.
* conversion of wordnet to LOD: https://www.w3.org/community/bpmlod/wiki/Converting_WordNets_to_Linked_Data 
* other wordnet (3.0)search, with RDF format for each lemma: http://wordnet-rdf.princeton.edu/
* **wordnet rdf official**: http://wordnet-rdf.princeton.edu/about 
* ontology: http://wordnet-rdf.princeton.edu/ontology.html
* what is offset/8 digit: https://wordnet.princeton.edu/documentation/wndb5wn#:~:text=semantic%20concordance%20texts.-,synset_offset,syntactic%20category%2C%20a%20data%20structure%20containing%20the%20parsed%20synset%20is%20returned.,-Data%20File%20Format 
* how to extract RDF links: http://wordnet-rdf.princeton.edu/about#:~:text=the%20licensing%20information-,WordNet%20Identifiers,-Words%20are%20identified

* NB: the offset numbers that we have with ntlk library are the ones of version 3.0 (not the last one), that are not in rdf but also in json. In the json there is the correspondance between the old key and the new one. If the number has 7 digits, you have to add a 0 at the beginning. problem: no full json file for all the terms

### To do: 

1. verify if the wordnet term is pertinent
2. verify if there are synonims used in the dataset
3. only for them, extract (with the method here) the numbers of the version 3.0, and then use it as a link (our word in the dataset skos:broader link wordnet 3.0). link example: http://wordnet-rdf.princeton.edu/pwn30/00001740-a 

In [131]:
from nltk.corpus import wordnet as wn

In [159]:
syns = wn.synsets('dog') # result: list with synset(name)
# result: 'dog.n.01'
#print(syns)
#print(syns[0])
name = syns[0].name() # we obtain the name (ID) of the resource. 
print(name)
#print(wn.synset('dog.n.01').definition()) # ask for definition
dog = wn.synset('dog.n.01') # ask for hyperonyms
# s = syns[0]
#for l in syns[0].lemmas(): 
 #   print(s.name(),"-definition:", s.definition(),"-synonyms:", l.name()) # s, s.definition(),



dog.n.01


In [166]:
syns = wn.synsets('abacus')
offsets_list = [(s.offset(), s) for s in syns]
offsets_dict = dict(offsets_list)
print(offsets_dict)

{2666347: Synset('abacus.n.01'), 2666196: Synset('abacus.n.02')}


In [167]:
synset_offset
syns = wn.synsets('abacus')
offsets_list = [(s.synset_offset(), s) for s in syns]
print(offsets_list)

NameError: name 'synset_offset' is not defined

In [192]:
# code adapted from: https://www.holisticseo.digital/python-seo/nltk/wordnet
syn_dict = {}
def synonym_antonym_extractor(dictionary, syn_dict):
    syn_names = {}
    names_list = list(dictionary.keys())
    for name in names_list.copy(): 
        if name.endswith("rec"):
            names_list.remove(name)
    for n in names_list: 
    # from nltk.corpus import wordnet
        synonyms = []
        syn = wn.synsets(n) # it does already a world lookup, synonyms included
        if syn != []: 
            for l in syn[0].lemmas():
                synonyms.append(l.name())
               # print(syn[0].name(),"-definition:", syn[0].definition(),"-synonyms:", l.name())
            syn_dict[n] = synonyms 
            wname = syn[0].name().split(".")[0]
            if fuzz_ratio_mean(n, wname) <80 and n in synonyms: 
                syn_names[n] = syn[0].name() # wname
        
            print(n, syn[0].name(),"-definition:", syn[0].definition(),"-synonyms:", synonyms)
    # print(set(antonyms))
    return syn_names
natSynNames = synonym_antonym_extractor(naturalElementDict2, syn_dict)

abaci abacus.n.01 -definition: a tablet placed horizontally on top of the capital of a column as an aid in supporting the architrave -synonyms: ['abacus']
altar altar.n.01 -definition: the table in Christian churches where communion is given -synonyms: ['altar', 'communion_table', "Lord's_table"]
altarpiece altarpiece.n.01 -definition: a painted or carved screen placed above and behind an altar or communion table -synonyms: ['altarpiece', 'reredos']
amphitheater amphitheater.n.01 -definition: a sloping gallery with seats for spectators (as in an operating room or theater) -synonyms: ['amphitheater', 'amphitheatre']
amphora amphora.n.01 -definition: an ancient jar with two handles and a narrow neck; used to hold oil or wine -synonyms: ['amphora']
animals animal.n.01 -definition: a living organism characterized by voluntary movement -synonyms: ['animal', 'animate_being', 'beast', 'brute', 'creature', 'fauna']
anteroom anteroom.n.01 -definition: a large entrance or reception room or area 

deer deer.n.01 -definition: distinguished from Bovidae by the male's having solid deciduous antlers -synonyms: ['deer', 'cervid']
demon devil.n.02 -definition: an evil supernatural being -synonyms: ['devil', 'fiend', 'demon', 'daemon', 'daimon']
desk desk.n.01 -definition: a piece of furniture with a writing surface and usually drawers or other compartments -synonyms: ['desk']
donkey donkey.n.01 -definition: the symbol of the Democratic Party; introduced in cartoons by Thomas Nast in 1874 -synonyms: ['donkey']
dragoon dragoon.n.01 -definition: a member of a European military unit formerly composed of heavily armed cavalrymen -synonyms: ['dragoon']
drapery curtain.n.01 -definition: hanging cloth used as a blind (especially for a window) -synonyms: ['curtain', 'drape', 'drapery', 'mantle', 'pall']
eagle eagle.n.01 -definition: any of various large keen-sighted diurnal birds of prey noted for their broad wings and strong soaring flight -synonyms: ['eagle', 'bird_of_Jove']
earrings earring

In [195]:
print(natSynNames)
save = store_in_json("synonyms_dict.json", natSynNames)

{'bald-headed': 'bald', 'band': 'set', 'boar': 'wild_boar', 'bowls': 'lawn_bowling', 'boy': 'male_child', 'chains': 'irons', 'chest': 'thorax', 'chlamys': 'perianth', 'cornucopia': 'horn_of_plenty', 'demon': 'devil', 'drapery': 'curtain', 'glasses': 'spectacles', 'globe': 'earth', 'ground': 'land', 'halo': 'aura', 'human': 'homo', 'journeyman': 'craftsman', 'knapsack': 'backpack', 'lance': 'spear', 'monument': 'memorial', 'nosegay': 'bouquet', 'pig': 'hog', 'portrait': 'portrayal', 'procession': 'emanation', 'pupil': 'student', 'purse': 'bag', 'rafts': 'tons', 'scroll': 'coil', 'stake': 'interest', 'statuette': 'figurine', 'stone': 'rock', 'syrinx': 'panpipe', 'tombstone': 'gravestone', 'trumpet': 'cornet', 'veil': 'head_covering'}


In [169]:
print(syn_dict) # here, the value is the name of the wordnet synset. we can substitute the used word with this to avoid duplicates

{'bald-headed': 'bald', 'band': 'set', 'boar': 'wild_boar', 'bowls': 'lawn_bowling', 'boy': 'male_child', 'chains': 'irons', 'chest': 'thorax', 'chlamys': 'perianth', 'cornucopia': 'horn_of_plenty', 'demon': 'devil', 'drapery': 'curtain', 'glasses': 'spectacles', 'globe': 'earth', 'ground': 'land', 'halo': 'aura', 'human': 'homo', 'journeyman': 'craftsman', 'knapsack': 'backpack', 'lance': 'spear', 'monument': 'memorial', 'nosegay': 'bouquet', 'pig': 'hog', 'portrait': 'portrayal', 'procession': 'emanation', 'pupil': 'student', 'purse': 'bag', 'rafts': 'tons', 'scroll': 'coil', 'stake': 'interest', 'statuette': 'figurine', 'stone': 'rock', 'syrinx': 'panpipe', 'tombstone': 'gravestone', 'trumpet': 'cornet', 'veil': 'head_covering'}


In [171]:
# characterDict
characterSynDict = {}
synonym_antonym_extractor(characterDict, characterSynDict)
print(characterSynDict)

Adam adam.n.01 -definition: (Old Testament) in Judeo-Christian mythology; the first man and the husband of Eve and the progenitor of the human race -synonyms: ['Adam']
Aeneas aeneas.n.01 -definition: a mythical Greek warrior who was a leader on the Trojan side of the Trojan War; hero of the Aeneid -synonyms: ['Aeneas']
Aeolus aeolus.n.01 -definition: god of the winds in ancient mythology -synonyms: ['Aeolus']
Apollo apollo.n.01 -definition: (Greek mythology) Greek god of light; god of prophecy and poetry and music and healing; son of Zeus and Leto; twin brother of Artemis -synonyms: ['Apollo', 'Phoebus', 'Phoebus_Apollo']
Apostles apostle.n.01 -definition: an ardent early supporter of a cause or reform -synonyms: ['apostle']
Argus argus.n.01 -definition: (Greek mythology) a giant with 100 eyes; was guardian of the heifer Io and was slain by Hermes -synonyms: ['Argus']
Ariadne ariadne.n.01 -definition: beautiful daughter of Minos and Pasiphae; she fell in love with Theseus and gave him 

In [175]:
# print(syn_dict) NB: it works with the old version of the dictionary
keys = list(syn_dict.keys())
#rint(keys)
values = list(syn_dict.values())
#print(values)
items = list(syn_dict.items())
#rint(items)
for item in items: 
    key = item[0]
    v = item[1]
   #print(key, values)
    if v != []:
        for value in v: 
            if value != key: 
                if value in keys: 
                   print(key, value)

band circle
chalice goblet
curls scroll
curtain drapery
curtain mantle
drapery curtain
drapery mantle
earth globe
flames fire
globe earth
ground earth
lance spear
purse bag
rock stone
spear lance
stone rock


In [None]:
#note for the valuable synonyms: 
woods forest
spear lance
stone rock
purse bag
men gentleman
men human
kid child
lance spear
ground earth
globe earth
chalice goblet
garlands wreath

# plurals to be updated: 
"woman", "women", "woman(women)"
"man", "men", "man(men)"
"ox", "oxen", "ox(oxen)"
"wood", "woods", "wood(s)"
"kid", "kids", "kid(s)"
"child", "childdren", "child(dren)"
"savage", "savages", "savage(s)"
"savage", "savages", "savage(s)"


## Main reconciliation function

In [5]:
# function to test and improve

def reconciliation_function(dictionary, query, endpoint=None, variables_list=None, q_class=None): # ancillary function: we are already in a for loop with "name"
  all_items = dictionary.items()
  for item in all_items: 
    if item[0].endswith(" rec"): #and item[1] == []:  
      name_rec = item[0] 
      name = item[0].replace(" rec", "")
     # print(name, item[1])
      stop_words = list(filter(lambda x: 'stop' in x, item[1])) # checks if the word "stop" is in the list. returns a list of words containing "stop"
     # if item[1] == []: # we take only the ones without a manual alignment or comment to avoid mistakes/repetitions
      
      # wikidata api request
      if type(query) == dict and endpoint == wikidata_endpoint:
        if "stop wd" not in stop_words: # if there is, it means that the iteration has already been done
          final_query = query 
          final_query['search'] = name
          API_WD = "https://www.wikidata.org/w/api.php"
          r =    requests.get(API_WD, params = final_query).json() # json with all the classes to be filtered
          res = wikidata_reconciliation(r, endpoint, q_class)
          time.sleep(5)
          item[1].append("stop wd")
          if res[1] == 'class_match' or res[1] == 'no_class_given':
            if res[0] not in dictionary[name+" rec"]:
              print(name, "resulting q class is", res[0])
              dictionary[name+" rec"].append(res[0])
       # elif if type(query) == dict and endpoint == iconclass_endpoint: # to be developed
       
      elif type(query) == str: 
        final_query = query.replace("toBeReplaced", name)
          # iconclass api request
        if query.startswith("https://iconclass"):
          if "stop ic" not in stop_words:
          #print(query)
            candidate_list = []
            response = requests.get(final_query)
            final = response.json()
            for el in final["result"]: 
              candidate_list.append(el)
            dictionary[name+" rec"].append("stop ic") # in this way, we search the term only once
            #print(candidate_list)
            for code in candidate_list: 
              # querying directly the code gives the response with keywords: 
              query_code= "https://iconclass.org/"+code+".json"  
              response_code = requests.get(query_code)
              final_code = response_code.json()
              if "txt" in final_code: 
                if "en" in final_code["txt"]: 
                  desc = final_code["txt"]["en"]
                  if "kw" in final_code:
                      if "en" in final_code["kw"]:
                          to_be_checked = final_code["kw"]["en"]
                          if len(to_be_checked) ==1:
                            for candidate in to_be_checked: 
                              if fuzz_ratio_mean(name, candidate) >= 95: 
                                print(name, candidate, code, desc)
                                if "https://iconclass.org/"+code not in dictionary[name+" rec"]: 
                                  dictionary[name+" rec"].append("https://iconclass.org/"+code)
        else:
          res = sparql_query_setting(final_query, endpoint)

          for result in res["results"]["bindings"]:
            if "@en" in str(result[variables_list[0]]["value"]): # case found in getty vocabs
              term = str(result[variables_list[0]]["value"]).replace("@en", "")
            else: 
              term = str(result[variables_list[0]]["value"])
            if fuzz_ratio_mean(name, term) >= 95: 
              print(name, term)
              if result[variables_list[1]]["value"] not in dictionary[name+" rec"]: 
                dictionary[name+" rec"] = [result[variables_list[1]]["value"]]


    # result["Descr"]["value"], result["ScopeNote"]["value"], result["Type"]["value"]

       # print(result["Subject"]["value"], result["Term"]["value"], result["Parents"]["value"])


## Iconclass reconciliation

In [None]:
characterDict = open_json("character_Iconclass_first_match.json")

In [None]:
done_char = """Adam
Aeneas
Aeolus
Aglauros
Aion/Phanes
Anteros
Antiphas, son of Laoocoon
Antoninus Pius
Apollo
Apollo god of Sun
Apollo Medicus or Aesculapius
Apostles
Archangel Gabriel
Argus
Ariadne
Athene
bacchant
Bacchus
Bishop Simone Saltarelli
Blindfold Cupid
Cacus
Cerberus
Christ
Christ as a child
Companion of Serapis
crocodile
Cybele
David
Devil
Diana
Dido
Dieu Amour
Dionysus
Erato
Erymanthian Boar
Europa
Europa's Companions
Eve
Fedro
Female Pan
Flora
Four Saints
Ganymede
Geminae Veneres
Giuliano de' Medici
God
God's hand
Hecuba
Helen
Hercules
Hermes Sphenopogon
Herse
Hespontine Sibyl
Hylas
Ixion
Jest
Joachim
Judith
Jupiter
Jupiter as an eagle
King Eurystheus
Laocoon
Leach
Lorenzo de' Medici
Marine snakes
Mars
Mary
Member of Medici family
Member of the crowd of faithful in prayer
Member of the procession
Mercury
Mercury (Hermes)
Mida
Minerva
Moses
Muses
Mithras
natural Venus
Neptune
Noah
Nymphs
Olympian Gods
Orpheus
Pan
Pan, God of Panic
Paris
Perseus
Phaeton
Planetary deities
Pluto
Polyxena
Pope Julius II
Presbyter Bruno
Prometheus
Prophet
Proserpina
puer mingens
Pygmalion
Rachel
Rhea
Roman emperors
Saint Anne
Saint Basilius
Saint Cosmas
Saint Damian
Saint John the Evangelist
Saint Luke
Saint Mark
Saint Paul
Saint Peter
Saint Theodora of Alexandria
Salamander
Satan
Saturn
Satyr
Serapis
Silenus
Simeon
Sol Invictus
Sol Iustitiae
Son of the widow of Nain
Sybil
Sisyphus""" 

new = done_char.replace("\n"," @ ")
final = new.split(" @ ")
print(final)

['Adam', 'Aeneas', 'Aeolus', 'Aglauros', 'Aion/Phanes', 'Anteros', 'Antiphas, son of Laoocoon', 'Antoninus Pius', 'Apollo', 'Apollo god of Sun', 'Apollo Medicus or Aesculapius', 'Apostles', 'Archangel Gabriel', 'Argus', 'Ariadne', 'Athene', 'bacchant', 'Bacchus', 'Bishop Simone Saltarelli', 'Blindfold Cupid', 'Cacus', 'Cerberus', 'Christ', 'Christ as a child', 'Companion of Serapis', 'crocodile', 'Cybele', 'David', 'Devil', 'Diana', 'Dido', 'Dieu Amour', 'Dionysus', 'Erato', 'Erymanthian Boar', 'Europa', "Europa's Companions", 'Eve', 'Fedro', 'Female Pan', 'Flora', 'Four Saints', 'Ganymede', 'Geminae Veneres', "Giuliano de' Medici", 'God', "God's hand", 'Hecuba', 'Helen', 'Hercules', 'Hermes Sphenopogon', 'Herse', 'Hespontine Sibyl', 'Hylas', 'Ixion', 'Jest', 'Joachim', 'Judith', 'Jupiter', 'Jupiter as an eagle', 'King Eurystheus', 'Laocoon', 'Leach', "Lorenzo de' Medici", 'Marine snakes', 'Mars', 'Mary', 'Member of Medici family', 'Member of the crowd of faithful in prayer', 'Member o

In [None]:
for name in final: 
  name_rec = name+" rec"
  characterDict[name_rec].append("stop ic")
  print(characterDict[name_rec])

['https://iconclass.org/11I62(ADAM)', 'stop ic']
['https://iconclass.org/96C(AENEAS)', 'stop ic']
['stop ic']
['stop ic']
['http://www.iconclass.org/rkd/91A14%28AION%29/', 'stop ic']
['http://www.iconclass.org/rkd/92D1911/', 'stop ic']
['stop ic']
['stop ic']
['https://iconclass.org/92B3', 'https://iconclass.org/95B(MARPESSA)222', 'stop ic']
['stop ic']
['stop ic']
['stop ic']
['stop ic']
['stop ic']
['https://iconclass.org/95B(ARIADNE)', 'https://iconclass.org/94M21', 'https://iconclass.org/92L121', 'https://iconclass.org/92L1781', 'stop ic']
['stop ic']
['https://iconclass.org/92L1912', 'stop ic']
['https://iconclass.org/92L1', 'https://iconclass.org/95A(LYCURGUS)41', 'stop ic']
['stop ic']
['http://www.iconclass.org/rkd/92D18%28BLINDFOLD%29/', 'stop ic']
['stop ic']
['stop ic']
['stop ic']
['stop ic']
['stop ic']
['https://iconclass.org/25F43', 'stop ic']
['stop ic']
['https://iconclass.org/71H', 'https://iconclass.org/46A1262', 'https://iconclass.org/73E113', 'https://iconclass.org

In [None]:
# 1st part of iteration
iconclass_query = "https://iconclass.org/api/search?q=toBeReplaced&lang=en&size=999&page=1&sort=rank&keys=0"
character_iconclass = reconciliation_function(characterDict, iconclass_query)


Adam Adam 11I62(ADAM) Adam (not in biblical context)
Aeneas Aeneas 96C(AENEAS) (story of) Aeneas
Apollo Apollo 92B3 (story of) Apollo (Phoebus)
Apollo Apollo 95B(MARPESSA)222 Apollo tries to take Marpessa away from Idas
Ariadne Ariadne 95B(ARIADNE) (story of) Ariadne
Ariadne Ariadne 94M21 Theseus in love with Ariadne, Minos' daughter
Ariadne Ariadne 92L121 (story of) Ariadne and Bacchus
Ariadne Ariadne 92L1781 triumph of Bacchus and Ariadne
bacchant bacchant 92L1912 male bacchant(es)
Bacchus Bacchus 92L1 (story of) Bacchus (Dionysus), Liber
Bacchus Bacchus 95A(LYCURGUS)41 Lycurgus, king of the Thracian Edones, opposes Bacchus and his train
crocodile crocodile 25F43 crocodiles
David David 71H story of David
David David 46A1262 David (one of the nine worthies)
David David 73E113 David near the tomb of Christ
David David 73EE113 David near the tomb of Christ - EE - the tomb closed (with unbroken seals)
Diana Diana 92C3 (story of) Diana (Artemis)
Diana Diana 94Q511 Orion and Diana hunting


KeyError: ignored

In [None]:
# 2nd part of iteration
iconclass_query = "https://iconclass.org/api/search?q=toBeReplaced&lang=en&size=999&page=1&sort=rank&keys=0"
character_iconclass = reconciliation_function(characterDict, iconclass_query)


Tantalus Tantalus 95A(TANTALUS) (story of) Tantalus
Troilus Troilus 95A(TROILUS) (story of) Troilus
Virgil Virgil 82A(VIRGIL) Virgil
Virgil Virgil 13B2(VIRGIL) sorcerer, magician, wizard, warlock: Virgil
Venus Venus 24C19 Venus (planet)
Venus Venus 92C4 (story of) Venus (Aphrodite)
Venus Venus 92B222 Vulcan and Venus
Venus Venus 24C215 Venus and her children ('Planetenkinder')
Venus Venus 92D15211 Cupid shooting a dart at Venus' order
Venus Venus 92D192163 Psyche with Venus
Cupid Cupid 92C454 Venus and Cupid (Cupid not being mere attribute)


In [None]:
characterDict2 = rumor_removal(characterDict)
print(characterDict2)

{'Adam': 'http://icondataset.org/adam', 'Adam rec': ['https://iconclass.org/11I62(ADAM)'], 'Aeneas': 'http://icondataset.org/aeneas', 'Aeneas rec': ['https://iconclass.org/96C(AENEAS)'], 'Aeolus': 'http://icondataset.org/aeolus', 'Aeolus rec': [], 'Aglauros': 'http://icondataset.org/aglauros', 'Aglauros rec': [], 'Aion/Phanes': 'http://icondataset.org/aion/phanes', 'Aion/Phanes rec': ['http://www.iconclass.org/rkd/91A14%28AION%29/'], 'Anteros': 'http://icondataset.org/anteros', 'Anteros rec': ['http://www.iconclass.org/rkd/92D1911/'], 'Antiphas, son of Laoocoon': 'http://icondataset.org/antiphas-son-of-laoocoon', 'Antiphas, son of Laoocoon rec': [], 'Antoninus Pius': 'http://icondataset.org/antoninus-pius', 'Antoninus Pius rec': [], 'Apollo': 'http://icondataset.org/apollo', 'Apollo rec': ['https://iconclass.org/92B3'], 'Apollo god of Sun': 'http://icondataset.org/apollo-god-of-sun', 'Apollo god of Sun rec': [], 'Apollo Medicus or Aesculapius': 'http://icondataset.org/apollo-medicus-or

In [None]:
to_be_removed = []
ic = "https://iconclass.org/"
to_be_removed.append(("Apollo rec", ic+"95B(MARPESSA)222"))
to_be_removed.append(("Ariadne rec", ic+"94M21"))
to_be_removed.append(("Ariadne rec", ic+"92L121"))
to_be_removed.append(("Ariadne rec", ic+"92L1781")) 

to_be_removed.append(("Bacchus rec", ic+"95A(LYCURGUS)41"))
to_be_removed.append(("David rec", ic+"46A1262"))
to_be_removed.append(("David rec", ic+"73E113"))
to_be_removed.append(("David rec", ic+"73EE113"))
to_be_removed.append(("Diana rec", ic+"94Q511"))
to_be_removed.append(("Diana rec", ic+"92B32421"))
to_be_removed.append(("Helen rec", ic+"95A(MENELAUS)21"))
to_be_removed.append(("Helen rec", ic+"94R41"))
to_be_removed.append(("Helen rec", ic+"94F321"))
to_be_removed.append(("Hercules rec", ic+"98B(COMMODUS)91"))
to_be_removed.append(("Hercules rec", ic+"94A333"))
to_be_removed.append(("Herse rec", ic+"92B522"))
to_be_removed.append(("Judith rec", ic+"71U"))
to_be_removed.append(("Jupiter rec", ic+"24C12"))
to_be_removed.append(("Jupiter rec", ic+"92B21211"))
to_be_removed.append(("Jupiter rec", ic+"94F743"))
to_be_removed.append(("Mars rec", ic+"24C13"))
to_be_removed.append(("Mars rec", ic+"24C213"))
to_be_removed.append(("Mars rec", ic+"92C45211"))
to_be_removed.append(("Mercury rec", ic+"97D121"))

to_be_removed.append(("Mercury rec", ic+"24C14"))
to_be_removed.append(("Mercury rec", ic+"24C216"))
to_be_removed.append(("Minerva rec", ic+"94S311"))
to_be_removed.append(("Muses rec", ic+"48C71101"))
to_be_removed.append(("Muses rec", ic+"48C72101"))
to_be_removed.append(("Muses rec", ic+"48B101"))
to_be_removed.append(("Muses rec", ic+"48C9101"))
to_be_removed.append(("Muses rec", ic+"48BB101"))
to_be_removed.append(("Muses rec", ic+"48C51101"))
to_be_removed.append(("Muses rec", ic+"48CC71101"))
to_be_removed.append(("Neptune rec", ic+"24C15"))
to_be_removed.append(("Neptune rec", ic+"94F744"))
to_be_removed.append(("Orpheus rec", ic+"11D3284"))
to_be_removed.append(("Orpheus rec", ic+"11DD3284"))
to_be_removed.append(("Orpheus rec", ic+"12E84(ORPHEUS)"))
to_be_removed.append(("Pluto rec", ic+"24C16"))
to_be_removed.append(("Pluto rec", ic+"73D6721"))
to_be_removed.append(("Polyxena rec", ic+"95A(ACHILLES)22"))
to_be_removed.append(("Proserpina rec", ic+"92N123"))
to_be_removed.append(("Proserpina rec", ic+"92C4912"))
to_be_removed.append(("Rhea rec", ic+"91B11122"))
to_be_removed.append(("Satan rec", ic+"73D2421"))
to_be_removed.append(("Saturn rec", ic+"24C17"))
to_be_removed.append(("Virgil rec", ic+"13B2(VIRGIL)"))
to_be_removed.append(("Venus rec", ic+"24C19"))
to_be_removed.append(("Venus rec", ic+"92B222"))
to_be_removed.append(("Venus rec", ic+"24C215"))
to_be_removed.append(("Venus rec", ic+"92D15211"))
to_be_removed.append(("Venus rec", ic+"92D192163"))
to_be_removed.append(("Cupid rec", ic+"92C454"))


characterDict3 = remove_add_terms(characterDict2, to_be_removed, "remove")

to_be_added = []
to_be_added.append(("Muses rec", "https://iconclass.org/92D4"))
to_be_added.append(("The Children of Saturn rec", "http://www.iconclass.org/24C211"))
to_be_added.append(("Cupid rec", "https://iconclass.org/92D1"))
characterDict4 = remove_add_terms(characterDict3, to_be_added, "add")

('Apollo rec', 'https://iconclass.org/95B(MARPESSA)222')
('Ariadne rec', 'https://iconclass.org/94M21')
('Ariadne rec', 'https://iconclass.org/92L121')
('Ariadne rec', 'https://iconclass.org/92L1781')
('Bacchus rec', 'https://iconclass.org/95A(LYCURGUS)41')
('David rec', 'https://iconclass.org/46A1262')
('David rec', 'https://iconclass.org/73E113')
('David rec', 'https://iconclass.org/73EE113')
('Diana rec', 'https://iconclass.org/94Q511')
('Diana rec', 'https://iconclass.org/92B32421')
('Helen rec', 'https://iconclass.org/95A(MENELAUS)21')
('Helen rec', 'https://iconclass.org/94R41')
('Helen rec', 'https://iconclass.org/94F321')
('Hercules rec', 'https://iconclass.org/98B(COMMODUS)91')
('Hercules rec', 'https://iconclass.org/94A333')
('Herse rec', 'https://iconclass.org/92B522')
('Judith rec', 'https://iconclass.org/71U')
('Jupiter rec', 'https://iconclass.org/24C12')
('Jupiter rec', 'https://iconclass.org/92B21211')
('Jupiter rec', 'https://iconclass.org/94F743')
('Mars rec', 'https:

In [None]:
storage = store_in_json("character_Iconclass_aligned.json", characterDict4)

## Places

In [7]:
# find the corresponding country of a given city 

getty_endpoint = "http://vocab.getty.edu/sparql"

city_country = []
for city in citiesDict: 
    getty_cities_countries_query = """

        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "toBeReplaced";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      """
    if city.endswith(" rec"): 
        name = city.replace(" rec", "")
        final_query = getty_cities_countries_query.replace("toBeReplaced", name) 
        print(final_query)
        res = sparql_query_setting(final_query, getty_endpoint)
        
       # for result in res["results"]["bindings"]: 
        #    print()
        for result in res["results"]["bindings"]:
            if "Label" in result:
                if "@en" in str(result["Label"]["value"]): # case found in getty vocabs
                    cityLabel = str(result["Label"]["value"]).replace("@en", "")
                else: 
                    cityLabel = str(result["Label"]["value"])
                if "CountryLabel" in result:
                    if "@en" in str(result["CountryLabel"]["value"]): # case found in getty vocabs
                        countryLabel = str(result["CountryLabel"]["value"]).replace("@en", "")
                    else: 
                        countryLabel = str(result["CountryLabel"]["value"])
                
                if fuzz_ratio_mean(name, cityLabel) >= 95: 
                    print(name, cityLabel)
                    line = [cityLabel, result["City"]["value"], countryLabel, result["Country"]["value"]]
                    print(line)
                    city_country.append(line)







        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "St. Gilles";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Siena";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      
Siena Siena
['Siena', 'http://vocab.getty.edu/tgn/7011179', 'Italy'



        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Rome";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      
Rome Rome
['Rome', 'http://vocab.getty.edu/tgn/7000874', 'Italy', 'http://vocab.getty.edu/tgn/1000080']


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Naples";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language



        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Reading";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Copenhagen";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          

Milan Milan
['Milan', 'http://vocab.getty.edu/tgn/7005903', 'Italy', 'http://vocab.getty.edu/tgn/1000080']


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Modena";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Mount Athos";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dc

Salzburg Salzburg
['Salzburg', 'http://vocab.getty.edu/tgn/7003256', 'Austria', 'http://vocab.getty.edu/tgn/1000062']


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Fiesole";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Ottawa";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLa



        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Boston";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Osnabruck";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?C

Mainz Mainz
['Mainz', 'http://vocab.getty.edu/tgn/7004449', 'Germany', 'http://vocab.getty.edu/tgn/7000084']


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Panticapaeum";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel; dct:language gvp_lang:en]}
        }
      


        select ?City ?Label ?Country ?CountryLabel{

          ?City luc:term "Pompei";
                 gvp:placeType [rdfs:label "cities"@en];
                gvp:broaderPreferred* ?Country.
          ?Country gvp:placeType [rdfs:label "republics"@en] .
          optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
          optional {?Country xl:prefLabel [xl:literalForm ?CountryLabel;

In [None]:
        for result in res["results"]["bindings"]:
            if "@en" in str(result["Label"]["value"]): # case found in getty vocabs
                cityLabel = str(result["Label"]["value"]).replace("@en", "")
            else: 
                cityLabel = str(result["Label"]["value"])
            if "@en" in str(result["CountryLabel"]["value"]): # case found in getty vocabs
                countryLabel = str(result["CountryLabel"]["value"]).replace("@en", "")
            else: 
                countryLabel = str(result["Label"]["value"])
                
            if fuzz_ratio_mean(name, cityLabel) >= 95: 
              print(name, cityLabel)
            line = [cityLabel, result["City"]["value"], countryLabel, result["Country"]["value"]]
            print(line)
            city_country.append(line)

In [10]:
print(city_country)
headings = ["City", "CityLink", "Country", "CountryLink"]
store = store_csv("city_country_tgn.csv", headings, city_country)

[['Siena', 'http://vocab.getty.edu/tgn/7011179', 'Italy', 'http://vocab.getty.edu/tgn/1000080'], ['Vienna', 'http://vocab.getty.edu/tgn/7003321', 'Austria', 'http://vocab.getty.edu/tgn/1000062'], ['Munich', 'http://vocab.getty.edu/tgn/7004333', 'Bavaria', 'http://vocab.getty.edu/tgn/7003669'], ['Munich', 'http://vocab.getty.edu/tgn/7004333', 'Germany', 'http://vocab.getty.edu/tgn/7000084'], ['Berlin', 'http://vocab.getty.edu/tgn/7003712', 'Germany', 'http://vocab.getty.edu/tgn/7000084'], ['Padua', 'http://vocab.getty.edu/tgn/7003085', 'Italy', 'http://vocab.getty.edu/tgn/1000080'], ['Bamberg', 'http://vocab.getty.edu/tgn/7004325', 'Bavaria', 'http://vocab.getty.edu/tgn/7003669'], ['Bamberg', 'http://vocab.getty.edu/tgn/7004325', 'Germany', 'http://vocab.getty.edu/tgn/7000084'], ['Florence', 'http://vocab.getty.edu/tgn/7000457', 'Italy', 'http://vocab.getty.edu/tgn/1000080'], ['Pisa', 'http://vocab.getty.edu/tgn/7006082', 'Italy', 'http://vocab.getty.edu/tgn/1000080'], ['Bari', 'http://

In [6]:
citiesDict = open_json("baseDictionaries/cityDict.json")
countriesDict = open_json("baseDictionaries/countryDict.json")

In [49]:
print(citiesDict)

{'St. Gilles': 'http://icondataset.org/places/st-gilles', 'St. Gilles rec': ['http://vocab.getty.edu/tgn/1026710'], 'Siena': 'http://icondataset.org/places/siena', 'Siena rec': [], 'Vienna': 'http://icondataset.org/places/vienna', 'Vienna rec': [], 'Munich': 'http://icondataset.org/places/munich', 'Munich rec': [], 'Berlin': 'http://icondataset.org/places/berlin', 'Berlin rec': [], 'Anagni': 'http://icondataset.org/places/anagni', 'Anagni rec': [], 'Padua': 'http://icondataset.org/places/padua', 'Padua rec': [], 'Oxford': 'http://icondataset.org/places/oxford', 'Oxford rec': [], 'Auxerre': 'http://icondataset.org/places/auxerre', 'Auxerre rec': [], 'Baltimore': 'http://icondataset.org/places/baltimore', 'Baltimore rec': [], 'Bamberg': 'http://icondataset.org/places/bamberg', 'Bamberg rec': [], 'Florence': 'http://icondataset.org/places/florence', 'Florence rec': [], 'Pisa': 'http://icondataset.org/places/pisa', 'Pisa rec': [], 'Bari': 'http://icondataset.org/places/bari', 'Bari rec': [

In [19]:
# getty countries 

getty_countries_variables = ["Label", "Country"]
getty_countries_query = """

select ?Country ?Label {
  
  ?Country luc:term "toBeReplaced";
         gvp:placeType [rdfs:label "republics"@en]
  optional {?Country xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
}
      """
getty_endpoint = "http://vocab.getty.edu/sparql"
countries = reconciliation_function(countriesDict, getty_countries_query, getty_endpoint, getty_countries_variables)


wd_query = { # nb:applying only to wikidata. Every API has its own parameters. 
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": "toBeReplaced"
    }

#countries2 = reconciliation_function(countriesDict, wd_query, wikidata_endpoint, q_class="Q6256")
# print(countriesDict)

Germany Germany
Austria Austria
France France
Greece Greece
Italy Italy


In [20]:
countries2 = reconciliation_function(countriesDict, wd_query, wikidata_endpoint, q_class="Q6256")
print(countriesDict)

Germany resulting q class is http://www.wikidata.org/entity/Q183
Austria resulting q class is http://www.wikidata.org/entity/Q40
United Kingdom resulting q class is http://www.wikidata.org/entity/Q145
France resulting q class is http://www.wikidata.org/entity/Q142
Greece resulting q class is http://www.wikidata.org/entity/Q41
Italy resulting q class is http://www.wikidata.org/entity/Q38
Jordan resulting q class is http://www.wikidata.org/entity/Q810


In [30]:
finalCountries = rumor_removal(countriesDict)
print(countriesDict)
country_headings = ["Country", "CountryLink"]
store = store_dict_in_csv("countriesAligned.csv", country_headings, countriesDict)



{'Germany': 'http://icondataset.org/places/germany', 'Germany rec': ['http://vocab.getty.edu/tgn/7000084', 'http://www.wikidata.org/entity/Q183'], 'Austria': 'http://icondataset.org/places/austria', 'Austria rec': ['http://vocab.getty.edu/tgn/1000062', 'http://www.wikidata.org/entity/Q40'], 'United Kingdom': 'http://icondataset.org/places/united-kingdom', 'United Kingdom rec': ['http://www.wikidata.org/entity/Q145'], 'France': 'http://icondataset.org/places/france', 'France rec': ['http://vocab.getty.edu/tgn/1000070', 'http://www.wikidata.org/entity/Q142'], 'Greece': 'http://icondataset.org/places/greece', 'Greece rec': ['http://vocab.getty.edu/tgn/1000074', 'http://www.wikidata.org/entity/Q41'], 'Italy': 'http://icondataset.org/places/italy', 'Italy rec': ['http://vocab.getty.edu/tgn/1000080', 'http://www.wikidata.org/entity/Q38'], 'Crimea': 'http://icondataset.org/places/crimea', 'Crimea rec': [], 'Jordan': 'http://icondataset.org/places/jordan', 'Jordan rec': ['http://www.wikidata.o

In [None]:
# getty cities 
name = ""
getty_cities_variables = ["Label", "City"]
getty_cities_query = """

select ?City ?Label {
  
  ?City luc:term "ToBeReplaced";
         gvp:placeType [rdfs:label "cities"@en]
  optional {?City xl:prefLabel [xl:literalForm ?Label; dct:language gvp_lang:en]}
}
      """
getty_endpoint = "http://vocab.getty.edu/sparql"
cities = reconciliation_function(citiesDict, getty_cities_query, getty_endpoint, getty_cities_variables)


wd_query = { # nb:applying only to wikidata. Every API has its own parameters. 
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": "toBeReplaced"
    }

countries2 = reconciliation_function(citiesDict, wd_query, wikidata_endpoint, q_class="Q515")

{'Subject': {'type': 'uri', 'value': 'http://vocab.getty.edu/aat/300435698'}, 'Term': {'xml:lang': 'en', 'type': 'literal', 'value': 'Cypro-Archaic'}, 'Parents': {'type': 'literal', 'value': 'Cypriote styles, Cypriote, ... Styles and Periods Facet'}, 'ScopeNote': {'xml:lang': 'en', 'type': 'literal', 'value': 'Style and culture that developed in Cyprus ca. 750–450 BCE.'}, 'Type': {'type': 'literal', 'value': 'Concept'}}
{'Subject': {'type': 'uri', 'value': 'http://vocab.getty.edu/aat/300016747'}, 'Term': {'xml:lang': 'en', 'type': 'literal', 'value': 'Northern Archaic'}, 'Parents': {'type': 'literal', 'value': 'Pre-Columbian Maritime area styles, Pre-Columbian Western Eskimo styles, ... Styles and Periods Facet'}, 'Type': {'type': 'literal', 'value': 'Concept'}}
{'Subject': {'type': 'uri', 'value': 'http://vocab.getty.edu/aat/300107784'}, 'Term': {'xml:lang': 'en', 'type': 'literal', 'value': 'Archaic (Persian pottery style)'}, 'Parents': {'type': 'literal', 'value': 'Persian pottery s

## People reconciliation with Wikidata, ULAN, VIAF

In [None]:
# wikidata
wd_query = { # nb:applying only to wikidata. Every API has its own parameters. 
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": "toBeReplaced"
    }


people = reconciliation_function(peopleDict, wd_query, wikidata_endpoint, q_class="Q5")

A.E. Popp ['http://viaf.org/viaf/20715488']
Abu Ma'sar ['http://viaf.org/viaf/17355947']
resulting q class is http://www.wikidata.org/entity/Q11373
Achilles Bocchius ['http://viaf.org/viaf/149878']
resulting q class is http://www.wikidata.org/entity/Q3604350
Adam Friedrich Oeser ['http://vocab.getty.edu/ulan/500115770']
resulting q class is http://www.wikidata.org/entity/Q215129
Agesander ['http://vocab.getty.edu/page/500021781']
resulting q class is http://www.wikidata.org/entity/Q983432
Athenodoros ['http://vocab.getty.edu/ulan/500007132']
resulting q class is http://www.wikidata.org/entity/Q280432
Polydorus ['http://vocab.getty.edu/ulan/500031985']
Agnolo Bronzino ['http://vocab.getty.edu/ulan/500004362']
resulting q class is http://www.wikidata.org/entity/Q7803
Albrecht Dürer ['http://vocab.getty.edu/ulan/500115493']
resulting q class is http://www.wikidata.org/entity/Q5580
Alexandre de Laborde ['http://viaf.org/viaf/121808105']
resulting q class is http://www.wikidata.org/entity/Q

In [None]:
# store the first reconciliation with wd in a json
with open('people_wd_match.json', mode='w', encoding="utf-8") as jsonfile:
    json.dump(peopleDict, jsonfile)

### Manual check of the alignment
from the list printed, manually check if the aligned term is correct. If not, remove the aligned term from the dictionary. 

In [None]:
# if the vocabulary is opened from the json
with open('people_wd_match.json', mode='r', encoding="utf-8") as jsonfile:
    people = json.load(jsonfile) # use as a dictionary

FileNotFoundError: ignored

In [None]:
# a hand check of the correctness of the aligned classes is needed. first error: J. Eck, "https://www.wikidata.org/entity/Q13474163"

to_be_removed = []
to_be_removed.append(("J. Eck rec", "http://www.wikidata.org/entity/Q13474163"))
# to_be_removed.append(("J. Eck rec", "http://www.wikidata.org/entity/Q280432"))
to_be_removed.append(("Francesco Barberino rec", "http://www.wikidata.org/entity/Q20056517"))
to_be_removed.append(("Girolamo Olgiati rec", "http://www.wikidata.org/entity/Q19985008"))
to_be_removed.append(("Jan Collaert rec", "http://vocab.getty.edu/ulan/500355195")) # changed in spreadsheet in Jan Collaert II
to_be_removed.append(("Jan Collaert rec", "http://www.wikidata.org/entity/Q571313"))
to_be_removed.append(("Jan van Campen rec", "http://vocab.getty.edu/ulan/500115589"))
to_be_removed.append(("Jan van Campen rec", "http://www.wikidata.org/entity/Q6265913"))
to_be_removed.append(("Keith Christiansen rec", "http://www.wikidata.org/entity/Q6384197"))
to_be_removed.append(("Paul Decker rec", "http://www.wikidata.org/entity/Q1798811"))
to_be_removed.append(("Remigus rec", "http://www.wikidata.org/entity/Q2142997"))
to_be_removed.append(("J. Wilde rec", "http://www.wikidata.org/entity/Q1375268"))
to_be_removed.append(("A. E. Austin rec", "http://www.wikidata.org/entity/Q104286270"))


peopleDict2 = remove_add_terms(peopleDict, to_be_removed, "remove")

to_be_added = []
to_be_added.append(("Jan Collaert rec", "http://vocab.getty.edu/ulan/500040711"))
to_be_added.append(("Jan van Campen rec", "http://vocab.getty.edu/page/ulan/500115589"))
peopleDict3 = remove_add_terms(peopleDict2, to_be_added, "add")


Second reconciliation: if we have the WD term, we align also ULAN and VIAF

In [None]:
ulan_query = """SELECT ?term WHERE{<toBeReplaced> wdt:P245 ?term.}""" # verify if the resulting wd term is a link or is with wd prefix
viaf_query = """SELECT ?term WHERE{<toBeReplaced> wdt:P214 ?term.}""" 
variables = ["term"]

def alignments_through_wd(dictionary, query, variable_list, endpoint, url_string): 
  all_items = dictionary.items() # ordered dict of tuples
  for item in all_items: # tuple with key-value pairs
    if type(item[1]) == list: 
      for el in item[1]: # item[1]
        if el.startswith("http://www.wikidata.org/entity/"): # in this way, we also filter automatically also the " rec" keys
          print(el, item[0])
          cleaned_el = el.replace("\"","")
          final_query = query.replace("toBeReplaced", cleaned_el)
          res = sparql_query_setting(final_query, endpoint)
          for result in res["results"]["bindings"]:
            for var in variable_list: 
              aligned = url_string+result[var]["value"]
              print(aligned)
              if aligned not in dictionary[item[0]]: 
                dictionary[item[0]].append(aligned)
  return(dictionary)

        # idea: if the wd term was reconciled, than we take the ulan or viaf alignment
#ulan_aligned = alignments_through_wd(peopleDict, ulan_query, variables, wikidata_endpoint, "http://vocab.getty.edu/ulan/")

In [None]:
ulan_aligned = alignments_through_wd(peopleDict3, ulan_query, variables, wikidata_endpoint, "http://vocab.getty.edu/ulan/")
viaf_aligned = alignments_through_wd(peopleDict3, viaf_query, variables, wikidata_endpoint, "http://viaf.org/viaf/")

In [None]:
print(peopleDict3)

In [None]:
# one link was wrong: final fixing
to_be_removed = []
to_be_removed.append(("Agesander rec", "http://vocab.getty.edu/page/500021781"))

peopleDict4 = remove_add_terms(peopleDict3, to_be_removed, "remove")

In [None]:
# stop words and others removal. After this passage: ready to reconcile

def rumor_removal(dictionary):
  for item in dictionary.items(): 
    if type(item[1]) == list: 
      for element in item[1]: 
        if not element.startswith("http"): 
          item[1].remove(element)
  return(dictionary)

peopleDictFinal = rumor_removal(peopleDict4)

In [None]:
print(peopleDictFinal)

In [None]:
# store the first reconciliation with wd in a json
with open('people_final_match.json', mode='w', encoding="utf-8") as jsonfile:
    json.dump(peopleDictFinal, jsonfile)

## Characters reconciliation with Wikidata, VIAF, Getty IA, Iconclass

In [None]:
#characterDict5 = rumor_removal(characterDict4)
print(characterDict5)
characterDict6 = rumor_removal(characterDict5)
storage = store_in_json("character_wd_first_alignment.json", characterDict6)

{'Adam': 'http://icondataset.org/adam', 'Adam rec': ['https://iconclass.org/11I62(ADAM)', 'http://www.wikidata.org/entity/Q69488', 'stop wd'], 'Aeneas': 'http://icondataset.org/aeneas', 'Aeneas rec': ['https://iconclass.org/96C(AENEAS)', 'http://www.wikidata.org/entity/Q82732', 'stop wd'], 'Aeolus': 'http://icondataset.org/aeolus', 'Aeolus rec': ['http://www.wikidata.org/entity/Q172549', 'stop wd'], 'Aglauros': 'http://icondataset.org/aglauros', 'Aglauros rec': ['http://www.wikidata.org/entity/Q368994', 'stop wd'], 'Aion/Phanes': 'http://icondataset.org/aion/phanes', 'Aion/Phanes rec': ['http://www.iconclass.org/rkd/91A14%28AION%29/', 'stop wd'], 'Anteros': 'http://icondataset.org/anteros', 'Anteros rec': ['http://www.iconclass.org/rkd/92D1911/', 'http://www.wikidata.org/entity/Q572133', 'stop wd'], 'Antiphas, son of Laoocoon': 'http://icondataset.org/antiphas-son-of-laoocoon', 'Antiphas, son of Laoocoon rec': ['stop wd'], 'Antoninus Pius': 'http://icondataset.org/antoninus-pius', 'Ant

In [None]:
wd_character_query = { # nb:applying only to wikidata. Every API has its own parameters. 
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": "toBeReplaced"
    }


character = reconciliation_function(characterDict5, wd_character_query, wikidata_endpoint)

Apollo resulting q class is http://www.wikidata.org/entity/Q37340
Apostles resulting q class is http://www.wikidata.org/entity/Q4155679
Archangel Gabriel resulting q class is http://www.wikidata.org/entity/Q81989
Argus resulting q class is http://www.wikidata.org/entity/Q189821
Ariadne resulting q class is http://www.wikidata.org/entity/Q184874
Athene resulting q class is http://www.wikidata.org/entity/Q37122
bacchant resulting q class is http://www.wikidata.org/entity/Q2878203
Bacchus resulting q class is http://www.wikidata.org/entity/Q645312
Cacus resulting q class is http://www.wikidata.org/entity/Q754686
Cerberus resulting q class is http://www.wikidata.org/entity/Q83496
Christ resulting q class is http://www.wikidata.org/entity/Q302
Christ as a child resulting q class is http://www.wikidata.org/entity/Q104586046
crocodile resulting q class is http://www.wikidata.org/entity/Q2535664
Cybele resulting q class is http://www.wikidata.org/entity/Q188236
David resulting q class is http:

In [64]:
characterDict = open_json("character_wd_first_alignment.json")

In [65]:
to_be_removed = []
to_be_removed.append(("Christ as a child rec", "http://www.wikidata.org/entity/Q104586046"))
to_be_removed.append(("Four Saints rec", "http://www.wikidata.org/entity/Q3927386"))
to_be_removed.append(("Ganymede rec", "http://www.wikidata.org/entity/Q3169"))
to_be_removed.append(("God rec", "http://www.wikidata.org/entity/Q178885"))
to_be_removed.append(("God's hand rec", "http://www.wikidata.org/entity/Q58728036"))
to_be_removed.append(("Hercules rec", "http://www.wikidata.org/entity/Q10448"))
to_be_removed.append(("Herse rec", "http://www.wikidata.org/entity/Q16968"))
to_be_removed.append(("Jest rec", "http://www.wikidata.org/entity/Q371174"))
to_be_removed.append(("Joachim rec", "http://www.wikidata.org/entity/Q4926961"))
to_be_removed.append(("Judith rec", "http://www.wikidata.org/entity/Q5954149"))
to_be_removed.append(("Jupiter rec", "http://www.wikidata.org/entity/Q63851811"))
to_be_removed.append(("Laocoon rec", "http://www.wikidata.org/entity/Q151100"))
to_be_removed.append(("Leach rec", "http://www.wikidata.org/entity/Q472470"))
to_be_removed.append(("Marine snakes rec", "http://www.wikidata.org/entity/Q101635953"))
to_be_removed.append(("Mars rec", "http://www.wikidata.org/entity/Q111"))
to_be_removed.append(("Mercury rec", "http://www.wikidata.org/entity/Q308"))
to_be_removed.append(("Mida rec", "http://www.wikidata.org/entity/Q24769763"))
to_be_removed.append(("Mithras rec", "http://www.wikidata.org/entity/Q219903"))
to_be_removed.append(("Neptune rec", "http://www.wikidata.org/entity/Q332Q332"))
to_be_removed.append(("Olympian Gods rec", "http://www.wikidata.org/entity/Q3012134"))
to_be_removed.append(("Pan rec", "http://www.wikidata.org/entity/Q651690"))
to_be_removed.append(("Paris rec", "http://www.wikidata.org/entity/Q90"))
to_be_removed.append(("Perseus rec", "http://www.wikidata.org/entity/Q10511"))
to_be_removed.append(("Pluto rec", "http://www.wikidata.org/entity/Q339"))
to_be_removed.append(("Rachel rec", "http://www.wikidata.org/entity/Q935489"))
to_be_removed.append(("Rhea rec", "http://www.wikidata.org/entity/Q15050"))
to_be_removed.append(("Roman emperors rec", "http://www.wikidata.org/entity/Q7361956"))
to_be_removed.append(("Saint Basilius rec", "http://www.wikidata.org/entity/Q112163561"))
to_be_removed.append(("Saint Cosmas rec", "http://www.wikidata.org/entity/Q27697297"))
to_be_removed.append(("Saturn rec", "http://www.wikidata.org/entity/Q193"))
to_be_removed.append(("Sybil rec", "http://www.wikidata.org/entity/Q4851022"))
to_be_removed.append(("The resurrected rec", "http://www.wikidata.org/entity/Q4126299"))
to_be_removed.append(("The three Graces rec", "http://www.wikidata.org/entity/Q2268524"))
to_be_removed.append(("Thisbe rec", "http://www.wikidata.org/entity/Q109954"))
to_be_removed.append(("Tityus rec", "http://www.wikidata.org/entity/Q1933958"))
to_be_removed.append(("Venus Pudica rec", "http://www.wikidata.org/entity/Q618535"))
to_be_removed.append(("Venus Verticordia rec", "http://www.wikidata.org/entity/Q24055777"))
to_be_removed.append(("Venus rec", "http://www.wikidata.org/entity/Q313"))
to_be_removed.append(("Angels rec", "http://www.wikidata.org/entity/Q743309"))
to_be_removed.append(("David rec", "http://www.wikidata.org/entity/Q83155"))
to_be_removed.append(("Europa rec", "http://www.wikidata.org/entity/Q3143"))
to_be_removed.append(("Flora rec", "http://www.wikidata.org/entity/Q131449"))



characterDict2 = remove_add_terms(characterDict, to_be_removed, "remove")

# add: 
to_be_added = []
to_be_added.append(("Flora rec", "https://www.wikidata.org/entity/Q209644"))
to_be_added.append(("God rec", "https://www.wikidata.org/entity/Q825"))
to_be_added.append(("Hercules rec", "https://www.wikidata.org/entity/Q122248"))
to_be_added.append(("Joachim rec", "https://www.wikidata.org/entity/Q314700"))
to_be_added.append(("Judith rec", "https://www.wikidata.org/entity/Q28532552"))
to_be_added.append(("Laocoon rec", "https://www.wikidata.org/entity/Q22878"))
to_be_added.append(("Mars rec", "https://www.wikidata.org/entity/Q112"))
to_be_added.append(("Mercury rec", "https://www.wikidata.org/entity/Q1150"))
to_be_added.append(("Mithras rec", "http://www.wikidata.org/entity/Q6497135"))
to_be_added.append(("Neptune rec", "http://www.wikidata.org/entity/Q3954"))
to_be_added.append(("Olympian Gods rec", "https://www.wikidata.org/entity/Q101609"))
to_be_added.append(("Pan rec", "http://www.wikidata.org/entity/Q132582"))
to_be_added.append(("Paris rec", "http://www.wikidata.org/entity/Q167646"))
to_be_added.append(("Perseus rec", "http://www.wikidata.org/entity/Q130832"))
to_be_added.append(("Pluto rec", "http://www.wikidata.org/entity/Q152262"))
to_be_added.append(("Rachel rec", "http://www.wikidata.org/entity/Q207389"))
to_be_added.append(("Rhea rec", "http://www.wikidata.org/entity/Q108419"))
to_be_added.append(("Saturn rec", "http://www.wikidata.org/entity/Q134388"))
to_be_added.append(("The three Graces rec", "http://www.wikidata.org/entity/Q184353"))
to_be_added.append(("Thisbe rec", "http://www.wikidata.org/entity/Q10696861"))
to_be_added.append(("Tityus rec", "http://www.wikidata.org/entity/Q656887"))
to_be_added.append(("Venus rec", "http://www.wikidata.org/entity/Q47652v"))

characterDict3 = remove_add_terms(characterDict2, to_be_added, "add")

In [77]:

# Second reconciliation: if we have the WD term, we align also ULAN and VIAF

ulan_query = """SELECT ?term WHERE{<toBeReplaced> wdt:P245 ?term.}""" # verify if the resulting wd term is a link or is with wd prefix
viaf_query = """SELECT ?term WHERE{<toBeReplaced> wdt:P214 ?term.}""" 
variables = ["term"]


        # idea: if the wd term was reconciled, than we take the ulan or viaf alignment
#ulan_aligned = alignments_through_wd(peopleDict, ulan_query, variables, wikidata_endpoint, "http://vocab.getty.edu/ulan/")

ulan_aligned = alignments_through_wd(characterDict4, ulan_query, variables, wikidata_endpoint, "http://vocab.getty.edu/ulan/")
viaf_aligned = alignments_through_wd(characterDict4, viaf_query, variables, wikidata_endpoint, "http://viaf.org/viaf/")

print(viaf_aligned)

http://www.wikidata.org/entity/Q69488 Adam rec
http://www.wikidata.org/entity/Q82732 Aeneas rec
http://www.wikidata.org/entity/Q172549 Aeolus rec
http://www.wikidata.org/entity/Q368994 Aglauros rec
http://www.wikidata.org/entity/Q572133 Anteros rec
http://www.wikidata.org/entity/Q1429 Antoninus Pius rec
http://vocab.getty.edu/ulan/500115700
http://www.wikidata.org/entity/Q37340 Apollo rec
http://www.wikidata.org/entity/Q4155679 Apostles rec
http://www.wikidata.org/entity/Q81989 Archangel Gabriel rec
http://www.wikidata.org/entity/Q189821 Argus rec
http://www.wikidata.org/entity/Q184874 Ariadne rec
http://www.wikidata.org/entity/Q37122 Athene rec
http://www.wikidata.org/entity/Q2878203 bacchant rec
http://www.wikidata.org/entity/Q645312 Bacchus rec
http://www.wikidata.org/entity/Q754686 Cacus rec
http://www.wikidata.org/entity/Q83496 Cerberus rec
http://www.wikidata.org/entity/Q302 Christ rec
http://www.wikidata.org/entity/Q2535664 crocodile rec
http://www.wikidata.org/entity/Q188236 Cy

http://viaf.org/viaf/105157340587209920668
http://viaf.org/viaf/40151776766418012003
http://viaf.org/viaf/77146513237432210217
http://www.wikidata.org/entity/Q34201 Jupiter rec
http://viaf.org/viaf/3266770
http://www.wikidata.org/entity/Q63851811 Jupiter as an eagle rec
http://www.wikidata.org/entity/Q177854 Lorenzo de' Medici rec
http://viaf.org/viaf/54169908
http://www.wikidata.org/entity/Q345 Mary rec
http://viaf.org/viaf/207159474183027661574
http://viaf.org/viaf/86913753
http://www.wikidata.org/entity/Q126916 Minerva rec
http://viaf.org/viaf/13107718
http://www.wikidata.org/entity/Q9077 Moses rec
http://viaf.org/viaf/805492
http://www.wikidata.org/entity/Q66016 Muses rec
http://viaf.org/viaf/115156010065049582325
http://viaf.org/viaf/5727734
http://www.wikidata.org/entity/Q6497135 Mithras rec
http://viaf.org/viaf/72187436
http://www.wikidata.org/entity/Q332 Neptune rec
http://viaf.org/viaf/237238941
http://www.wikidata.org/entity/Q81422 Noah rec
http://viaf.org/viaf/11187922
http:

In [78]:
print(characterDict4)

{'Adam': 'http://icondataset.org/adam', 'Adam rec': ['https://iconclass.org/11I62(ADAM)', 'http://www.wikidata.org/entity/Q69488'], 'Aeneas': 'http://icondataset.org/aeneas', 'Aeneas rec': ['https://iconclass.org/96C(AENEAS)', 'http://www.wikidata.org/entity/Q82732', 'http://viaf.org/viaf/18151776751918010050', 'http://viaf.org/viaf/310624824'], 'Aeolus': 'http://icondataset.org/aeolus', 'Aeolus rec': ['http://www.wikidata.org/entity/Q172549', 'http://viaf.org/viaf/242146936577813780265', 'http://viaf.org/viaf/316444023'], 'Aglauros': 'http://icondataset.org/aglauros', 'Aglauros rec': ['http://www.wikidata.org/entity/Q368994'], 'Aion/Phanes': 'http://icondataset.org/aion/phanes', 'Aion/Phanes rec': ['http://www.iconclass.org/rkd/91A14%28AION%29/'], 'Anteros': 'http://icondataset.org/anteros', 'Anteros rec': ['http://www.iconclass.org/rkd/92D1911/', 'http://www.wikidata.org/entity/Q572133', 'http://viaf.org/viaf/203920381', 'http://viaf.org/viaf/69739212'], 'Antiphas, son of Laoocoon': 

In [80]:
save = store_in_json("Final/characterAlignedFinal.json", characterDict4)

store in csv: name, string that contains all the reconciled values separated by " @ "
use it to save as csv and filter the reconciled terms directly in the spreadsheet. In this way, the reconciliation is always up to date. 

In [None]:
# make it a function. 
# input: a list of tuples, where tup[0] is always the name
# output: a list of lists where the first term is the name, the second is a join of all the reconciled terms. 

def from_tup_to_csv(list_of_tup, file_name, first_line): 
    list_of_lists = []
    tot_dict = {}
    for tup in list_of_tup: 
        if tup[0] not in tot_dict: 
            tot_dict[tup[0]] = []
        tot_dict[tup[0]].append(tup[2])

    for item in tot_dict.items(): 
        list_of_lists.append([item[0], ' @ '.join(item[1])])
        
    with open(file_name, mode='w', newline='') as my_file:
        file = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
        file.writerow(first_line) 
        for l in list_of_lists: 
            file.writerow(l) 
    return(file_name) 

import csv

with open('character_wd_broader.csv', mode='w', newline='') as my_file:
  char_file = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
  char_file.writerow(['Name', 'WikidataBroader']) 
  for l in final_broader: 
    char_file.writerow(l) 
    
def store_csv(file_name, first_line, list_of_lists): 
    with open(file_name, mode='w', newline='') as my_file:
    file = csv.writer(my_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
    file.writerow(first_line) 
    for l in list_of_lists: 
        file.writerow(l) 
    return(file_name)
    

In [8]:
peopleDict = open_json("people_final_match.json") 
peopleList = []
headings = ["Name", "Link"]
for tup in peopleDict.items(): 
    if tup[0].endswith(" rec"): 
        col1 = tup[0].replace(" rec", "")
        col2 = ' @ '.join(tup[1])
        peopleList.append([col1, col2])
print(peopleList)
   
char = store_csv("PeopleAligned.csv", headings , peopleList)

[['A.E. Popp', 'http://viaf.org/viaf/20715488'], ["Abu Ma'sar", 'http://viaf.org/viaf/17355947 @ http://www.wikidata.org/entity/Q11373'], ['Achilles Bocchius', 'http://viaf.org/viaf/149878 @ http://www.wikidata.org/entity/Q3604350'], ['Adam Friedrich Oeser', 'http://vocab.getty.edu/ulan/500115770 @ http://www.wikidata.org/entity/Q215129 @ http://viaf.org/viaf/74126113'], ['Agesander', 'http://www.wikidata.org/entity/Q983432 @ http://vocab.getty.edu/ulan/500021781 @ http://viaf.org/viaf/35898854'], ['Athenodoros', 'http://vocab.getty.edu/ulan/500007132 @ http://www.wikidata.org/entity/Q280432 @ http://viaf.org/viaf/42230050'], ['Polydorus', 'http://vocab.getty.edu/ulan/500031985'], ['Agnolo Bronzino', 'http://vocab.getty.edu/ulan/500004362 @ http://www.wikidata.org/entity/Q7803 @ http://viaf.org/viaf/106963584 @ http://viaf.org/viaf/59354048'], ['Albrecht Dürer', 'http://vocab.getty.edu/ulan/500115493 @ http://www.wikidata.org/entity/Q5580 @ http://viaf.org/viaf/54146999'], ['Alexandre 

## Add the reconciled terms to the triples
procedure: open the serialized file containing all the dataset as a graph. If the internal link is in the graph, then add the triples. 

In [None]:
#g = Graph()
def reconciled_triples_addition(dictionary_json, newg, graph_ttl_serialized): 
  with open(dictionary_json, mode='r', encoding="utf-8") as jsonfile:
    dictionary = json.load(jsonfile)
  newg.parse(graph_ttl_serialized, format="ttl")
  for name in dictionary: 
    if not name.endswith(" rec"): 
      if dictionary[name] in g: # if the internal link is already in the graph
        name_rec = name+" rec"
        print(dictionary[name])
        if dictionary[name_rec] != []: # if there are reconciled terms
          for reconciled in dictionary[name_rec]:
            g.add((URIRef(dictionary[name]), owl.sameAs, URIRef(reconciled)))
            print(dictionary[name], "has reconciled term", reconciled)
  return newg
  

In [None]:
g = Graph()

In [None]:
for name in peopleDictFinal: 
  if not name.endswith(" rec"): 
    if peopleDictFinal[name] in g: 
      name_rec = name+" rec"
      if peopleDictFinal[name_rec] != []: 
        for reconciled in peopleDictFinal[name_rec]:
          g.add((URIRef(peopleDictFinal[name]), owl.sameAs, URIRef(reconciled)))

In [None]:
g.serialize(destination = "People_reconciled.ttl", format="turtle")

<Graph identifier=N86d4c16a0d0942e2badb6bffad8f9ca7 (<class 'rdflib.graph.Graph'>)>

In [None]:
# test for string search efficacy

wd_query = { # nb:applying only to wikidata. Every API has its own parameters. 
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "search": "Bargello National Museum"
    }

API_WD = "https://www.wikidata.org/w/api.php"
r =    requests.get(API_WD, params = wd_query).json()

print(r) # json with all the classes to be filtered
# res = wikidata_reconciliation(r, endpoint, q_class)
# people = reconciliation_function(peopleDict, wd_query, wikidata_endpoint, q_class="Q5")

{'searchinfo': {'search': 'Bargello National Museum'}, 'search': [{'id': 'Q388448', 'title': 'Q388448', 'pageid': 369638, 'display': {'label': {'value': 'Bargello National Museum', 'language': 'en'}, 'description': {'value': 'Art museum in Florence, Italy', 'language': 'en'}}, 'repository': 'wikidata', 'url': '//www.wikidata.org/wiki/Q388448', 'concepturi': 'http://www.wikidata.org/entity/Q388448', 'label': 'Bargello National Museum', 'description': 'Art museum in Florence, Italy', 'match': {'type': 'label', 'language': 'en', 'text': 'Bargello National Museum'}}], 'success': 1}
