# Install and import

In [None]:
pip install rdflib

In [None]:
from rdflib import URIRef, BNode, Literal, Graph
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace, Literal, XSD

import csv

import re

!pip install unidecode
from unidecode import unidecode

!pip install requests
import requests

!pip install SPARQLWrapper

from SPARQLWrapper import SPARQLWrapper, JSON
import ssl

!pip install fuzzywuzzy
!pip install python-Levenshtein

import difflib
# !pip install python-Levenshtein
# !pip install thefuzz
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# from thefuzz import fuzz
# from thefuzz import process
import json

# Access a Google Spreadsheet and retrieve data

In [None]:
# Authenticate the current Colab user.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
# Default credentials of the authenticated session (the second returned value is ignored).
creds, _ = default()

# Authorised gspread client; convert_to_dict uses it to open spreadsheets.
gc = gspread.authorize(creds)

In [None]:
def convert_to_dict(file):
  """Read every tab of a Google spreadsheet into a dictionary.

  Args:
    file: title of the spreadsheet; it is opened through the module-level
      gspread client `gc`.

  Returns:
    A dict mapping each worksheet title to the list of row dictionaries
    returned by `get_all_records()` (one dict per row of the tab).
  """
  spreadsheet = gc.open(file)
  # Read the title and the records straight from each worksheet object instead
  # of parsing str(worksheet) and re-fetching the tab by position: titles that
  # contain spaces or apostrophes are no longer truncated.
  return {
    worksheet.title: worksheet.get_all_records()
    for worksheet in spreadsheet.worksheets()
  }

# Load every tab of the "Panofsky" spreadsheet into a dict keyed by tab name.
all_sheets = convert_to_dict("Panofsky")
# all_sheets = convert_to_dict("StudiesInIconology")


# Separate values and clean data
1) Replaces values separated by " @ " with lists of values under the same key (e.g. row {quality: [beauty, red]}).
2) Adds companion keys (suffix "_uri") whose values contain no spaces or special characters, ready to be used for the creation of URIs.

In [None]:

def separate_clean(data):
  """Split multi-valued cells and add URI-ready copies of every field.

  Args:
    data: list of dictionaries (one per spreadsheet row).

  Returns:
    A list of dictionaries where each original key maps to the list of
    stripped values obtained by splitting the cell on " @ ", and each
    `<key>_uri` maps to the same list passed through `text_to_uri`.
  """
  final = []
  for row in data:
    cleaned_row = {}
    for field, raw in row.items():
      # str(): numeric cells (e.g. dates) are read as integers
      parts = [piece.strip() for piece in str(raw).split(" @ ")]
      print(parts)
      cleaned_row[field] = parts
      cleaned_row[field+"_uri"] = text_to_uri(parts)
    final.append(cleaned_row)
  return final

def clean_labels(value):
    """Return a new list with the surrounding whitespace removed from each label in `value`."""
    return [label.strip() for label in value]

def text_to_uri(value):
  """Turn a list of labels into lowercase, URI-friendly slugs.

  For each label: round brackets are removed, double quotes become "_",
  the text is trimmed, spaces become "-", and full stops, commas and
  apostrophes are dropped; the result is lowercased and passed to
  `unidecode`.

  Args:
    value: list of strings.

  Returns:
    A list with one cleaned string per input item, in the same order.
  """
  # Single-character substitutions applied once the label has been trimmed.
  slug_table = str.maketrans({" ": "-", ".": None, ",": None, "'": None})
  cleaned_values = []
  for label in value:
    without_brackets = re.sub(r"\)", "", re.sub(r"\(", "", label))
    trimmed = re.sub(r"\"", "_", without_brackets).strip()
    slug = trimmed.translate(slug_table).lower()
    # unidecode replaces accented letters with their unaccented counterparts.
    # NOTE(review): transliteration runs last, so any punctuation it may emit
    # (e.g. for typographic quotes) is presumably not stripped - confirm.
    cleaned_values.append(unidecode(slug))
  # Possible future change: use one regex to drop or map every
  # non-alphanumeric character, e.g. "[^a-zA-Z0-9 -]".
  return cleaned_values

# The output is a list of dictionaries with duplicated keys: one holding the labels and one used to build the URIs



In [None]:
# Run separate_clean on each tab of the spreadsheet: every *_cleaned variable
# is a list of row dicts holding both the label lists and their "_uri" variants.
artworks_cleaned = separate_clean(all_sheets["Artworks"])
books_cleaned = separate_clean(all_sheets["Books"])
booksContent_cleaned = separate_clean(all_sheets["BooksContent"])
people_cleaned= separate_clean(all_sheets["People"])
type_cleaned = separate_clean(all_sheets["Type"])
typeOfRelation_cleaned = separate_clean(all_sheets["TypeOfRelation"])
style_cleaned = separate_clean(all_sheets["Style"])
places_cleaned = separate_clean(all_sheets["Places"])
interpretations_cleaned = separate_clean(all_sheets["Interpretations"])
level1_cleaned = separate_clean(all_sheets["Level1"])
formalMotifRecLevel1_cleaned = separate_clean(all_sheets["FormalMotifRecLevel1"])
level2_cleaned = separate_clean(all_sheets["Level2"])
simulation_cleaned = separate_clean(all_sheets["Simulation"])
level3_cleaned =separate_clean(all_sheets["Level3"])
levelsEntryControl_cleaned = separate_clean(all_sheets["LevelsEntryControl"])
character_wd_cleaned = separate_clean(all_sheets["character_wd_broader"])
license_cleaned = separate_clean(all_sheets["License"])

# Verify if all the terms are in the controlled lists

In [None]:
# Check that all the terms of the controlled columns are in the controlled lists.
# First step: collect every non-empty value of the LevelsEntryControl tab
# (labels and "_uri" variants alike) into the `control` list, without duplicates.
control = []
for column in levelsEntryControl_cleaned:
  val = column.values()
  for value in val:
    if value != [""]:
      # multi-valued cells are re-joined into one string separated by single spaces
      final_v = " ".join(value)
      if final_v not in control:
        control.append(final_v)
 # control.add(val)



In [None]:
def controlled_list(control_list, col_name, list_dict):
  """Add the terms found in one column to a control list.

  Args:
    control_list: list of already known terms; it is extended in place.
    col_name: name of the column to read.
    list_dict: the tab, read as a list of dicts whose values are lists of strings.

  Returns:
    The same `control_list` object, so that callers assigning the result get
    the updated list (the function used to return None).

  Raises:
    KeyError: if a row has no `col_name` key.
  """
  for row in list_dict:
    terms = row[col_name]
    if terms == [""]:  # empty cell
      continue
    for term in terms:
      if term not in control_list:
        control_list.append(term)
  return control_list

In [None]:
# Extend the shared `control` list (in place) with the terms of the controlled tabs.
# people
people_list = controlled_list(control, "Name", people_cleaned)

# places - DuplicatesCheck	CityOrRegion
places_list = controlled_list(control, "Institution", places_cleaned)
places_list2 = controlled_list(control, "CityOrRegion", places_cleaned)
places_list3 = controlled_list(control, "Country", places_cleaned)
# styles
style_list = controlled_list(control, "Style", style_cleaned)
period_list = controlled_list(control, "Period", style_cleaned)

# books and BooksContent- TitleForCheckingDuplicates
book_list = controlled_list(control, "TitleForCheckingDuplicates", books_cleaned)
bookContent_list = controlled_list(control, "TitleForCheckingDuplicates", booksContent_cleaned)
#
print(control)

In [None]:
def control_terms(control_list, col_name_list, list_dict):
  """Report the terms that are missing from a controlled list.

  Args:
    control_list: collection of accepted terms.
    col_name_list: names of the columns to verify; rows lacking a column are skipped.
    list_dict: the tab, read as a list of dicts whose values are lists of strings.

  Returns:
    A list of `(value, column_name)` tuples, one per uncontrolled occurrence,
    in the order in which they are printed. Callers assign the result, which
    used to be None.
  """
  uncontrolled = []
  for name in col_name_list:
    for dic in list_dict:
      for value in dic.get(name, []):
        # empty cells are not terms and are never reported
        if value != "" and value not in control_list:
          print(value,"in column", name, "is not controlled")
          uncontrolled.append((value, name))
  return uncontrolled



In [None]:
# Columns to verify against `control`, one list per tab.
artwork_check = ["CityOfConservation",	"PlaceOfConservation", "Author", "RelatedBook", "Style", "Period"]
level1 = ["NaturalElement",	"ExpressionalQuality",	"Quality",	"Action",	"Style", "PersonResponsible"]
level2 = ["Character",	"SourceActor",	"Place",	"Event",	"NamedObject",	"Symbol",	"Personification",	"Story",	"Allegory",	"Invenzione", "PersonResponsible"]
level3 = ["Concept", "CulturalPhenomenon", "PersonResponsible"]

# Print the Artworks terms that are not in the control list.
artwork_check2 = control_terms(control, artwork_check, artworks_cleaned)

In [None]:
# Print the Level1 terms that are not in the control list.
level1_check = control_terms(control, level1, level1_cleaned)

In [None]:
# Print the Level2 terms that are not in the control list.
level2_check = control_terms(control, level2, level2_cleaned)

Adam "holds on” to the Tree of Life as long as he is still free not to accept the fatal fruit in column Allegory is not controlled


In [None]:
# Print the Level3 terms that are not in the control list.
level3_check = control_terms(control, level3, level3_cleaned)

# Graph Creation

In [None]:
# Namespaces used to build the IRIs of classes, properties and data.
icon = Namespace("https://w3id.org/icon/ontology/")  #ontology name
sim =  Namespace("https://w3id.org/simulation/ontology/")
crm = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
d = Namespace("https://w3id.org/icon/data/")
v = Namespace("http://iconvocabulary.org/")
dul = Namespace("http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#")
# FOAF terms live under ".../foaf/0.1/"; ".../foaf/spec/" is the specification page.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
cito = Namespace("http://purl.org/spar/cito/")
pro = Namespace("http://purl.org/spar/pro/")
skos = Namespace("http://www.w3.org/2004/02/skos/core#")
owl = Namespace("http://www.w3.org/2002/07/owl#")
dct = Namespace("http://purl.org/dc/terms/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
prov = Namespace("http://www.w3.org/ns/prov#")
# The trailing "#" is required: without it dcatapit.Dataset expands to
# ".../dcatapitDataset" instead of ".../dcatapit#Dataset".
dcatapit = Namespace("http://dati.gov.it/onto/dcatapit#")
dcat = Namespace("http://www.w3.org/ns/dcat#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
schema = Namespace("https://schema.org/")



g = Graph() #we create the graph
# Prefix bindings (each prefix bound once; "dc" was declared but never bound).
g.bind("icon", icon)
g.bind("sim", sim)
g.bind("rdf", RDF)
g.bind("crm", crm)
g.bind("rdfs", RDFS)
g.bind("d", d)
g.bind("dul", dul)
g.bind("foaf", foaf)
g.bind("cito", cito)
g.bind("pro", pro)
g.bind("skos", skos)
g.bind("owl", owl)
g.bind("dct", dct)
g.bind("dc", dc)
g.bind("prov", prov)
g.bind("dcatapit", dcatapit)
g.bind("dcat", dcat)
g.bind("xsd", xsd)
g.bind("schema", schema)

print(g)

[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']].


## Dataset metadata



In [None]:
# Dataset-level metadata: the dataset IRI is the data namespace `d` itself.
g.add((URIRef(d), RDF.type, URIRef(dcatapit.Dataset)))
g.add((URIRef(d), RDF.type, URIRef(dcat.Dataset)))
#g.add((URIRef(d), dct.identifier, Literal("IconDatasetSubgraph.version2")))
g.add((URIRef(d), dct.identifier, Literal("IconDataset.version2")))
g.add((URIRef(d), dct.title, Literal("Icon Dataset", lang="en")))
#g.add((URIRef(d), dct.description, Literal("Subgraph of the Iconology dataset of the interpretations extracted from 'Studies in Iconology' by Erwin.")))

g.add((URIRef(d), dct.description, Literal("Dataset of the iconographical and iconological interpretations extracted from articles and books written by Panofsky.")))
# theme, keyword and distribution are DCAT properties (DCTERMS does not define them).
g.add((URIRef(d), dcat.theme, URIRef("http://publications.europa.eu/resource/authority/data-theme/EDUC")))
g.add((URIRef(d), dct.subject, URIRef("http://vocab.getty.edu/aat/300055859")))
g.add((URIRef(d), dct.subject, URIRef("http://vocab.getty.edu/aat/300054235")))
g.add((URIRef(d), dct.subject, URIRef("http://vocab.getty.edu/aat/500216547")))
# The XSD datatype is the lowercase xsd:date; "XSD.Date" is not a valid datatype IRI.
g.add((URIRef(d), dct.issued, Literal("2023-10-13", datatype=XSD.date)))
#g.add((URIRef(d), dct.modified, Literal("2021-06-1", datatype=XSD.date)))
g.add((URIRef(d), dcat.keyword, Literal("iconology")))
g.add((URIRef(d), dcat.keyword, Literal("iconography")))
g.add((URIRef(d), dcat.keyword, Literal("art interpretation")))
g.add((URIRef(d), dcat.keyword, Literal("art history")))
g.add((URIRef(d), dcat.keyword, Literal("artworks")))
g.add((URIRef(d), owl.versionInfo, Literal("2.1.0")))
g.add((URIRef(d), dct.creator, URIRef("https://orcid.org/0000-0002-5636-8328")))
g.add((URIRef("https://orcid.org/0000-0002-5636-8328"), RDF.type, URIRef(dcatapit.Agent)))
# Turtle distribution of the dataset.
g.add((URIRef(d), dcat.distribution, URIRef(d+"TurtleDistribution")))
g.add((URIRef(d+"TurtleDistribution"), RDF.type, dcatapit.Distribution))
# g.add((URIRef(d+"TurtleDistribution"), dct.format, URIRef("http://publications.europa.eu/resource/authority/file-type/RDF_TURTLE")))
g.add((URIRef(d+"TurtleDistribution"), dct.license, URIRef("http://creativecommons.org/licenses/by-sa/4.0/")))
g.add((URIRef(d+"TurtleDistribution"), dcat.downloadURL, URIRef("https://raw.githubusercontent.com/SofiBar/IconologyDataset/main/data/icondataset.ttl")))
g.add((URIRef(d+"TurtleDistribution"), dcat.accessURL,  URIRef("https://projects.dharc.unibo.it/icondataset/sparql")))

# Sources the dataset was derived from.
g.add((URIRef(d), prov.wasDerivedFrom, URIRef("http://www.worldcat.org/oclc/265570808")))
g.add((URIRef(d), prov.wasDerivedFrom, URIRef("http://www.worldcat.org/oclc/217473356")))
g.add((URIRef(d), prov.wasDerivedFrom, URIRef("https://doi.org/10.2307/1522803")))
g.add((URIRef(d), prov.wasDerivedFrom, URIRef("http://www.worldcat.org/oclc/803192922")))
g.add((URIRef(d), dct.language, URIRef("http://publications.europa.eu/resource/authority/language/ENG")))

# Dictionaries made out of names and links
To be used when the vocabulary already contains the final reconciled values. The values are strings.

In [None]:
def name_str_link_dict(colname, collink, file, field_string):
  """Map each name of a column to a single link (a string).

  Args:
    colname: column holding the name; `colname + "_uri"` holds its slug.
    collink: column holding the link(s) for that name.
    file: the tab, as a list of dicts whose values are lists of strings.
    field_string: label of the entity kind; lowercased and stripped of
      spaces, it becomes the path segment of locally built URIs.

  Returns:
    A dict `{name: link}`. A link cell equal to "" or "no" yields the local
    URI "https://w3id.org/icon/data/<field>/<slug>"; any other value is used
    as is. When a row lists several links the last one processed wins.
    Rows whose first name is empty are ignored.
  """
  mapping = {}
  for row in file:
    label = row[colname][0]
    if label == "":
      continue
    slug = row[colname+"_uri"][0]
    segment = field_string.lower().replace(" ", "")
    for candidate in row[collink]:
      if candidate in ("", "no"):
        # no external link available: build a local URI
        mapping[label] = "https://w3id.org/icon/data/"+segment+"/"+slug
      else:
        mapping[label] = candidate
  return mapping


In [None]:
# Name -> link lookups (one string per name) for the already reconciled vocabularies.
styleDict = name_str_link_dict("Style", "Link", style_cleaned, "style")
print(styleDict)
periodDict = name_str_link_dict("Period", "PeriodLink", style_cleaned, "period")
print(periodDict)
relDict = name_str_link_dict("TypeOfRelation", "Link", typeOfRelation_cleaned, "relation")
print(relDict)
typeDict = name_str_link_dict("CulturalObjectType", "Link", type_cleaned, "type")
print(typeDict)
materialDict = name_str_link_dict("Material", "MaterialLink", type_cleaned, "material")
print(materialDict)
licenseDict = name_str_link_dict("Name", "Link", license_cleaned, "license")
print(licenseDict)

## Multiple reconciliation
We use this section to reconcile a term with multiple terms from more than one vocabulary. For example, if we know that a person can be reconciled with Getty ULAN, VIAF and Wikidata, we adopt the following methodology.

### Dictionaries for multiple reconciliation
We insert two keys in the vocabulary: 1) one with the name, whose value is the local URI, and 2) the other with the name + " rec", whose value is a list of reconciled terms. Example: `{"Raffaello Sanzio" : "www.icondataset.org/people/raffaello-sanzio", "Raffaello Sanzio rec" : []}`. If reconciled terms are still present in the dictionary, they are added to the " rec" key. Then, the value of the first key will be linked to each link of the " rec" key through the relation "owl:sameAs".

In [None]:
def name_link_dict(colname, collink, file, entity_type= None):
  """Build the lookup used for multiple reconciliation.

  For every non-empty name two keys are stored: the name itself, mapped to
  its local URI, and `name + " rec"`, mapped to the list of reconciled links.

  Args:
    colname: column holding the name; `colname + "_uri"` holds its slug.
    collink: column holding the reconciled link(s).
    file: the tab, as a list of dicts whose values are lists of strings.
    entity_type: optional entity kind; when given, its lowercase form is
      inserted as a path segment of the local URI.

  Returns:
    The dict described above. Only links starting with "htt" are kept, and
    each link is listed once per name.
  """
  myDict = {}
  for line in file:
    name = line[colname][0]
    if name == "":
      continue
    name_uri = line[colname+"_uri"][0]
    if entity_type is not None:
      newLink = "https://w3id.org/icon/data/"+entity_type.lower()+"/"+name_uri
    else:
      newLink = "https://w3id.org/icon/data/"+name_uri
    myDict[name] = newLink
    # setdefault keeps the links gathered from earlier rows with the same
    # name; re-creating the list on every row used to discard them.
    reconciled = myDict.setdefault(name+" rec", [])
    for link in line[collink]:
      if link.startswith("htt") and link not in reconciled:
        reconciled.append(link)
  return myDict

In [None]:
# People lookup: name -> local "person" URI, plus a "<name> rec" list of reconciled links.
peopleDict = name_link_dict("Name", "Link", people_cleaned, "person")
print(peopleDict)

In [None]:
# Character lookup built from the LevelsEntryControl tab, then saved as JSON.
characterDict = name_link_dict("Character", "LinkCharacter", levelsEntryControl_cleaned)
print(characterDict)

with open("characterDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(characterDict, jsonfile)

In [None]:
# NaturalElement lookup built from the LevelsEntryControl tab, then saved as JSON.
naturalElementDict = name_link_dict("NaturalElement", "LinkNaturalElement", levelsEntryControl_cleaned)
print(naturalElementDict)

with open("naturalElementDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(naturalElementDict, jsonfile)

In [None]:
# ExpressionalQuality lookup built from the LevelsEntryControl tab, then saved as JSON.
expressionalQualityDict = name_link_dict("ExpressionalQuality", "LinkExpressionalQuality", levelsEntryControl_cleaned)
print(expressionalQualityDict)

with open("expressionalQualityDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(expressionalQualityDict, jsonfile)

In [None]:
# Quality lookup built from the LevelsEntryControl tab, then saved as JSON.
qualityDict = name_link_dict("Quality", "LinkQuality", levelsEntryControl_cleaned)
print(qualityDict)

with open("qualityDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(qualityDict, jsonfile)

In [None]:
# Action lookup built from the LevelsEntryControl tab, then saved as JSON.
actionDict = name_link_dict("Action", "LinkAction", levelsEntryControl_cleaned)
print(actionDict)

with open("actionDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(actionDict, jsonfile)

In [None]:
# Place lookup - NB: these are the places of the second level, read from the
# LevelsEntryControl tab; the result is saved as JSON.

placeDict = name_link_dict("Place", "LinkPlace", levelsEntryControl_cleaned)
print(placeDict)

with open("placeDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(placeDict, jsonfile)

In [None]:
# Event lookup built from the LevelsEntryControl tab, then saved as JSON.

eventDict = name_link_dict("Event", "LinkEvent", levelsEntryControl_cleaned)
print(eventDict)

with open("eventDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(eventDict, jsonfile)

In [None]:
# NamedObject lookup built from the LevelsEntryControl tab, then saved as JSON.

namedObjectDict = name_link_dict("NamedObject", "LinkNamedObject", levelsEntryControl_cleaned)
print(namedObjectDict)

with open("namedObjectDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(namedObjectDict, jsonfile)

In [None]:
# Symbol lookup built from the LevelsEntryControl tab, then saved as JSON.

symbolDict = name_link_dict("Symbol", "LinkSymbol", levelsEntryControl_cleaned)
print(symbolDict)  # show the dictionary just built (the cell used to print actionDict)

with open("symbolDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(symbolDict, jsonfile)

In [None]:
# Personification lookup built from the LevelsEntryControl tab, then saved as JSON.
personificationDict = name_link_dict("Personification", "LinkPersonification", levelsEntryControl_cleaned)
print(personificationDict)  # show the dictionary just built (the cell used to print actionDict)

with open("personificationDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(personificationDict, jsonfile)

In [None]:
# PartOfStory lookup built from the LevelsEntryControl tab, then saved as JSON.

storyDict = name_link_dict("PartOfStory", "LinkStory", levelsEntryControl_cleaned)
print(storyDict)

with open("storyDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(storyDict, jsonfile)

In [None]:
# partOfAllegory lookup built from the LevelsEntryControl tab, then saved as JSON.

allegoryDict = name_link_dict("partOfAllegory", "LinkAllegory", levelsEntryControl_cleaned)
print(allegoryDict)

with open("allegoryDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(allegoryDict, jsonfile)

In [None]:
# partOfInvenzione lookup built from the LevelsEntryControl tab, then saved as JSON.

invenzioneDict = name_link_dict("partOfInvenzione", "LinkInvenzione", levelsEntryControl_cleaned)
print(invenzioneDict)

with open("invenzioneDict.json", mode="w", encoding="utf-8") as jsonfile: # we store all the dicts in a json to act on them in a second moment
    json.dump(invenzioneDict, jsonfile)

# Date dictionary

In [None]:
# Patterns for plain years: p = exactly four digits, q = exactly three digits,
# r = a single digit (r is not referenced by date_dict_update).
p = re.compile(r'^\d{4}$')
q = re.compile(r'^\d{3}$')
r = re.compile(r'^\d$')
# Shared state filled by date_dict_update: the date labels seen so far and
# the label -> [start, end] dictionary.
dateKeys = []
dateDict = {}
convertDict = {"I":["001", "100"], "II":["101", "200"], "III":["201", "300"], "IV":["301", "400"], "V":["401", "500"], "VI":["501", "600"], "VII":["601", "700"], "VIII":["701", "800"], "IX":["801", "900"], "X":["901", "1000"], "XI":["1001", "1100"], "XII":["1101", "1200"], "XIII":["1201", "1300"], "XIV":["1301", "1400"], "XV":["1401", "1500"], "XVI":["1501", "1600"], "XVII":["1601", "1700"], "XVIII":["1701", "1800"], "XIX":["1801", "1900"], "XX":["1901", "2000"] } # maps centuries in Roman numerals to their span in Arabic numerals
centuryRestriction = {"first decade":[0, 9], "second decade":[10, 19], "third decade":[20, 29], "fourth decade":[30, 39], "fifth decade":[40, 49], "sixth decade":[50, 59], "seventh decade":[60, 69], "eighth decade":[70, 79], "nineth decade":[80, 89], "tenth decade":[90, 99], "first quarter":[0, 24], "second quarter":[25, 49], "third quarter":[50, 74], "fourth quarter":[75, 99], "first half":[0, 49], "second half":[50, 99] }
# convertDict: the first position of each list is the start year, the second the end year
# centuryRestriction: dates are built by adding these values to the start year, which is always the first year of the century, so 1 has been subtracted to get the right result: first decade 0-9 instead of 1-10


In [None]:
def date_dict_update(colname, data_cleaned):
  """Register the dates of one tab and rebuild the shared date dictionary.

  The first value of `colname` in every row is appended to the module-level
  `dateKeys` list (if new and non-empty). Every label in `dateKeys` is then
  converted into a `[start, end]` pair stored in the module-level `dateDict`.
  Either bound may be None for open intervals ("before ...", "after ...").

  Handled forms: plain 3- or 4-digit years, "YYYY-YYYY" ranges,
  "before/completed X", "after/post X", centuries in Roman numerals (single
  or "XI-XII"), "Early/Middle/Late" centuries or thirds of a century, and
  the decade/quarter/half restrictions listed in `centuryRestriction`.
  Labels ending with "B.C." get a leading "-", and years shorter than four
  digits are zero-padded.

  Args:
    colname: name of the column holding the date label.
    data_cleaned: the tab, as a list of dicts whose values are lists of strings.

  Returns:
    The module-level `dateDict`.
  """
  for dic in data_cleaned:
    date = dic[colname][0]
    if date != "" and date not in dateKeys:
      dateKeys.append(date)

  start = "-01-01T00:00:00"
  end = "-12-31T23:59:59"

  for date in dateKeys:
    if len(date) <= 4 and (re.search(p,date) or re.search(q,date)): # plain year made of 3 or 4 digits only
      dateDict[date] = [date+start , date+end]
    elif len(date.split('-')) == 2: # the data must carry the full end year (e.g. "1308-1311", not "1308-11")
      startDate = date.split('-')[0]
      endDate = date.split('-')[1]
      dateDict[date] = [startDate+start , endDate+end]
    # before, completed, after, post
    elif date.startswith('before') or date.startswith('completed'):
      endDate = date.split(" ")[1]
      dateDict[date] = [None, endDate+end]
    elif date.startswith('after') or date.startswith('post'):
      startDate = date.split(" ")[1]
      dateDict[date] = [startDate+start , None]

    # centuries
    words = date.split(" ")
    if len(words) > 1:
      first = words[0]
      for century in convertDict:
        if first == century: # a key of the conversion dictionary, hence a century
          dateDict[date] = [convertDict[first][0]+start , convertDict[first][1]+end]
        elif first.split("-")[0] == century: # case: "XI-XII Century"
          fromCentury = first.split("-")[0]
          toCentury = first.split("-")[1]
          dateDict[date] = [convertDict[fromCentury][0]+start , convertDict[toCentury][1]+end]
        elif first.startswith('Early') or date.startswith('first third of'): # ! upper and lower case differences
          # Only write when this century is the one named in the label; the
          # assignment used to run for every century with a stale or unbound
          # endDate, leaving a wrong start year in the dictionary.
          if century in words:
            if "B.C." in words:
              endDate = int(convertDict[century][0]) + 66
              dateDict[date] = [convertDict[century][1]+start, str(endDate)+end]
            else:
              endDate = int(convertDict[century][1]) - 66 # as an integer it can be added to / subtracted from
              dateDict[date] = [convertDict[century][0]+start , str(endDate)+end]
        elif first.startswith('Middle') or date.startswith('Second third of'):
          if century in words:
            startDate = int(convertDict[century][0]) + 34
            endDate = int(convertDict[century][1]) - 33
            dateDict[date] = [str(startDate)+start , str(endDate)+end]
        elif first.startswith('Late') or date.startswith('end of'):
          if century in words:
            startDate = int(convertDict[century][0]) + 67
            dateDict[date] = [str(startDate)+start , convertDict[century][1]+end]
        # decade / quarter / half of a century; the label is lowercased
        # because the capitalisation of the keys is not uniform
        restriction = date.split(" of ")[0].lower()
        if restriction in centuryRestriction and century in words:
          startDate = int(convertDict[century][0]) + int(centuryRestriction[restriction][0])
          endDate = int(convertDict[century][0]) + int(centuryRestriction[restriction][1])
          dateDict[date] = [str(startDate)+start, str(endDate)+end]

  # B.C. dates become negative years; open (None) bounds are left untouched
  # instead of raising AttributeError.
  for el in dateDict:
    if el.endswith("B.C."):
      dateDict[el] = [
        None if bound is None else "-"+bound.replace("B.C.", "").replace(" -", "-")
        for bound in dateDict[el]
      ]

  # add zeros to dates having less than 4 digits to adhere to the ISO standard:
  for date_list in dateDict.values():
    for i, whole_date in enumerate(date_list):
      if whole_date is None:
        continue
      print(whole_date)
      s = whole_date.split("-")
      # a leading "-" (B.C. date) leaves an empty first chunk
      year = s[1] if s[0] == "" else s[0]
      len_date = len(year)
      if len_date < 4: # dates shorter
        print(year)
        if whole_date.startswith("-"):
          new_date = "-"+str(0)*(4-len_date)+whole_date.replace("-", "", 1)
        else:
          new_date = str(0)*(4-len_date)+whole_date
        # replace the old date with the new one
        date_list[i] = new_date
        print(date_list)

  return dateDict



# check whether all the dates have been inserted
# to be added: dates spanning B.C. and A.D.


# Register the dates of the Artworks tab in the shared date dictionary.
updatedWithArtwork = date_dict_update("Date",artworks_cleaned)

In [None]:
#fileBooks = open("Panofsky - Books.csv", "r")
#dataBooks = csv.DictReader(fileBooks)

# Register the dates of the Artworks, Books and People tabs; every call
# returns the same shared dateDict.
updatedWithArtwork = date_dict_update("Date",artworks_cleaned)
updatedWithBook = date_dict_update("Date",books_cleaned)
updatedWithPeople = date_dict_update("Date",people_cleaned)
# updatedWithInterpretations = date_dict_update("Date", interpretations_cleaned)
print(dateDict)

# Places dictionary

In [None]:
placesDict = {}
placesDict.update({"Countries": {}, "Cities": {}, "Cities Institutions": {}})
# known issue: cities with the same name are not distinguished!
# structure of the dictionary: every key has its corresponding link.
# entries are grouped into countries, cities and cities institutions. The structure of the last one is cities institutions: {city1 : {abbey: [link]}}
# the institution link is a list because later, in the triples, the link of a museum or collection located in that place may be added in position 1.

for line in places_cleaned:
  institution_uri = line["Institution_uri"][0]
  institutionLink = line["InstitutionLink"][0]
  institution = line["Institution"][0]
  institutionType = line["InstitutionType"][0]
  city = line["CityOrRegion"][0]
  city_uri = line["CityOrRegion_uri"][0]
  cityLink = line["CityLink"][0]
  country = line["Country"][0]
  country_uri = line["Country_uri"][0]
  countryLink = line["CountryLink"][0]

  if city !="":
    if city not in placesDict["Cities"]:
      placesDict["Cities Institutions"].update({city: {}})
      # default to a local URI the first time the city is met
      placesDict["Cities"][city] = d+"place/"+city_uri
    # An explicit link always wins, but a later row with an empty link no
    # longer replaces a link already recorded for the same city (the same
    # first-link-is-enough rule used for countries and institutions).
    if cityLink != "" and cityLink != "no":
      placesDict["Cities"][city] = cityLink
  if country !="":
    if country not in placesDict["Countries"]:
      if countryLink == "" or countryLink == "no":
        placesDict["Countries"].update({country : d+"place/"+country_uri})
      else:
        placesDict["Countries"].update({country : countryLink})
  if institution !="" and city !="":
    if institution not in placesDict["Cities Institutions"][city]: # only if not already inserted: in the spreadsheet the link needs to be written only the first time
      if institutionLink == "" or institutionLink == "no":
        newUri = d+"place/"+city_uri+"-"+institution_uri # to distinguish institutions in different places with same name (eg. Cathedral)
        placesDict["Cities Institutions"][city][institution] = [newUri]

      else:
        placesDict["Cities Institutions"][city][institution] = [institutionLink]

   #   if institutionType == "Collection":
    #    collectionUri = d+"collection/"+city_uri+"-"+institution_uri
    #    placesDict["Cities"][city+"_institution"][institution].append(collectionUri)
    #  if institutionType == "Museum":
     #   museumUri = d+"museum/"+city_uri+"-"+institution_uri
     #   placesDict["Cities"][city+"_institution"][institution].append(museumUri)
     # if institutionType == "Building":
     #   museumUri = d+"museum/"+city_uri+"-"+institution_uri
     #   placesDict["Cities"][city+"_institution"][institution].append(museumUri)




print(placesDict)



# Books




In [None]:
# Books: one crm:E22_Human_Made_Object per row of the Books tab.
for line in books_cleaned:
  ID = line["ID"][0]
  production = d+"PROD"+ID
  label = line["TitleForCheckingDuplicates"][0]
  objType = line["ObjectType"][0]
  signature = line["Signature"][0]
  signature_uri = line["Signature_uri"][0]
  contentID = line["ContentID"][0]
  lineEditor = line["EditorOrPrinter"]
  date = line["Date"][0]
  date_uri = line["Date_uri"][0]
  dateCertainty = line["DateCertainty"][0]
  place = line["Object PlaceOfProduction"][0]
  description = line["Description"][0]
  # lineLink = line["Link"]
  # the E22 production and the place are specified in the artwork tab
  if ID != "":
    g.add((URIRef(d+ID), RDF.type, crm.E22_Human_Made_Object))
    if label !="":
      g.add((URIRef(d+ID), RDFS.label, Literal(label)))
    if objType != "":
      g.add((URIRef(d+ID), crm.P2_has_type, URIRef(typeDict[objType])))
      g.add((URIRef(typeDict[objType]), RDFS.label, Literal(objType)))
      g.add((URIRef(typeDict[objType]), crm.P2_has_type, URIRef("http://vocab.getty.edu/aat/300028051")))
    else:
      g.add((URIRef(d+ID), crm.P2_has_type, URIRef("http://vocab.getty.edu/aat/300028051")))

    if signature != "":
      g.add((URIRef(d+ID), crm.P1_is_identified_by, URIRef(d+signature_uri)))
      g.add((URIRef(d+signature_uri), RDF.type, crm.E42_Identifier))
      g.add((URIRef(d+signature_uri), RDFS.label, Literal(signature)))
    # NOTE(review): date, place and editors are only emitted for rows that
    # have a ContentID - confirm that this nesting is intended.
    if contentID !="" and contentID  != "#N/A":
      g.add((URIRef(d+ID), crm.P128_carries, URIRef(d+contentID)))

      if date !="":
        g.add((URIRef(production), crm.P4_has_time_span, URIRef(d+date_uri)))
        if dateCertainty == "c.":
          # Qualify the time-span node itself (d+date_uri); the qualifiers
          # used to be attached to d+date, a different IRI built from the
          # raw label.
          g.add((URIRef(d+date_uri), crm.P79_beginning_is_qualified_by, URIRef(d+"circa")))
          g.add((URIRef(d+date_uri), crm.P80_end_is_qualified_by, URIRef(d+"circa")))
          g.add((URIRef(d+"circa"), RDFS.label, Literal("ca.")))
          g.add((URIRef(d+"circa"), RDF.type, crm.E62_String))
      if place !="":
        g.add((URIRef(production), crm.P7_took_place_at, URIRef(placesDict["Cities"][place])))
      for editor in lineEditor:
        if editor !="":
          g.add((URIRef(production), crm.P14_carried_out_by, URIRef(peopleDict[editor])))
 #   if lineAuthor =="" and title !="":
 #     g.add((URIRef(d+ID), crm.P102_has_title, URIRef(d+title_uri)))
    if description != "":
      g.add((URIRef(production), RDFS.comment, Literal(description)))

## Books content

In [None]:

# Books content: one crm:E73_Information_Object per row of the BooksContent tab.
for line in booksContent_cleaned:
  ID = line["ID"][0]
  label = line["TitleForCheckingDuplicates"][0]
  production = d+"CRE"+ID  # IRI of the creation event of this content
  lineAuthor	= line["ObjectAuthor"]
  title	= line["Title"][0]
  title_uri	= line["Title_uri"][0]

  # lineLink = line["Link"]
  # the E22 production and the place are specified in the artwork tab
  if ID != "":
    g.add((URIRef(d+ID), RDF.type, crm.E73_Information_Object))
    g.add((URIRef(d+ID),  RDFS.label, Literal(label)))
    # every non-empty author is linked to the same creation event
    for author in lineAuthor:
      if author !="":
        g.add((URIRef(production), crm.P94_has_created, URIRef(d+ID)))
        g.add((URIRef(production), RDF.type, crm.E65_Creation))
        g.add((URIRef(production), crm.P14_carried_out_by, URIRef(peopleDict[author])))

    # the title is a node of its own under "title/"
    if title !="":
      g.add((URIRef(d+ID), crm.P102_has_title, URIRef(d+"title/"+title_uri)))
      g.add((URIRef(d+"title/"+title_uri),  RDF.type, crm.E35_Title))
      g.add((URIRef(d+"title/"+title_uri),  RDFS.label, Literal(title)))

# Artworks


In [None]:
# Artworks: for every row of the artworks sheet, describe the physical object
# (OBJ), its visual item (ART) and its production (PROD), then attach title,
# authors, buyers, pictures, types, location, materials, bibliography, style,
# period, web pages, related objects and date.
# Every cell value is a list (multi-valued cells were split upstream), so
# single-valued columns are read with [0] and list columns are iterated.
for line in artworks_cleaned:
  lineID = line["ID"][0]
  ID = d+"OBJ"+lineID
  artwork = d+"ART"+lineID
  production = d+"PROD"+lineID
  certainty = line["Certainty"]
  lineTitleLabel = line["Title"]
  lineTitle = line["Title_uri"]
  # title = d+"title/"+lineTitle
  lineDateLabel = line["Date"][0]
  lineDate = line["Date_uri"][0]
  startDate = None
  endDate = None
  linePlaceLabel = line["PlaceOfConservation"][0]
  linePlace = line["PlaceOfConservation_uri"][0]
  lineCityLabel = line["CityOfConservation"][0]
  lineCity = line["CityOfConservation_uri"][0]
  lineMaterial = line["Material"]
  lineAuthor = line["Author_uri"]
  lineAuthorLabel = line["Author"]
  lineDepiction = line["ImageLink"]
  lineLicense = line["ImageLicense"]
  lineSource = line["SourceLink"]
  lineWithImage = line["PageWithImage"]
  # Named imageLicense / imageRights so the builtins `license` and
  # `copyright` are not shadowed for the rest of the notebook session.
  imageLicense = line["ImageLicense"][0]
  imageRights = line["ImageRights"][0]
  lineType = line["Type"]
  lineRel = line["TypeOfRelation"][0]
  relArtwork = line["RelatedArtwork"][0]
  relBook = line["RelatedBook"][0]
  relID = line["IDext"][0]
  lineSpecObj = line["SpecificLocation_uri"][0]
  lineBook = line["BookISBNorDOI"]
  lineStyle = line["Style"]
  period = line["Period"][0]
  objLabel = line["TitleForCheckingDuplicates"]
  lineBuyer = line["MadeFor-CommissionOrGift"]
  lineUri = line["URI"]

  if lineID != "" and lineRel != "detail of":
    g.add((URIRef(production), crm.P108_has_produced, URIRef(ID)))
    g.add((URIRef(production), RDF.type, crm.E12_Production))
    g.add((URIRef(ID), crm.P65_shows_visual_item, URIRef(artwork)))
    g.add((URIRef(artwork), RDF.type, icon.Artwork))
    g.add((icon.Artwork, RDFS.label, Literal("Artwork")))
    # lineType is a list: test membership (comparing the list itself with a
    # string is always False, so buildings were never typed as E24).
    if any(t in ("building", "building(church)") for t in lineType):
      g.add((URIRef(ID), RDF.type, crm.E24_Physical_Human_Made_Thing))
    if lineRel == "feature of":
      g.add((URIRef(ID), RDF.type, crm.E25_Human_Made_Feature))
    else:
      g.add((URIRef(ID), RDF.type, crm.E22_Human_Made_Object))

  # "feature of" and "detail of" are modeled in the same way, with the
  # difference that a detail does not have its own production.
  if lineRel == "detail of": # no creation for details of wider artworks, but a visual item is created so the interpretation can be linked to it
    g.add((URIRef(ID), RDF.type, crm.E25_Human_Made_Feature))
    if lineSpecObj !="":
      # NOTE(review): ID already starts with the base `d`, so d+ID repeats the
      # base URI. The bearer was probably meant to be built from relID (as in
      # the else branch) - confirm the intended URI before changing it.
      g.add((URIRef(d+ID+"-"+lineSpecObj), crm.P56_bears_feature, URIRef(ID)))
    else:
      g.add((URIRef(d+relID), crm.P56_bears_feature, URIRef(ID)))
    g.add((URIRef(ID), crm.P65_shows_visual_item, URIRef(artwork)))
    g.add((URIRef(artwork), RDF.type, icon.Artwork))

  # label: a single value is used as is (skipped when empty), several values
  # are joined with ", "
  if len(objLabel) == 1:
    if objLabel[0] != "":
      g.add((URIRef(ID), RDFS.label, Literal(objLabel[0])))
  elif len(objLabel) > 1:
    label = ", ".join(objLabel)
    g.add((URIRef(ID), RDFS.label, Literal(label)))

  # title (the emptiness guard keeps a title from a previous row from being
  # reused when Title_uri is an empty list)
  if lineTitle and lineTitle != [""]:
    if len(lineTitle) == 1:
      title = d+"title/"+lineTitle[0]
      titleLabel = lineTitleLabel[0]
    else:
      title = d+"title/"+"-".join(lineTitle)
      titleLabel = ", ".join(lineTitleLabel)

    g.add((URIRef(ID), crm.P102_has_title, URIRef(title)))
    g.add((URIRef(title), RDFS.label, Literal(titleLabel)))
    g.add((URIRef(title), RDF.type, crm.E35_Title))

  # author
  for author in lineAuthorLabel:
    if author:
      g.add((URIRef(production), crm.P14_carried_out_by, URIRef(peopleDict[author])))
      # g.add((URIRef(peopleDict[author]), RDFS.label, Literal(author)))
      # certainty[0] == "?" marks an uncertain attribution; certainty[1], when
      # present, is the person who made the attribution.
      if certainty and certainty[0] == "?":
        g.add((URIRef(production+"assignment"), RDF.type, crm.E13_Attribute_Assignment))
        g.add((URIRef(production+"assignment"), crm.P140_assigned_attribute_to, URIRef(production)))
        g.add((URIRef(production+"assignment"), crm.P141_assigned, URIRef(peopleDict[author])))
        g.add((URIRef(production+"assignment"), RDFS.comment,  Literal("uncertain author attribution")))
        if len(certainty) > 1 and certainty[1] !="":
          g.add((URIRef(production+"assignment"), crm.P14_carried_out_by, URIRef(peopleDict[certainty[1]])))
        # !! Getty LOD has no term for attribution uncertainty: it is expressed through CONA relations (attributed to, probably of) that are not yet in the ontology. More info: https://www.getty.edu/research/tools/vocabularies/cona_intro.pdf , https://www.getty.edu/research/tools/vocabularies/guidelines/cona_3_6_1_work_type.html#3_6_1_16

  # buyer
  for buyer in lineBuyer:
    if buyer !="":
      g.add((URIRef(production), crm.P17_was_motivated_by, URIRef(peopleDict[buyer])))

  # picture ("no" and "?" are placeholders used in the sheet for missing values)
  for picture in lineDepiction:
    if picture not in ("", "no", "?"):
      g.add((URIRef(ID), foaf.depiction, URIRef(picture)))
      if imageLicense not in ("", "?", "no"):
        if imageLicense in licenseDict:
          g.add((URIRef(picture), dct.license, URIRef(licenseDict[imageLicense])))
        else:
          # unknown license names are kept as plain text
          g.add((URIRef(picture), dct.license, Literal(imageLicense)))
      for page in lineWithImage:
        if page not in ("", "?", "no"):
          g.add((URIRef(picture), dc.source, URIRef(page)))
      if imageRights != "":
        g.add((URIRef(picture), dct.rights, Literal(imageRights)))

  # type: typeDict maps a label either to one URI or to a list of URIs
  # (loop variable renamed so the builtin `type` is not shadowed)
  for artworkType in lineType:
    if artworkType != "":
      if isinstance(typeDict[artworkType], list):
        for t in typeDict[artworkType]:
          g.add((URIRef(ID), crm.P2_has_type, URIRef(t)))
          g.add((URIRef(t), RDFS.label, Literal(artworkType)))
      else:
        g.add((URIRef(ID), crm.P2_has_type, URIRef(typeDict[artworkType])))
        g.add((URIRef(typeDict[artworkType]), RDFS.label, Literal(artworkType)))

  # place crm.P53_has_former_or_current_location
  if linePlaceLabel != "" and lineCityLabel !="":
    # institution names are matched case-insensitively within the city
    for key in placesDict["Cities Institutions"][lineCityLabel]:
      if linePlaceLabel.lower() == key.lower():
        g.add((URIRef(ID), crm.P53_has_former_or_current_location, URIRef(placesDict["Cities Institutions"][lineCityLabel][key][0])))
        # g.add((URIRef(placesDict["Cities Institutions"][lineCityLabel][key][0]), RDFS.label, Literal(linePlaceLabel)))
  elif lineCityLabel != "" and linePlaceLabel == "":
    if lineCityLabel in placesDict["Cities"]:
      g.add((URIRef(ID), crm.P53_has_former_or_current_location, URIRef(placesDict["Cities"][lineCityLabel])))
      # g.add((URIRef(placesDict["Cities"][lineCity]), RDFS.label, Literal(lineCity)
    if lineCityLabel in placesDict["Countries"]:
      g.add((URIRef(ID), crm.P53_has_former_or_current_location, URIRef(placesDict["Countries"][lineCityLabel])))

  # material
  for material in lineMaterial:
    if material !="":
      g.add((URIRef(ID), crm.P45_consists_of, URIRef(materialDict[material])))
      g.add((URIRef(materialDict[material]), RDF.type, crm.E57_Material))
      g.add((URIRef(materialDict[material]), RDFS.label, Literal(material)))

  # book of the Panofsky bibliography
  for book in lineBook:
    if book != "":
      b = book.strip()
      g.add((URIRef(ID), crm.P67_refers_to, URIRef(b)))
      g.add((URIRef(b), RDF.type, crm.E73_Information_Object))

  # style
  for style in lineStyle:
    if style !="":
      g.add((URIRef(artwork), crm.P2_has_type, URIRef(styleDict[style])))
      g.add((URIRef(styleDict[style]), RDFS.label, Literal(style)))
      g.add((URIRef(styleDict[style]), crm.P2_has_type, URIRef("http://vocab.getty.edu/aat/300015646")))

      g.add((URIRef("http://vocab.getty.edu/aat/300015646"), RDFS.label, Literal("Styles and Periods")))

  # period
  if period !="":
    g.add((URIRef(production), crm.P10_falls_within, URIRef(periodDict[period])))
    g.add((URIRef(periodDict[period]), RDFS.label, Literal(period)))
    g.add((URIRef(periodDict[period]), RDF.type, crm.E4_Period))

  # page source - foaf:page
  for source in lineSource:
    if source != "" and source != "no":
      g.add((URIRef(ID), foaf.page, URIRef(source)))

  # page with image - rdfs:seeAlso
  for img in lineWithImage:
    if img != "" and img != "no":
      g.add((URIRef(ID), RDFS.seeAlso, URIRef(img)))

  # external URI of the same object - owl:sameAs
  for uri in lineUri:
    if uri != "":
      g.add((URIRef(ID), owl.sameAs, URIRef(uri)))

  # related object
  if lineRel != "":
    # Reset per row: without this, a row that names a relation but neither a
    # related artwork nor a related book would reuse the previous row's target.
    related = None
    if relArtwork !="":
      related = d+"OBJ"+relID

      if lineRel == "pendant of":
        # a pendant pair is modeled as a whole object composed of both parts
        whole_art = d+"OBJ"+lineID+"-"+relID
        g.add((URIRef(whole_art), RDF.type, crm.E22_Human_Made_Object))
        g.add((URIRef(whole_art), crm.P46_is_composed_of, URIRef(ID)))
        g.add((URIRef(whole_art), crm.P46_is_composed_of, URIRef(related)))
        g.add((URIRef(whole_art), crm.P2_has_type, URIRef("http://vocab.getty.edu/aat/300179422")))
        g.add((URIRef("http://vocab.getty.edu/aat/300179422"), RDFS.label, Literal("Pendants")))

      g.add((URIRef(related), RDF.type, crm.E22_Human_Made_Object))

    elif relBook !="":
      related = d+relID
      bookProduction = d+"PROD"+relID
      g.add((URIRef(related), RDF.type, crm.E22_Human_Made_Object))
      g.add((URIRef(bookProduction), crm.P108_has_produced, URIRef(related)))
      g.add((URIRef(bookProduction), RDF.type, crm.E12_Production))
      if lineDateLabel != "":
        g.add((URIRef(bookProduction), crm.P4_has_time_span, URIRef(d+lineDate))) # details are added below: same date for both the book and the artwork

    if related is None:
      print(ID, "has relation", lineRel, "but no related artwork or book")
    elif lineSpecObj != "":
      g.add((URIRef(ID), URIRef(relDict[lineRel]), URIRef(related+"-"+lineSpecObj))) # specObj, even if generic, is linked to the object it refers to
      g.add((URIRef(related+"-"+lineSpecObj), URIRef(relDict["feature of"]), URIRef(related)))
    else:
      g.add((URIRef(ID), URIRef(relDict[lineRel]), URIRef(related)))

  # date
  if lineDateLabel != "":
    g.add((URIRef(production), crm.P4_has_time_span, URIRef(d+lineDate)))
    g.add((URIRef(d+lineDate), RDF.type, crm.E52_Time_Span))

    # certainty is a list, so "c." is looked up by membership (comparing the
    # list with a string never matched); the qualifier only makes sense when
    # the row actually has a date.
    if "c." in certainty:
      g.add((URIRef(d+lineDate), crm.P79_beginning_is_qualified_by, URIRef(d+"circa")))
      g.add((URIRef(d+lineDate), crm.P80_end_is_qualified_by, URIRef(d+"circa")))
      g.add((URIRef(d+"circa"), RDFS.label, Literal("ca.")))
      g.add((URIRef(d+"circa"), RDF.type, crm.E62_String))



# print(g.serialize(format="turtle").decode(UTF-8))
#g.serialize(destination = "OutputPanofskyArtwork.ttl", format="turtle")

# Dictionary of artwork components

We create an artwork dictionary in which the uri of artistic motif, composition, and image are given for each artwork. This way, whenever an object of a given type with a given number of a given artwork is to be taken, it will already have the identifying link in the dictionary. The count in the columns of the csv file corresponds to the key in each artwork's internal dictionary.
Structure: `dizionario_opere = {artwork1 : {"artisticMotif": {1 : www.dataset.org/artwork1-am1, etc}, "Composition": {}, "Image" : {}, "Intrinsic": {}}}`

If an interpretation is present, we register the respective artistic motif, composition, or image with the interpretation number in its ID (value), and the key is the interpretation number+"-"+object number. For example, these are the artistic motifs of interpretations 1 and 2 of ART1276:  `'1-1': 'https://w3id.org/icon/data/ART1276-1-AM1', '2-1': 'https://w3id.org/icon/data/ART1276-2-AM1'`

In [None]:
# One entry per artwork, keyed "ART<ID>", holding four empty registries that
# later cells fill in. IDs ending in "ext" get no entry.
artworkDict = {}

for line in artworks_cleaned:
  lineArtwork = line["ID"][0]
  artwork = "ART" + lineArtwork
  if artwork.endswith("ext"):
    continue
  artworkDict[artwork] = {
    "ArtisticMotif": {},
    "Composition": {},
    "Image": {},
    "Intrinsic": {},
  }
print(artworkDict)

{'ART1000': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1001': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1002': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1003': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1004': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1005': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1006': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1007': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1008': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1009': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1010': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'ART1011': {'ArtisticMotif': {}, 'Composition': {}, 'Image': {}, 'Intrinsic': {}}, 'AR

In [None]:

# Disabled cell: the code below is wrapped in a string literal, so none of it
# runs. It appears to be an earlier version of the level-1 registration that
# builds keys and URIs without the interpretation number.
'''
for line in level1_cleaned:
  lineArtwork = line["Artwork"][0]
  lineAM = line["ArtisticMotifCount"][0]
  lineComp = line["Composition"][0]
  am = d+lineArtwork+"-"+"AM"+str(lineAM)
  comp = d+lineArtwork+"-"+"COMP"+str(lineComp)
  if lineComp == "":
    artworkDict[lineArtwork]["ArtisticMotif"][lineAM] = am
  elif lineComp != "":
    artworkDict[lineArtwork]["Composition"][lineComp] = comp

print(artworkDict)
'''

In [None]:


# Register the URI of every level-1 row in artworkDict.
# A row with an empty "Composition" cell is an artistic motif, otherwise it is
# a composition. With an interpretation number both the key and the URI carry
# it:  key "<inter>-<count>"  ->  d + "<artwork>-<inter>-AM<count>"
# without one:  key "<count>"  ->  d + "<artwork>-AM<count>"
# (same scheme with COMP for compositions).
for line in level1_cleaned:
  lineArtwork = line["Artwork"][0]
  lineAM = line["ArtisticMotifCount"][0]
  lineComp = line["Composition"][0]
  interLine = line["Interpretation"]
  for inter in interLine:
    hasInter = inter != ""
    # Only the identifier that is actually stored is computed, and it is
    # computed on every iteration, so a row with an interpretation but an
    # empty count can no longer pick up the key/URI of the previous row.
    if lineComp == "":
      if hasInter:
        am_id = inter+"-"+lineAM
        am = d+lineArtwork+"-"+inter+"-"+"AM"+str(lineAM)
      else:
        am_id = lineAM
        am = d+lineArtwork+"-"+"AM"+str(lineAM)
      artworkDict[lineArtwork]["ArtisticMotif"][am_id] = am
    else:
      if hasInter:
        comp_id = inter+"-"+lineComp
        comp = d+lineArtwork+"-"+inter+"-"+"COMP"+str(lineComp)
      else:
        comp_id = lineComp
        comp = d+lineArtwork+"-"+"COMP"+str(lineComp)
      artworkDict[lineArtwork]["Composition"][comp_id] = comp



In [None]:
# Disabled cell: the code below is wrapped in a string literal, so none of it
# runs. It appears to be an earlier version of the level-2 image registration
# that builds keys and URIs without the interpretation number.
'''
for line in level2_cleaned:
  lineArtwork = line["Artwork"][0]
  lineImg = line["Count"][0]
  img = d+lineArtwork+"-"+"IMG"+str(lineImg)
  artworkDict[lineArtwork]["Image"].update({lineImg : img})
'''


In [None]:
# Register the URI of every level-2 image in artworkDict[...]["Image"].
# Key and URI carry the interpretation number when the row has one.
for line in level2_cleaned:
  lineArtwork = line["Artwork"][0]
  lineImg = line["Count"][0]
  interLine = line["Interpretation"]

  for inter in interLine:
    if inter == "":
      img_id = lineImg
      img = d+lineArtwork+"-"+"IMG"+str(lineImg)
    else:
      img_id = inter+"-"+lineImg
      img = d+lineArtwork+"-"+inter+"-"+"IMG"+str(lineImg)
    artworkDict[lineArtwork]["Image"][img_id] = img


# Visual check: show the non-empty image registries
for art in artworkDict:
  see_img = artworkDict[art]["Image"]
  if see_img:
    print(see_img)

In [None]:
# Register the URI of every level-3 intrinsic meaning in
# artworkDict[...]["Intrinsic"]. Key and URI carry the interpretation number
# when the row has one.
for line in level3_cleaned:
  lineArtwork = line["Artwork"][0]
  intrinsic = line["Count"][0]
  interLine = line["Interpretation"]

  for inter in interLine:
    if inter == "":
      intr_id = intrinsic
      intr = d+lineArtwork+"-"+"INTRINSIC"+str(intrinsic)
    else:
      intr_id = inter+"-"+intrinsic
      intr = d+lineArtwork+"-"+inter+"-"+"INTRINSIC"+str(intrinsic)
    artworkDict[lineArtwork]["Intrinsic"][intr_id] = intr


# Visual check: show the non-empty intrinsic registries
for art in artworkDict:
  see_img = artworkDict[art]["Intrinsic"]
  if see_img:
    print(see_img)



# Phenomena dictionary

In [None]:
# Lookup table of cultural phenomena: label -> URI (base d + phenomenon ID).
# Rows missing either the label or the ID are ignored.
phenomenaDict = {}
for line in levelsEntryControl_cleaned:
  phenomenonLabel = line["CulturalPhenomenon"][0]
  phenomenonID = line["CulturalPhenomenonID"][0]
  if phenomenonLabel == "" or phenomenonID == "":
    continue
  phenomenaDict.update({phenomenonLabel: d+phenomenonID})
print(phenomenaDict)

# Level 1

In [None]:
# Level 1 (pre-iconographical description).
# For every row and every interpretation listed in it, create a
# PreiconographicalRecognition about the artwork and link it to what was
# recognized: a composition (with its component motifs and compositional
# structure) when "Composition" is filled, otherwise an artistic motif with its
# meanings, qualities, text and style. Agents, evidence and information
# sources are attached to the recognition.
# URIs of motifs and compositions are read from artworkDict, whose keys are
# "<interpretation>-<count>" or just "<count>" when there is no interpretation.
for line in level1_cleaned:
  count = str(line["ArtisticMotifCount"][0])
  lineComposition = line["Composition"][0]
  lineCompByAm = line["ComposedByArtisticMotif"]
  artwork = line["Artwork"][0]
  lineInterpretation = line["Interpretation"]
  text = line["LinguisticObject"][0]


  naturalElement = line["NaturalElement_uri"][0]
  naturalElementLabel = line["NaturalElement"][0]
  expressionalQuality = line["ExpressionalQuality_uri"][0]
  expressionalQualityLabel = line["ExpressionalQuality"][0]
  lineQuality = line["Quality_uri"] # the dul quality
  lineQualityLabel = line["Quality"]
  action = line["Action_uri"][0]
  actionLabel = line["Action"][0]
  compStructure = line["CompositionalStructure_uri"][0]
  compStructureLabel = line["CompositionalStructure"][0]
  linguisticObject = line["LinguisticObject_uri"][0]
  linguisticObjectLabel = line["LinguisticObject"][0]
  lineStyle = line["Style"]
  lineEvidence = line["Evidence_uri"]
  lineEvidenceLabel = line["Evidence"]
  evidenceSpecificLocation = line["EvidenceSpecificLocation_uri"][0]
  evidenceSpecificLocationLabel = line["EvidenceSpecificLocation"][0]
  linePerson = line["PersonResponsible_uri"]
  linePersonLabel = line["PersonResponsible"] # the URI will need to be added to the people dictionary
  lineReference = line["InformationObject"]

  for interpretation in lineInterpretation:
    hasInterpretation = interpretation != ""
    if hasInterpretation:
      # interpr_id = d+"rec"+artwork+"-"+interpretation
      preicRec = artwork+"-"+"PREICREC"+interpretation+"-"+count
      preicDesc = artwork+"-"+"DESC"+interpretation
      # g.add((URIRef(d+preicRec), crm.P9_consists_of, URIRef(interpr_id)))
      # g.add((URIRef(interpr_id), RDF.type, icon.Recognition))
      comp_id = interpretation+"-"+lineComposition
      count_id = interpretation+"-"+count
    else:
      preicRec = artwork+"-"+"PREICREC"+count
      preicDesc = artwork+"-"+"DESC" # to identify the interpretation contexts: add an ID to the sources and use it to build the source name
      comp_id = lineComposition
      count_id = count

    g.add((URIRef(d+preicRec), icon.aboutWorkOfArt, URIRef(d+artwork)))
    g.add((URIRef(d+preicRec), RDF.type, icon.PreiconographicalRecognition))
    g.add((icon.PreiconographicalRecognition, RDFS.label, Literal("Preiconographical Recognition")))
    # g.add((URIRef(d+preicRec), icon.isListedIn, URIRef(d+preicDesc)))
    g.add((URIRef(d+preicRec), icon.preiconographicallyCompliesWith, URIRef(d+preicDesc)))
    g.add((URIRef(d+preicDesc), RDF.type, icon.InterpretationDescription))
    g.add((icon.InterpretationDescription, RDFS.label, Literal("Interpretation Description")))


    if lineComposition != "":
      comp = artworkDict[artwork]["Composition"][comp_id]
      g.add((URIRef(comp), RDF.type, icon.Composition))
      g.add((icon.Composition, RDFS.label, Literal("Composition")))
      g.add((URIRef(d+preicRec), icon.recognizedComposition, URIRef(comp)))
      for CompByAm in lineCompByAm:
        if CompByAm != "":
          # The key is built here, once per component motif. It used to be
          # computed in an earlier loop, so only the last motif's key survived
          # and every component resolved to that same motif.
          if hasInterpretation:
            CompByAm_id = interpretation+"-"+CompByAm
          else:
            CompByAm_id = CompByAm
          if CompByAm_id not in artworkDict[artwork]["ArtisticMotif"]:
            print(artwork, "composition",comp_id,"has not", CompByAm_id)
          else:
            artmotif = artworkDict[artwork]["ArtisticMotif"][CompByAm_id]
            g.add((URIRef(comp), icon.hasPart, URIRef(artmotif)))
      if compStructure != "":
        g.add((URIRef(comp), icon.hasCompositionalStructure, URIRef(d+compStructure)))
        g.add((URIRef(d+compStructure), RDF.type, icon.CompositionalStructure))
        g.add((icon.CompositionalStructure, RDFS.label, Literal("Compositional Structure")))
        g.add((URIRef(d+compStructure), RDFS.label, Literal(compStructureLabel)))

    else:
      item = artworkDict[artwork]["ArtisticMotif"][count_id]
      g.add((URIRef(item), RDF.type, icon.ArtisticMotif))
      g.add((icon.ArtisticMotif, RDFS.label, Literal("Artistic Motif")))
      g.add((URIRef(d+preicRec), icon.recognizedArtisticMotif, URIRef(item)))

      if naturalElement != "":
        g.add((URIRef(item), icon.hasFactualMeaning, URIRef(d+naturalElement)))
        g.add((URIRef(d+naturalElement), RDF.type, icon.NaturalElement))
        g.add((icon.NaturalElement, RDFS.label, Literal("Natural Element")))
        g.add((URIRef(d+naturalElement), RDFS.label, Literal(naturalElementLabel)))
        g.add((URIRef(d+artwork), icon.preiconographicallyDepicts, URIRef(d+naturalElement)))
      if expressionalQuality != "":
        g.add((URIRef(item), icon.hasExpressionalMeaning, URIRef(d+expressionalQuality)))
        g.add((URIRef(d+expressionalQuality), RDF.type, icon.ExpressionalQuality))
        g.add((icon.ExpressionalQuality, RDFS.label, Literal("Expressional Quality")))
        g.add((URIRef(d+expressionalQuality), RDFS.label, Literal(expressionalQualityLabel)))
        g.add((URIRef(d+artwork), icon.preiconographicallyDepicts, URIRef(d+expressionalQuality)))
      # Labels are paired with URIs by position (assumes "Quality" and
      # "Quality_uri" are parallel lists - the evidence columns are already
      # paired the same way). A counter that only advanced on non-empty URIs
      # would drift after an empty entry.
      for n, quality in enumerate(lineQuality):
        if quality != "":
          g.add((URIRef(item), dul.hasQuality, URIRef(d+"quality/"+quality)))
          g.add((URIRef(d+"quality/"+quality), RDF.type, dul.Quality))
          g.add((URIRef(d+"quality/"+quality), RDFS.label, Literal(lineQualityLabel[n])))

      if action != "":
        g.add((URIRef(item), icon.hasFactualMeaning, URIRef(d+action)))
        g.add((URIRef(d+action), RDF.type, icon.Action))
        g.add((icon.Action, RDFS.label, Literal("Action")))
        g.add((URIRef(d+action), RDFS.label, Literal(actionLabel)))
        g.add((URIRef(d+artwork), icon.preiconographicallyDepicts, URIRef(d+action)))
      # add text and style
      if linguisticObject != "":
        g.add((URIRef(item), crm.P138_represents, URIRef(d+"linguistic-obj/"+linguisticObject)))
        g.add((URIRef(d+"linguistic-obj/"+linguisticObject), RDF.type, crm.E33_Linguistic_Object))
        g.add((crm.E33_Linguistic_Object, RDFS.label, Literal("Linguistic Object")))
        g.add((URIRef(d+"linguistic-obj/"+linguisticObject), RDFS.label, Literal(linguisticObjectLabel)))
        g.add((URIRef(d+artwork), icon.preiconographicallyDepicts, URIRef(d+"linguistic-obj/"+linguisticObject)))
      for style in lineStyle:
        if style !="":
          g.add((URIRef(item), crm.P2_has_type, URIRef(styleDict[style])))
          g.add((URIRef(styleDict[style]), RDFS.label, Literal(style)))
          g.add((URIRef(styleDict[style]), crm.P2_has_type, URIRef("http://vocab.getty.edu/aat/300015646")))
          # same label as the artworks cell gives this AAT node, so the node
          # does not end up with two different labels
          g.add((URIRef("http://vocab.getty.edu/aat/300015646"), RDFS.label, Literal("Styles and Periods")))

    for person in linePersonLabel:
      if person != "":
        g.add((URIRef(d+preicRec), crm.P14_carried_out_by, URIRef(peopleDict[person])))
    for i, evid in enumerate(lineEvidence):
      if evid !="":
        if "http" in str(evid):
          # for web evidence the label column at the same position is used as the URI
          evidence = lineEvidenceLabel[i]
        elif "art" in str(evid):
          evidence = d+evid.upper()
        else:
          evidence = d+evid
        if evidenceSpecificLocation !="":
          if "http" not in str(evidenceSpecificLocation):
            g.add((URIRef(d+preicRec), cito.citesAsEvidence, URIRef(evidence+"-"+evidenceSpecificLocation)))
            g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidence+"-"+evidenceSpecificLocation)))
            g.add((URIRef(evidence+"-"+evidenceSpecificLocation), RDFS.label, Literal(evidenceSpecificLocationLabel)))
          else:
            g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidenceSpecificLocation)))
            g.add((URIRef(d+preicRec), cito.citesAsEvidence, URIRef(evidenceSpecificLocation)))
        g.add((URIRef(d+preicRec), cito.citesAsEvidence, URIRef(evidence)))
    for info in lineReference:
      if info !="":
        g.add((URIRef(d+preicRec), cito.citesForInformation, URIRef(info)))
        g.add((URIRef(info), RDF.type, crm.E73_Information_Object))

# idea: add a "type" column giving, when available, the AAT link of the entry for the corresponding object type, e.g. cloth
# print(g.serialize(format="turtle").decode(UTF-8))
# g.serialize(destination = "lev1.ttl", format="turtle")



## Motif Recognition

In [None]:
#  motif recognition
# Formal motif recognition: each row states that a motif or composition of
# artwork 2 ("toArtwork2") copies a prototype found in artwork 1
# ("fromArtwork1"). The recognition is attached to the copying artwork and
# linked to the prototype/copied components looked up in artworkDict, plus
# supported interpretations, agents, evidence and information sources.
for line in formalMotifRecLevel1_cleaned:
  count = str(line["Count"][0])
  fromAm = line["ArtisticMotifCount1"][0]
  fromComp = line["CompositionCount1"][0]
  copiedByAm = line["ArtisticMotifCount2"][0]
  copiedByComp = line["CompositionCount2"][0]
  fromArtwork = line["fromArtwork1"][0]
  copiedByArtwork = line["toArtwork2"][0]
  lineSupportTo = line["GivesSupportToInterpretation"]

  motifRec = copiedByArtwork+"-"+"MOTIFREC"+count
  #preicDesc1 = fromArtwork+"-"+"PREICDESC"
  preicDesc2 = copiedByArtwork+"-"+"DESC" # do we put it in the preicdesc? for now, yes

  lineEvidence = line["Evidence_uri"]
  lineEvidenceLabel = line["Evidence"]
  evidenceSpecificLocation = line["EvidenceSpecificLocation_uri"][0]
  evidenceSpecificLocationLabel = line["EvidenceSpecificLocation"][0]
  linePerson = line["PersonResponsible_uri"]
  linePersonLabel = line["PersonResponsible"] # the URI will need to be added to the people dictionary
  lineReference = line["InformationObject_uri"]
  lineReferenceLabel = line["InformationObject"]

  # g.add((URIRef(d+motifRec), icon.aboutWorkOfArt, URIRef(d+fromArtwork)))
  g.add((URIRef(d+motifRec), icon.aboutWorkOfArt, URIRef(d+copiedByArtwork)))
  g.add((URIRef(d+motifRec), RDF.type, icon.FormalMotifRecognition))
  g.add((icon.FormalMotifRecognition, RDFS.label, Literal("Formal Motif Recognition")))
  # g.add((URIRef(d+preicDesc1), RDF.type, icon.PreiconographicalDescription))
  g.add((URIRef(d+motifRec), icon.preiconographicallyCompliesWith, URIRef(d+preicDesc2)))
  g.add((URIRef(d+preicDesc2), RDF.type, icon.InterpretationDescription))

  # Components missing from artworkDict are reported and skipped.
  if fromAm != "":
    if fromAm not in artworkDict[fromArtwork]["ArtisticMotif"]:
      print("not in artworkdict: ", fromArtwork, fromAm)
    else:
      myAm = artworkDict[fromArtwork]["ArtisticMotif"][fromAm]
      g.add((URIRef(d+motifRec), icon.hasPrototypicalMotif, URIRef(myAm)))
      g.add((URIRef(myAm), RDF.type, icon.ArtisticMotif))
  if fromComp != "":
    if fromComp not in artworkDict[fromArtwork]["Composition"]:
      print(fromArtwork, "has not composition", fromComp)
    else:
      myComp = artworkDict[fromArtwork]["Composition"][fromComp]
      g.add((URIRef(d+motifRec), icon.hasPrototypicalMotif, URIRef(myComp)))
      g.add((URIRef(myComp), RDF.type, icon.Composition))
  if copiedByAm != "":
    if copiedByAm not in artworkDict[copiedByArtwork]["ArtisticMotif"]:
      print("not in artworkdict: ", copiedByArtwork, copiedByAm)
    else:
      byAm = artworkDict[copiedByArtwork]["ArtisticMotif"][copiedByAm]
      g.add((URIRef(d+motifRec), icon.hasCopiedMotif, URIRef(byAm)))
      g.add((URIRef(byAm), RDF.type, icon.ArtisticMotif))
  if copiedByComp != "":
    if copiedByComp not in artworkDict[copiedByArtwork]["Composition"]:
      print(copiedByArtwork, "has not composition", copiedByComp)
    else:
      byComp = artworkDict[copiedByArtwork]["Composition"][copiedByComp]
      # These two triples belong to the "found" branch: outside it, byComp is
      # undefined (or left over from an earlier row) when the lookup fails.
      g.add((URIRef(d+motifRec), icon.hasCopiedMotif, URIRef(byComp)))
      g.add((URIRef(byComp), RDF.type, icon.Composition))


  for support in lineSupportTo:
    if support !="":
      g.add((URIRef(d+motifRec), cito.givesSupportTo, URIRef(d+support)))

  for person in linePersonLabel:
    if person !="":
      g.add((URIRef(d+motifRec), crm.P14_carried_out_by, URIRef(peopleDict[person])))
  for i, evid in enumerate(lineEvidence):
    if evid !="":
      if "http" in str(evid):
        # for web evidence the label column at the same position is used as the URI
        evidence = lineEvidenceLabel[i]
      elif "art" in str(evid):
        evidence = d+evid.upper()
      else:
        evidence = d+evid
      if evidenceSpecificLocation !="":
        if "http" not in str(evidenceSpecificLocation):
          g.add((URIRef(d+motifRec), cito.citesAsEvidence, URIRef(evidence+"-"+evidenceSpecificLocation)))
          g.add((URIRef(evidence+"-"+evidenceSpecificLocation), RDFS.label, Literal(evidenceSpecificLocationLabel)))
          g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidence+"-"+evidenceSpecificLocation)))
        else:
          g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidenceSpecificLocation)))
          g.add((URIRef(d+motifRec), cito.citesAsEvidence, URIRef(evidenceSpecificLocation)))

      g.add((URIRef(d+motifRec), cito.citesAsEvidence, URIRef(evidence)))

  for info in lineReference:
    if info !="":
      g.add((URIRef(d+motifRec), cito.citesForInformation, URIRef(info)))
      g.add((URIRef(info), RDF.type, crm.E73_Information_Object))

# print(g.serialize(format="turtle").decode(UTF-8))
# g.serialize(destination = "Output7.ttl", format="turtle")

# Level 2

In [None]:
for line in level2_cleaned:
  count = str(line["Count"][0])
  countAm = line["ArtisticMotif"][0]
  countComp = line["Composition"][0]
  artwork = line["Artwork"][0]
  lineRecAttribute = line["RecognizingAttribute"]
  #image = artwork+"-"+"IMG"+count

  character = line["Character_uri"][0]
  characterLabel = line["Character"][0]
  actor = line["SourceActor"][0]
  place = line["Place_uri"][0]
  placeLabel = line["Place"][0]
  event = line["Event_uri"][0]
  eventLabel = line["Event"][0]
  namedObject = line["NamedObject_uri"][0]
  namedObjectLabel = line["NamedObject"][0]
  externalObj = line["ExternalObjects"][0]
  symbol = line["Symbol_uri"][0]
  symbolLabel = line["Symbol"][0]
  personification = line["Personification_uri"][0]
  personificationLabel = line["Personification"][0]
  lineStory = line["Story_uri"]
  lineStoryLabel = line["Story"]
  lineAllegory = line["Allegory_uri"]
  lineAllegoryLabel = line["Allegory"]
  lineInvenzione = line["Invenzione_uri"]
  lineInvenzioneLabel = line["Invenzione"]
  roleLabel = line["Role"][0]
  role = line["Role_uri"][0]
  lineSupportTo = line["GivesSupportToInterpretation"]
  lineEvidence = line["Evidence_uri"]
  lineEvidenceLabel = line["Evidence"]
  evidenceSpecificLocation = line["EvidenceSpecificLocation_uri"][0]
  evidenceSpecificLocationLabel = line["EvidenceSpecificLocation"][0]
  linePerson = line["PersonResponsible_uri"]
  linePersonLabel = line["PersonResponsible"] # sarà da aggiungere l'uri nel dizionario delle persone
  lineReference = line["InformationObject"]
  lineInterpretation = line["Interpretation"]

  for interpretation in lineInterpretation:
    if interpretation !="":

     # interpr_id = d+"rec"+artwork+"-"+interpretation
      icRec = artwork+"-"+"ICREC"+interpretation+"-"+count
      icDesc = artwork+"-"+"DESC"+interpretation
      image_id = interpretation+"-"+count
      image = artworkDict[artwork]["Image"][image_id]
     # g.add((URIRef(d+icRec), crm.P9_consists_of, URIRef(interpr_id)))
     # g.add((URIRef(interpr_id), RDF.type, icon.Recognition))
     # g.add((icon.Recognition, RDFS.label, Literal("Recognition")))
      if countAm != "" and countAm != "no":
        countAm_id = interpretation+"-"+countAm
      if countComp != "" and countComp != "no":
        countComp_id = interpretation+"-"+countComp
      for recAttribute in lineRecAttribute:
        if recAttribute !="":
          recAttribute_id = interpretation+"-"+recAttribute

    else:
      icRec = artwork+"-"+"ICREC"+count
      icDesc = artwork+"-"+"DESC"

      image = artworkDict[artwork]["Image"][count]
      if countAm != "" and countAm != "no":
        countAm_id = countAm
      if countComp != "" and countComp != "no":
        countComp_id = countComp
      for recAttribute in lineRecAttribute:
        if recAttribute !="":
          recAttribute_id = recAttribute


    g.add((URIRef(d+icRec), icon.aboutWorkOfArt, URIRef(d+artwork)))
    g.add((URIRef(d+icRec), RDF.type, icon.IconographicalRecognition))
    g.add((icon.IconographicalRecognition, RDFS.label, Literal("Iconographical Recognition")))
    g.add((URIRef(d+icRec), icon.iconographicallyCompliesWith, URIRef(d+icDesc)))
    g.add((URIRef(d+icDesc), RDF.type, icon.InterpretationDescription))
    g.add((icon.InterpretationDescription, RDFS.label, Literal("Interpretation Description")))
    if countAm != "" and countAm != "no":
      if countAm_id not in artworkDict[artwork]["ArtisticMotif"]:
        print(artwork, "has not am ", countAm)
      else:
        am = artworkDict[artwork]["ArtisticMotif"][countAm_id]
        g.add((URIRef(d+icRec), icon.refersToArtisticMotif, URIRef(am)))
        g.add((URIRef(am), RDF.type, icon.ArtisticMotif))
        g.add((icon.ArtisticMotif, RDFS.label, Literal("Artistic Motif")))
    if countComp != "" and countComp != "no":
      if countComp_id not in artworkDict[artwork]["Composition"]:
        print(artwork, "has not comp ", countComp_id)
      else:
        comp = artworkDict[artwork]["Composition"][countComp_id]
        g.add((URIRef(d+icRec), icon.refersToArtisticMotif, URIRef(comp)))
        g.add((URIRef(comp), RDF.type, icon.Composition))
        g.add((icon.Composition, RDFS.label, Literal("Composition")))
    if countAm == "" and countComp == "":
      n = 0
      for story in lineStory:
        if story != "":
          g.add((URIRef(d+icRec), icon.recognizedInvenzione, URIRef(d+"story/"+story)))
          g.add((URIRef(d+"story/"+story), RDF.type, icon.Story))
          g.add((icon.Story, RDFS.label, Literal("Story")))
          g.add((URIRef(d+"story/"+story), RDFS.label, Literal(lineStoryLabel[n])))
          g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+"story/"+story)))
          n+=1
      n = 0
      for allegory in lineAllegory:
        if allegory != "":
          g.add((URIRef(d+icRec), icon.recognizedInvenzione, URIRef(d+"allegory/"+allegory)))
          g.add((URIRef(d+"allegory/"+allegory), RDF.type, icon.Allegory))
          g.add((icon.Allegory, RDFS.label, Literal("Allegory")))
          g.add((URIRef(d+"allegory/"+allegory), RDFS.label, Literal(lineAllegoryLabel[n])))
          g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+"allegory/"+allegory)))
          n +=1
      n = 0
      for invenzione in lineInvenzione:
        if invenzione != "":
          g.add((URIRef(d+icRec), icon.recognizedInvenzione, URIRef(d+"inv/"+invenzione)))
          g.add((URIRef(d+"inv/"+invenzione), RDF.type, icon.Invenzione))
          g.add((icon.Invenzione, RDFS.label, Literal("Invenzione")))
          g.add((URIRef(d+"inv/"+invenzione), RDFS.label, Literal(lineInvenzioneLabel[n])))
          g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+"inv/"+invenzione)))
          n+=1
    else:
      g.add((URIRef(d+icRec), icon.recognizedImage, URIRef(image)))
      g.add((URIRef(image), RDF.type, icon.Image))
      g.add((icon.Image, RDFS.label, Literal("Image")))

    for recAttribute in lineRecAttribute:
      if recAttribute != "":
        if recAttribute_id not in artworkDict[artwork]["ArtisticMotif"]:
          print(artwork, "has not am ", recAttribute_id)
        else:
          attr = artworkDict[artwork]["ArtisticMotif"][recAttribute_id]
          g.add((URIRef(image), icon.hasRecAttribute, URIRef(attr)))
    if character != "":
      if characterLabel in peopleDict:
        g.add((URIRef(image), icon.hasCharacter, URIRef(peopleDict[characterLabel])))
        g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(peopleDict[characterLabel])))
        g.add((URIRef(peopleDict[characterLabel]), RDF.type, icon.Character))
      else:
        g.add((URIRef(image), icon.hasCharacter, URIRef(d+character)))
        g.add((URIRef(d+character), RDF.type, icon.Character))
        g.add((icon.Character, RDFS.label, Literal("Character")))
        g.add((URIRef(d+character), RDFS.label, Literal(characterLabel)))
        g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+character)))
    if actor !="":
      g.add((URIRef(image), icon.hasCharacter, URIRef(peopleDict[actor])))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(peopleDict[actor])))
      g.add((URIRef(peopleDict[actor]), RDF.type, icon.Character))
    # g.add((URIRef(d+image), icon.bearsPortraitOf, URIRef(peopleDict[actor]))) # to be added and discussed.
    if role != "":
      # dcat:hadRole: The function of an entity or agent with respect to another entity or resource.
      g.add((URIRef(image), dcat.hadRole, URIRef(d+"role/"+role)))
      g.add((URIRef(d+"role/"+role), RDF.type, dcat.Role))
      g.add((URIRef(d+"role/"+role), RDFS.label, Literal(roleLabel)))

    if place !="":
      g.add((URIRef(image), icon.hasPlace, URIRef(d+place)))
      g.add((URIRef(d+place), RDFS.label, Literal(placeLabel)))
      g.add((URIRef(d+place), RDF.type, icon.Place))
      g.add((icon.Place, RDFS.label, Literal("Place")))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+place)))
    if event !="":
      g.add((URIRef(image), icon.hasEvent, URIRef(d+event)))
      g.add((URIRef(d+event), RDFS.label, Literal(eventLabel)))
      g.add((URIRef(d+event), RDF.type, icon.Event))
      g.add((icon.Event, RDFS.label, Literal("Event")))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+event)))
    if namedObject !="":
      g.add((URIRef(image), icon.hasNamedObject, URIRef(d+namedObject)))
      g.add((URIRef(d+namedObject), RDFS.label, Literal(namedObjectLabel)))
      g.add((URIRef(d+namedObject), RDF.type, icon.NamedObject))
      g.add((icon.NamedObject, RDFS.label, Literal("NamedObject")))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+namedObject)))
    if externalObj !="":
      # we finally decided to model it as a named object and not as a crm:P138
      g.add((URIRef(image), icon.hasNamedObject, URIRef(d+"OBJ"+externalObj)))
      # g.add((URIRef(d+image), crm.P138_represents, URIRef(d+"OBJ"+externalObj)))
      g.add((URIRef(d+"OBJ"+externalObj), RDF.type, icon.NamedObject))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+"OBJ"+externalObj)))
    if symbol !="":
      g.add((URIRef(image), icon.hasSymbol, URIRef(d+symbol)))
      g.add((URIRef(d+symbol), RDFS.label, Literal(symbolLabel)))
      g.add((URIRef(d+symbol), RDF.type, icon.Symbol))
      g.add((icon.Symbol, RDFS.label, Literal("Symbol")))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+symbol)))
    if personification !="":
      g.add((URIRef(image), icon.hasPersonification, URIRef(d+"pers/"+personification)))
      g.add((URIRef(d+"pers/"+personification), RDFS.label, Literal(personificationLabel)))
      g.add((URIRef(d+"pers/"+personification), RDF.type, icon.Personification))
      g.add((icon.Personification, RDFS.label, Literal("Personification")))
      g.add((URIRef(d+artwork), icon.iconographicallyDepicts, URIRef(d+"pers/"+personification)))

    for support in lineSupportTo:
      if support !="":
        g.add((URIRef(d+icRec), cito.givesSupportTo, URIRef(d+support)))

    for person in linePersonLabel:
      if person !="":
        g.add((URIRef(d+icRec), crm.P14_carried_out_by, URIRef(peopleDict[person])))
    for evid in lineEvidence:
      if evid !="":
        if "http" in str(evid):
          i = lineEvidence.index(evid)
          evidence = lineEvidenceLabel[i]
        elif "art" in str(evid):
          evidence = d+evid.upper()
        else:
          evidence = d+evid
        if evidenceSpecificLocation !="":
            if "http" not in str(evidenceSpecificLocation):
              g.add((URIRef(d+icRec), cito.citesAsEvidence, URIRef(evidence+"-"+evidenceSpecificLocation)))
              g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidence+"-"+evidenceSpecificLocation)))
              g.add((URIRef(evidence+"-"+evidenceSpecificLocation), RDFS.label, Literal(evidenceSpecificLocationLabel)))
            else:
              g.add((URIRef(evidence), crm.P106_is_composed_of, URIRef(evidenceSpecificLocation)))
              g.add((URIRef(d+icRec), cito.citesAsEvidence, URIRef(evidenceSpecificLocation)))
        g.add((URIRef(d+icRec), cito.citesAsEvidence, URIRef(evidence)))
    for info in lineReference:
      if info !="":
        g.add((URIRef(d+icRec), cito.citesForInformation, URIRef(info)))
        g.add((URIRef(info), RDF.type, crm.E73_Information_Object))

# Idea: add a "type" column holding, when available, the AAT link for the corresponding object type (e.g. cloth)
# print(g.serialize(format="turtle").decode(UTF-8))
# g.serialize(destination = "Output4.ttl", format="turtle")

In [None]:


def part_of(list1, list2, file):
  """Relate the subjects of each row to the other entities recorded in the same row.

  list1: column names (without the "_uri" suffix); the first value of each column is the subject.
  list2: column names (without the "_uri" suffix) holding the related entities. Only
         "Story", "Allegory", "Invenzione", "Composition" and "IDext" produce triples.
  file:  iterable of row dictionaries mapping column names to lists of values.

  Triples are added to the global graph g; nothing is returned.
  """
  # Path segment inserted between d and the value for the icon.composedOf cases.
  composed_of_prefix = {"Story": "story/", "Allegory": "allegory/", "Invenzione": "inv/"}

  for row in file:
    for name in list1:
      img = row[name+"_uri"][0]
      for invenzione in list2:
        for inv in row[invenzione+"_uri"]:
          # Both ends of the relation must be filled in.
          if img == "" or inv == "":
            continue
          if invenzione in composed_of_prefix:
            g.add((URIRef(d+composed_of_prefix[invenzione]+inv), icon.composedOf, URIRef(d+img)))
          elif invenzione == "Composition":
            g.add((URIRef(d+inv), icon.partOf, URIRef(d+img)))
          elif invenzione == "IDext":
            external_uri = d+"OBJ"+img
            relation = row["TypeOfRelation"][0]
            # Still to be decided whether this is useful and, if so, how to feed in the relation-type variable.
            g.add((URIRef(external_uri), relDict[relation], URIRef(d+inv)))

In [None]:
# Column names (without the "_uri" suffix) handed to part_of.
img_list = [
  "Character",
  "Place",
  "Event",
  "NamedObject",
  "Symbol",
  "Personification",
]
inv_list = ["Story", "Allegory", "Invenzione"]

comp_list, am_list = ["Composition"], ["ArtisticMotif"]

art_list, ext_list = ["ID"], ["IDext"]

# part_of has no return statement, so img_invenzione is bound to None;
# the call matters only for the triples it adds to g.
img_invenzione = part_of(img_list, inv_list, level2_cleaned)

## Simulation

In [None]:
# simulation

# Turn every row of simulation_cleaned into sim:Simulation triples in the global graph g.
for line in simulation_cleaned:
  # Single-valued columns are read from the first element of their value list.
  name = line["Subject_uri"][0]
  nameLabel = line["Subject"][0]
  lineSimulacrum = line["Simulacrum_uri"]
  lineSimulacrumLabel = line["Simulacrum"]
  lineRealityCounterpart = line["RealityCounterpart_uri"]
  lineRealityLabel = line["RealityCounterpart"]
  type_uri = line["Type_uri"][0]
  typeLabel = line["Type"][0]
  lineContext = line["Context_uri"]
  lineContextLabel = line["Context"]
  lineEvidence = line["Source_uri"]
  evidenceSpecificLocation = line["SourceSpecificLocation_uri"][0]
  evidenceSpecificLocationLabel = line["SourceSpecificLocation"][0]
  lineEvidenceLink = line["SourceLink"]  # read but not used below

  # Rows without a subject produce no triples.
  if name == "":
    continue

  sim_node = URIRef(d+name)
  g.add((sim_node, RDF.type, sim.Simulation))
  g.add((sim_node, RDFS.label, Literal(nameLabel)))
  g.add((sim.Simulation, RDFS.label, Literal("Simulation")))

  # Each "_uri" value is labelled with the value at the same position in the
  # label column (the same pairing used for the contexts below). A node that
  # already carries a label keeps it.
  for i, simulacrum in enumerate(lineSimulacrum):
    if simulacrum != "":
      simulacrum_node = URIRef(d+simulacrum)
      g.add((sim_node, sim.hasSimulacrum, simulacrum_node))
      g.add((simulacrum_node, RDF.type, sim.Simulacrum))
      if i < len(lineSimulacrumLabel) and (simulacrum_node, RDFS.label, None) not in g:
        g.add((simulacrum_node, RDFS.label, Literal(lineSimulacrumLabel[i])))

  for i, reality in enumerate(lineRealityCounterpart):
    if reality != "":
      reality_node = URIRef(d+reality)
      g.add((sim_node, sim.hasRealityCounterpart, reality_node))
      g.add((reality_node, RDF.type, sim.RealityCounterpart))
      if i < len(lineRealityLabel) and (reality_node, RDFS.label, None) not in g:
        g.add((reality_node, RDFS.label, Literal(lineRealityLabel[i])))

  # An empty type would otherwise link the simulation to the bare namespace d.
  if type_uri != "":
    g.add((sim_node, sim.hasSimulationType, URIRef(d+type_uri)))
    g.add((URIRef(d+type_uri), RDFS.label, Literal(typeLabel)))

  for i, context in enumerate(lineContext):
    if context != "":
      g.add((sim_node, sim.hasSimulationContext, URIRef(d+context)))
      g.add((URIRef(d+context), RDFS.label, Literal(lineContextLabel[i])))

  for evid in lineEvidence:
    if evid == "":
      continue
    if "http" in str(evid):
      # Links are used as they are, without the d prefix.
      evidence = evid
    elif "art" in str(evid):
      evidence = d+evid.upper()
    else:
      evidence = d+evid
    evidence_node = URIRef(evidence)
    if evidenceSpecificLocation != "":
      if "http" not in str(evidenceSpecificLocation):
        # Local location: its URI is derived from the evidence URI.
        location_node = URIRef(evidence+"-"+evidenceSpecificLocation)
        g.add((sim_node, cito.citesAsEvidence, location_node))
        g.add((evidence_node, crm.P106_is_composed_of, location_node))
        g.add((location_node, RDFS.label, Literal(evidenceSpecificLocationLabel)))
      else:
        location_node = URIRef(evidenceSpecificLocation)
        g.add((evidence_node, crm.P106_is_composed_of, location_node))
        g.add((sim_node, cito.citesAsEvidence, location_node))
    g.add((sim_node, cito.citesAsEvidence, evidence_node))



# g.serialize(destination = "OutputSimulation3.ttl", format="turtle")

# Level 3

In [None]:
# multiple art codes in column artwork
# tbd: add automatic uri generation for cf
# Turn every row of level3_cleaned into iconological recognitions in the global graph g:
# one recognition per (artwork, interpretation) pair of the row.
for line in level3_cleaned:
  # Single-valued columns are read from the first element of their value list.
  count = str(line["Count"][0])
  lineSupportTo = line["GivesSupportToInterpretation"]
  lineArtwork = line["Artwork"]
  concept = line["Concept_uri"][0]
  conceptLabel = line["Concept"][0]
  phenomenon = line["CulturalPhenomenon_uri"][0]
  phenomenonLabel = line["CulturalPhenomenon"][0]
  lineHasAm = line["ArtisticMotif"]
  lineHasComp = line["Composition"]
  lineHasImg = line["Image"]
  lineEvidence = line["Evidence_uri"]
  disagree = line["DisagreesWith"][0]
  lineEvidenceLabel = line["Evidence"]
  evidenceSpecificLocationLabel = line["EvidenceSpecificLocation"][0]
  evidenceSpecificLocation = line["EvidenceSpecificLocation_uri"][0]
  linePerson = line["PersonResponsible_uri"]
  linePersonLabel = line["PersonResponsible"]  # TODO: the URI will have to be added to the people dictionary
  lineReference = line["InformationObject"]
  lineReferenceLabel = line["InformationObject"]
  lineInterpretation = line["Interpretation"]

  for artwork in lineArtwork:
    artwork_node = URIRef(d+artwork)
    for interpretation in lineInterpretation:
      # With an interpretation id, identifiers and artworkDict keys carry it;
      # without one, they fall back to the bare count / value.
      if interpretation != "":
        rec_suffix = interpretation+"-"+count
        id_prefix = interpretation+"-"
      else:
        rec_suffix = count
        id_prefix = ""
      iconolRec = artwork+"-"+"ICONOLREC"+rec_suffix
      iconolDesc = artwork+"-"+"DESC"+interpretation
      intrinsic = artwork+"-"+"INTRINSIC"+rec_suffix

      rec_node = URIRef(d+iconolRec)
      desc_node = URIRef(d+iconolDesc)
      intrinsic_node = URIRef(d+intrinsic)

      g.add((rec_node, icon.aboutWorkOfArt, artwork_node))
      g.add((rec_node, RDF.type, icon.IconologicalRecognition))
      g.add((icon.IconologicalRecognition, RDFS.label, Literal("Iconological Recognition")))
      g.add((rec_node, icon.iconologicallyCompliesWith, desc_node))
      g.add((desc_node, RDF.type, icon.InterpretationDescription))
      g.add((icon.InterpretationDescription, RDFS.label, Literal("Interpretation Description")))
      g.add((rec_node, icon.recognizedIntrinsicMeaning, intrinsic_node))
      g.add((intrinsic_node, RDF.type, icon.IntrinsicMeaning))
      g.add((icon.IntrinsicMeaning, RDFS.label, Literal("Intrinsic Meaning")))

      if concept != "":
        concept_node = URIRef(d+concept)
        g.add((intrinsic_node, icon.recognizedConcept, concept_node))
        g.add((concept_node, RDF.type, crm.E28_Conceptual_Object))
        g.add((crm.E28_Conceptual_Object, RDFS.label, Literal("Conceptual Object")))
        g.add((concept_node, RDFS.label, Literal(conceptLabel)))
        g.add((artwork_node, icon.iconologicallyRepresents, concept_node))

      if phenomenon != "":
        # The phenomenon URI is looked up by label, not built from the "_uri" column.
        phenomenon_node = URIRef(phenomenaDict[phenomenonLabel])
        g.add((intrinsic_node, icon.recognizedCulturalPhenomenon, phenomenon_node))
        g.add((phenomenon_node, RDF.type, icon.CulturalPhenomenon))
        g.add((icon.CulturalPhenomenon, RDFS.label, Literal("Cultural Phenomenon")))
        g.add((phenomenon_node, RDFS.label, Literal(phenomenonLabel)))
        g.add((artwork_node, icon.iconologicallyRepresents, phenomenon_node))

      # Values containing "AM" / "COMP" / "IMG" are used directly as identifiers;
      # any other value is resolved through artworkDict. The lookup key is built
      # for each value, so every entry of the row is resolved with its own key.
      for am in lineHasAm:
        if am != "":
          if "AM" in am:
            g.add((intrinsic_node, icon.hasArtisticMotif, URIRef(d+am)))
          else:
            am_id = id_prefix+am
            if am_id not in artworkDict[artwork]["ArtisticMotif"]:
              print(artwork, "has not am ", am)
            else:
              g.add((intrinsic_node, icon.hasArtisticMotif, URIRef(artworkDict[artwork]["ArtisticMotif"][am_id])))
      for comp in lineHasComp:
        if comp != "":
          if "COMP" in comp:
            g.add((intrinsic_node, icon.hasComposition, URIRef(d+comp)))
          else:
            comp_id = id_prefix+comp
            if comp_id not in artworkDict[artwork]["Composition"]:
              print(artwork, "has not composition ", comp_id)
            else:
              g.add((intrinsic_node, icon.hasComposition, URIRef(artworkDict[artwork]["Composition"][comp_id])))
      for img in lineHasImg:
        if img != "":
          if "IMG" in img:
            g.add((intrinsic_node, icon.hasImage, URIRef(d+img)))
          else:
            img_id = id_prefix+img
            if img_id not in artworkDict[artwork]["Image"]:
              print(artwork, "has not image ", img_id)
            else:
              g.add((intrinsic_node, icon.hasImage, URIRef(artworkDict[artwork]["Image"][img_id])))

      for support in lineSupportTo:
        if support != "":
          g.add((rec_node, cito.givesSupportTo, URIRef(d+support)))
      if disagree != "":
        g.add((rec_node, cito.disagreesWith, URIRef(d+disagree)))

      for person in linePersonLabel:
        if person != "":
          g.add((rec_node, crm.P14_carried_out_by, URIRef(peopleDict[person])))

      for i, evid in enumerate(lineEvidence):
        if evid == "":
          continue
        if "http" in str(evid):
          # For links, the value at the same position in the Evidence column is used.
          evidence = lineEvidenceLabel[i]
        elif "art" in str(evid):
          evidence = d+evid.upper()
        else:
          evidence = d+evid
        evidence_node = URIRef(evidence)
        if evidenceSpecificLocation != "":
          if "http" not in str(evidenceSpecificLocation):
            # Local location: its URI is derived from the evidence URI.
            location_node = URIRef(evidence+"-"+evidenceSpecificLocation)
            g.add((rec_node, cito.citesAsEvidence, location_node))
            g.add((evidence_node, crm.P106_is_composed_of, location_node))
            g.add((location_node, RDFS.label, Literal(evidenceSpecificLocationLabel)))
          else:
            location_node = URIRef(evidenceSpecificLocation)
            g.add((evidence_node, crm.P106_is_composed_of, location_node))
            g.add((rec_node, cito.citesAsEvidence, location_node))
        g.add((rec_node, cito.citesAsEvidence, evidence_node))

      for info in lineReference:
        if info != "":
          inf = info.strip()
          g.add((rec_node, cito.citesForInformation, URIRef(inf)))
          g.add((URIRef(inf), RDF.type, crm.E73_Information_Object))



# print(g.serialize(format="turtle").decode(UTF-8))
# g.serialize(destination = "lastOutput.ttl", format="turtle")

# Additional details

In [None]:
# Types and broader concepts of characters, stories and cultural phenomena.
for row in levelsEntryControl_cleaned:
  # character columns
  characterLabel = row["Character"][0]
  character = row["Character_uri"][0]
  broaderCharacterLabel = row["BroaderCharacter"][0]
  broaderCharacter = row["BroaderCharacter_uri"][0]
  sameAsCharacter = row["sameAsCharacter_uri"][0]
  sameAsCharacterLabel = row["sameAsCharacter"][0]
  typeCharacter = row["typeCharacter"]  # these are links
  # story columns
  storyLabel = row["PartOfStory"][0]
  story = row["PartOfStory_uri"][0]
  broaderStoryLabel = row["BroaderStory"][0]
  broaderStory = row["BroaderStory_uri"][0]
  storyCategoryLabel = row["StoryCategoryLabel"][0]
  storyCategoryUri = row["StoryCategoryLabel_uri"][0]
  storyCategory = row["StoryCategory"][0]
  # cultural phenomena columns
  cfID = row["CulturalPhenomenonID"][0]
  cfTypeLabel = row["CulturalPhenomenonType"]
  cfType = row["CulturalPhenomenonType_uri"]

  if character != "":
    character_node = URIRef(d+character)
    g.add((character_node, RDF.type, icon.Character))
    g.add((icon.Character, RDFS.label, Literal("Character")))
    if broaderCharacter != "":
      broader_character_node = URIRef(d+broaderCharacter)
      g.add((character_node, skos.broader, broader_character_node))
      g.add((broader_character_node, RDFS.label, Literal(broaderCharacterLabel)))
    if sameAsCharacter != "":
      same_as_node = URIRef(d+sameAsCharacter)
      g.add((character_node, owl.sameAs, same_as_node))
      g.add((same_as_node, RDFS.label, Literal(sameAsCharacterLabel)))
    for type_link in typeCharacter:
      if type_link != "":
        g.add((character_node, crm.P2_has_type, URIRef(type_link)))

  if story != "":
    story_node = URIRef(d+"story/"+story)
    g.add((story_node, RDF.type, icon.Story))
    g.add((icon.Story, RDFS.label, Literal("Story")))
    if broaderStory != "":
      broader_story_node = URIRef(d+"story/"+broaderStory)
      g.add((story_node, skos.broader, broader_story_node))
      g.add((broader_story_node, RDFS.label, Literal(broaderStoryLabel)))
    if storyCategory != "":
      # The category column holds a link: it is used as the type URI as is.
      category_node = URIRef(storyCategory)
      g.add((story_node, crm.P2_has_type, category_node))
      g.add((category_node, RDFS.label, Literal(storyCategoryLabel)))
      g.add((category_node, RDF.type, crm.E55_Type))
    elif storyCategoryLabel != "":
      # Only a label is available: a local category URI is built from it.
      local_category_node = URIRef(d+"story/"+storyCategoryUri)
      g.add((story_node, skos.broader, local_category_node))
      g.add((local_category_node, RDFS.label, Literal(storyCategoryLabel)))
      g.add((local_category_node, RDF.type, crm.E55_Type))

  if cfID != "":
    for cf_type in cfType:
      if cf_type == "":
        continue
      cf_type_node = URIRef(d+cf_type)
      g.add((URIRef(d+cfID), crm.P2_has_type, cf_type_node))
      # The label is the one at the position of the first occurrence of the value.
      label_position = cfType.index(cf_type)
      g.add((cf_type_node, RDFS.label, Literal(cfTypeLabel[label_position])))
      g.add((cf_type_node, RDF.type, crm.E55_Type))

# character types
# Label the links listed in the WikidataBroader column with the matching BroaderLabel value.
for row in character_wd_cleaned:
  character = row["Name"][0]
  lineWdCategory = row["WikidataBroader"]
  lineWdLabel = row["BroaderLabel"]

  for broader_link in lineWdCategory:
    if broader_link == "":
      continue
    # Position of the first occurrence of the link; it selects the label.
    position = lineWdCategory.index(broader_link)
    print(character, broader_link, position)
    g.add((URIRef(broader_link), RDFS.label, Literal(lineWdLabel[position])))


# Interpretations details

In [None]:

# Attach the date of each interpretation, as a note, to its description node
# when that description is already the subject of some triple in g.
for line in interpretations_cleaned:
  date = line["Date"][0]
  interpretation = line["IDCount"][0]
  artwork = line["Artwork"][0]
  desc_id = d+artwork+"-"+"DESC"+interpretation
  personList = line["PersonResponsible"]  # read but not used below

  # The graph must be queried with a URIRef: rdflib terms do not compare equal
  # to plain strings, so a str subject would not match the stored node.
  desc_node = URIRef(desc_id)
  # The details are added directly to the description that already exists.
  if date != "" and (desc_node, None, None) in g:
    g.add((desc_node, crm.P3_has_note, Literal("datation: "+str(date))))




# Books conversion to triples

In [None]:
# Describe the books of books_cleaned that are already referenced in the global graph g.
for line in books_cleaned:
  ID = line["ID"][0]
  label = line["TitleForCheckingDuplicates"][0]
  production = d+"PROD"+ID
  objType = line["ObjectType"][0]
  signature = line["Signature"][0]
  signature_uri = line["Signature_uri"][0]
  contentID = line["ContentID"][0]
  lineEditor = line["EditorOrPrinter"]  # read but not used below
  date = line["Date"][0]
  date_uri = line["Date_uri"][0]
  dateCertainty = line["DateCertainty"][0]
  place = line["Object PlaceOfProduction"][0]
  description = line["Description"][0]
  # E22, production and place are specified in the artwork tab.

  # Only books that are the object of at least one triple in g are described.
  if ID == "" or (None, None, URIRef(d+ID)) not in g:
    continue
  print(ID)

  book_node = URIRef(d+ID)
  production_node = URIRef(production)
  # Type given to every book, directly or through its specific object type.
  fallback_type = URIRef("http://vocab.getty.edu/page/aat/300028051")

  g.add((book_node, RDF.type, crm.E22_Human_Made_Object))
  g.add((book_node, RDFS.label, Literal(label)))
  if objType != "":
    type_node = URIRef(typeDict[objType])
    g.add((book_node, crm.P2_has_type, type_node))
    g.add((type_node, RDFS.label, Literal(objType)))
    g.add((type_node, crm.P2_has_type, fallback_type))
  else:
    g.add((book_node, crm.P2_has_type, fallback_type))

  if signature != "":
    signature_node = URIRef(d+signature_uri)
    g.add((book_node, crm.P1_is_identified_by, signature_node))
    g.add((signature_node, RDF.type, crm.E42_Identifier))
    g.add((signature_node, RDFS.label, Literal(signature)))

  if contentID != "" and contentID != "#N/A":
    g.add((book_node, crm.P128_carries, URIRef(d+contentID)))

    # NOTE(review): date and place are only emitted for books that have a
    # content id. This nesting is kept as found; confirm whether it is
    # intended or whether these two checks should apply to every book.
    if date != "":
      time_span_node = URIRef(d+date_uri)
      g.add((production_node, crm.P4_has_time_span, time_span_node))
      if dateCertainty == "c.":
        # The qualifiers go on the same time-span node the production points to.
        circa_node = URIRef(d+"circa")
        g.add((time_span_node, crm.P79_beginning_is_qualified_by, circa_node))
        g.add((time_span_node, crm.P80_end_is_qualified_by, circa_node))
        g.add((circa_node, RDFS.label, Literal("ca.")))
        g.add((circa_node, RDF.type, crm.E62_String))

    if place != "":
      g.add((production_node, crm.P7_took_place_at, URIRef(placesDict["Cities"][place])))

  if description != "":
    g.add((production_node, RDFS.comment, Literal(description)))

## Books content conversion to triples

In [None]:

# Describe the book contents of booksContent_cleaned that are already referenced in the global graph g.
for line in booksContent_cleaned:
  ID = line["ID"][0]
  label = line["TitleForCheckingDuplicates"][0]
  production = d+"PROD"+ID
  lineAuthor = line["ObjectAuthor"]
  title = line["Title"][0]
  title_uri = line["Title_uri"][0]
  # E22, production and place are specified in the artwork tab.

  # Only contents that are the object of at least one triple in g are described.
  if ID == "" or (None, None, URIRef(d+ID)) not in g:
    continue
  print(ID)

  content_node = URIRef(d+ID)
  production_node = URIRef(production)

  g.add((content_node, RDF.type, crm.E73_Information_Object))
  g.add((crm.E73_Information_Object, RDFS.label, Literal("Information Object")))
  g.add((content_node, RDFS.label, Literal(label)))

  for author in lineAuthor:
    if author != "":
      g.add((production_node, crm.P94_has_created, content_node))
      g.add((production_node, RDF.type, crm.E65_Creation))
      g.add((production_node, crm.P14_carried_out_by, URIRef(peopleDict[author])))

  if title != "":
    # One node is used for the link, the type and the label, so the title the
    # content points to is the one that gets described.
    # NOTE(review): the leading "/" in "/title/" is kept as found; other paths
    # in this file ("story/", "role/", ...) have none - confirm the intended form.
    title_node = URIRef(d+"/title/"+title_uri)
    g.add((content_node, crm.P102_has_title, title_node))
    g.add((title_node, RDF.type, crm.E35_Title))
    g.add((title_node, RDFS.label, Literal(title)))

# People conversion to triples

In [None]:
# Describe the people (and schools) of people_cleaned that are already referenced in the global graph g.
for line in people_cleaned:
  lineName = line["Name"][0]  # TODO: switch to the "_uri" key
  role = line["Role"][0]
  role_uri = line["Role_uri"][0]
  roleLabel = line["Role"][0]
  time = line["Date"][0]
  # TODO: a certainty value still has to be added to the CSV; once it exists,
  # "c." dates should get the "ca." P79/P80 qualifiers.
  roleInTime = role_uri+time
  groupLabel = line["Group"][0]
  groupLink = line["GroupLink"][0]
  group = line["Group_uri"][0]  # read but not used below

  person_node = URIRef(peopleDict[lineName])
  # Names in the spreadsheet that are not related to the selected artworks are
  # filtered out: only nodes that are the object of some triple are described.
  if (None, None, person_node) not in g:
    continue

  g.add((person_node, RDFS.label, Literal(lineName)))
  print(lineName)
  if lineName.endswith("School") or lineName.startswith("School"):
    # Schools are groups: E74 is the CIDOC-CRM Group class (E39 is Actor).
    g.add((person_node, RDF.type, crm.E74_Group))
  else:
    g.add((person_node, RDF.type, crm.E21_Person))
    g.add((crm.E21_Person, RDFS.label, Literal("Person")))

  if role != "" and time != "":
    role_in_time_node = URIRef(d+roleInTime)
    role_node = URIRef(d+role_uri)
    time_node = URIRef(d+time)
    g.add((person_node, pro.holdsRoleInTime, role_in_time_node))
    g.add((role_in_time_node, RDF.type, pro.RoleInTime))
    g.add((role_in_time_node, RDFS.label, Literal(roleLabel+"-"+time)))
    g.add((role_in_time_node, pro.withRole, role_node))
    g.add((role_node, RDFS.label, Literal(roleLabel)))
    g.add((role_node, RDF.type, pro.Role))
    g.add((role_in_time_node, crm.P4_has_time_span, time_node))
    g.add((time_node, RDF.type, crm.E52_Time_Span))
    # TODO: add P82a_begin_of_the_begin / P82b_end_of_the_end from dateDict[time].

  # Groups are only emitted when an external link is available.
  # TODO: decide whether groups without a link should get a d+"people/"+group URI.
  if groupLabel != "" and groupLink != "":
    group_node = URIRef(groupLink)
    g.add((group_node, crm.P107_has_current_or_former_member, person_node))
    g.add((group_node, RDF.type, crm.E74_Group))
    g.add((group_node, RDFS.label, Literal(groupLabel)))

# g.serialize(destination = "Output.ttl", format="turtle")

# Places conversion to triples

In [None]:
# triples
for line in places_cleaned:
  # every cell of a cleaned row is a list: only its first value is read here
  institution_uri = line["Institution_uri"][0]
  institutionLink = line["InstitutionLink"][0]
  institution = line["Institution"][0]
  institutionType = line["InstitutionType"][0]
  city = line["CityOrRegion"][0]
  city_uri = line["CityOrRegion_uri"][0]
  cityLink = line["CityLink"][0]
  country = line["Country"][0]
  country_uri = line["Country_uri"][0]
  countryLink = line["CountryLink"][0]

  # list of URIs stored for this institution (index 0 is used as its place URI);
  # it is only looked up when both the institution and the city are filled in
  institution_uris = None
  if institution != "" and city != "":
    institution_uris = placesDict["Cities Institutions"][city][institution]

  # institutions are described only if their place URI is already the object of some triple
  if institution_uris is not None and (None, None, URIRef(institution_uris[0])) in g:
    print("institution: ", institution_uris[0])
    place_ref = URIRef(institution_uris[0])
    # can collections be both a place and a curated holding?
    g.add((place_ref, RDF.type, crm.E53_Place))
    g.add((place_ref, RDFS.label, Literal(institution+", "+city)))

    if institutionType == "Collection":
      collectionUri = d+"collection/"+city_uri+"-"+institution_uri
      holding_ref = URIRef(collectionUri)
      g.add((holding_ref, RDF.type, crm.E78_Curated_Holding))
      g.add((holding_ref, RDFS.label, Literal(institution)))
      g.add((holding_ref, crm.P53_has_current_or_former_location, place_ref))
      # remember the collection URI next to the place URI
      if collectionUri not in institution_uris:
        institution_uris.append(collectionUri)
    elif institutionType == "Museum":
      museumUri = d+"museum/"+city_uri+"-"+institution_uri
      museum_ref = URIRef(museumUri)
      g.add((museum_ref, RDF.type, crm.E74_Group))
      g.add((museum_ref, RDFS.label, Literal(institution)))
      g.add((museum_ref, crm.P53_has_current_or_former_location, place_ref))
      # remember the museum URI next to the place URI
      if museumUri not in institution_uris:
        institution_uris.append(museumUri)

      # NOTE(review): the city/country containment below is emitted for museums only;
      # it may have been meant for every institution type - confirm before changing
      city_ref = URIRef(placesDict["Cities"][city])
      g.add((place_ref, crm.P89_falls_within, city_ref))
      g.add((city_ref, RDFS.label, Literal(city)))
      if country != "":
        country_ref = URIRef(placesDict["Countries"][country])
        g.add((place_ref, crm.P89_falls_within, country_ref))
        g.add((city_ref, crm.P89_falls_within, country_ref))
        g.add((country_ref, RDFS.label, Literal(country)))
    continue

  # no usable institution: describe the city and the country on their own,
  # again only when they are already the object of some triple
  if city != "" and (None, None, URIRef(placesDict["Cities"][city])) in g:
    print("city: ", placesDict["Cities"][city])
    city_ref = URIRef(placesDict["Cities"][city])
    if country != "":
      g.add((city_ref, crm.P89_falls_within, URIRef(placesDict["Countries"][country])))
    g.add((city_ref, RDFS.label, Literal(city)))
    g.add((city_ref, RDF.type, crm.E53_Place))
  # checked after the city so that a country just linked above is found in the graph
  if country != "" and (None, None, URIRef(placesDict["Countries"][country])) in g:
    print("country: ", placesDict["Countries"][country])
    country_ref = URIRef(placesDict["Countries"][country])
    g.add((country_ref, RDF.type, crm.E53_Place))
    g.add((country_ref, RDFS.label, Literal(country)))
print(placesDict["Cities Institutions"])

# Dates conversion to triples

In [None]:
# we add the triples regarding the dates in the dictionary

 # keys.replace(" ", "-").lower().strip()
for dateName, bounds in dateDict.items():
  # text_to_uri takes and returns a list (here with a single value)
  for key in text_to_uri([dateName]):
    span_ref = URIRef(d+key)
    # skip the dates that are never used as a subject in the graph
    if (span_ref, None, None) not in g:
      continue
    print(key)
    g.add((span_ref, RDF.type, crm.E52_Time_Span))
    g.add((span_ref, RDFS.label, Literal(dateName)))
    # the first two values stored for a date are its start and its end
    startDate = bounds[0]
    endDate = bounds[1]
    g.add((span_ref, crm.P82a_begin_of_the_begin, Literal(startDate)))
    g.add((span_ref, crm.P82b_end_of_the_end, Literal(endDate)))

# g.serialize(destination = "OutputDate2.ttl", format="turtle")

# Addition of reconciled terms

In [None]:
# list of dictionaries to add a reconciliation
to_rec_dict_list = [
  peopleDict,
  characterDict,
  naturalElementDict,
  expressionalQualityDict,
  qualityDict,
  actionDict,
  placeDict,
  eventDict,
  symbolDict,
  personificationDict,
  storyDict,
  allegoryDict,
  invenzioneDict,
]


In [None]:
# check that the links are ok

def clean_rec_icon(rec_list):
  """Percent-encode the spaces found in Iconclass links of the reconciliation lists.

  For every dictionary in rec_list, each value stored under a key ending with " rec"
  is expected to be a list of links. Links starting with "https://iconclass.org/"
  get their spaces replaced by "%20"; every other link is left as it is.

  The lists are updated in place (same list objects, same order) and non-list
  values are left untouched.

  Returns the same rec_list object that was passed in.
  """
  for rec in rec_list: # for dictionary in the dictionary list
    for key, links in rec.items():
      if not key.endswith(" rec") or not isinstance(links, list):
        continue
      # Build the cleaned list first and write it back in one go: removing and
      # appending while looping over the same list skips the element that
      # follows every fixed link.
      links[:] = [
        link.replace(" ", "%20") if link.startswith("https://iconclass.org/") else link
        for link in links
      ]

  return rec_list

# clean the Iconclass links of all the dictionaries to reconcile
# (clean_rec_icon edits the lists in place and returns the list it received)
clear_icon = clean_rec_icon(to_rec_dict_list)

In [None]:
#g = Graph()
# input dictionary: all the dictionaries for multiple reconciliation, with "<name> rec" keys
def reconciled_triples_addition(dictionary, g):
  """Link the internal URIs of a dictionary to their reconciled terms with owl.sameAs.

  dictionary: maps each name to its internal link and each "<name> rec" key to the
              list of reconciled links for that name.
  g: the graph to enrich; a link is added only for internal URIs that already
     appear as a subject in it.

  Returns the same graph g.
  """
  for name in dictionary:
    # the "<name> rec" entries are read through their "<name>" counterpart
    if name.endswith(" rec"):
      continue
    internal_ref = URIRef(dictionary[name])
    if (internal_ref, None, None) not in g: # the internal link is not in the graph
      continue
    # an empty list of reconciled terms simply adds nothing
    for reconciled in dictionary[name+" rec"]:
      g.add((internal_ref, owl.sameAs, URIRef(reconciled)))
  return g


In [None]:
to_rec_dict_list = [
  peopleDict,
  characterDict,
  naturalElementDict,
  expressionalQualityDict,
  qualityDict,
  actionDict,
  placeDict,
  eventDict,
  symbolDict,
  personificationDict,
  storyDict,
  allegoryDict,
  invenzioneDict,
]

# add the owl.sameAs triples of every dictionary to the graph
for dic in to_rec_dict_list:
  rec = reconciled_triples_addition(dic, g)

# Inverse properties

In [None]:
# property name -> name of its inverse property
inverse_prop_dict = {
  "associatedForm": "isPureFormOf",
  "hasCompositionalStructure": "isCompositionalStructureOf",
  "hasExpressionalMeaning": "isExpressionalMeaningOf",
  "hasFactualMeaning": "isFactualMeaningOf",
  "hasPart": "partOf",
  "hasPrototyicalMotif": "isPrototypicalMotifOf",
  "hasCopiedMotif": "isCopiedMotifOf",
  "recognizedComposition": "isRecognizedCompositionOf",
  "recognizedArtisticMotif": "isRecognizedArtisticMotifOf",
  "hasCharacter": "isCharacterOf",
  "hasEvent": "isEventOf",
  "hasNamedObject": "isNamedObjectOf",
  "hasPersonification": "isPersonificationOf",
  "hasPlace": "isPlaceOf",
  "hasSymbol": "isSymbolOf",
  "recognizedInvenzione": "isRecognOf",
  "hasRecAttribute": "isRecAttributeOf",
  "refersToArtisticMotif": "isIconogrphicallyReferredBy",
  "recognizedImage": "isIconographicallyRecognizedBy",
  "composedOf": "takesPartIn",
  "recognizedIntrinsicMeaning": "isIntrinsicMeaningOf",
  "aboutWorkOfArt": "hasRecogniton",
  "hasArtsticMotif": "isArtisticMotifOf",
  "hasComposition": "isCompositionOf",
  "hasImage": "isImageOf",
  "recognizedCulturalPhenomenon": "isCulturalPhenomenonOf",
  "recognizedConceptualObject": "isConceptualObjectOf",
  "hasInvenzione": "isInvenzioneOf",
}

In [None]:
# single-pair mapping left over from testing; the loop below does not use it
test_dict = {"hasFactualMeaning":	"isFactualMeaningOf"}

# For every triple (s, property, o) whose property has a declared inverse,
# add the mirrored triple (o, inverse property, s) to the graph.
for forward_name, inverse_name in inverse_prop_dict.items():
  forward_prop = URIRef(icon+forward_name)
  inverse_prop = URIRef(icon+inverse_name)
  # Ask the graph only for the triples using this property instead of scanning
  # every triple once per pair, and take a snapshot (list) of the matches so
  # that the graph is not modified while it is being iterated.
  for s, _, o in list(g.triples((None, forward_prop, None))):
    g.add((o, inverse_prop, s))



# Serialization

In [None]:
# write the whole graph to disk twice: once as Turtle and once as RDF/XML
g.serialize(destination = "161023output2.ttl", format="turtle")
g.serialize(destination = "161023output2.rdf", format="xml")
# v = g.serialize(format="xml")

<Graph identifier=Na15c0bbdba7243e9b19abdfb1d49a1c2 (<class 'rdflib.graph.Graph'>)>