# Iconology Dataset Validation

## Imports and functions

In [None]:
!pip install rdflib

import rdflib
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace
from rdflib import URIRef

In [None]:
import csv
def store_csv(file_name, first_line, list_of_lists):
    """Write a header and a sequence of rows to a semicolon-delimited CSV file.

    The file is opened in write mode (an existing file is overwritten),
    encoded as UTF-8, and every field is wrapped in double quotes.

    Args:
        file_name: Path of the CSV file to create.
        first_line: Iterable of header values, written as the first row.
        list_of_lists: Iterable of rows; each row is an iterable of values.

    Returns:
        The ``file_name`` argument, unchanged.
    """
    with open(file_name, mode='w', newline='', encoding='UTF-8') as handle:
        writer = csv.writer(handle, delimiter=';', quotechar='"', quoting=csv.QUOTE_ALL)
        # Header first, then all data rows in their original order.
        writer.writerow(first_line)
        writer.writerows(list_of_lists)
    return file_name


In [None]:
# Build the in-memory rdflib graph that the SPARQL queries below run against,
# and fill it by parsing the dataset URL as Turtle.
g = rdflib.Graph() # we create a new graph
g.parse("https://w3id.org/icon/data/", format="ttl")

## Accuracy
### Syntactic validity
The syntactic validity was checked through TurtleValidator, available at https://github.com/IDLabResearch/TurtleValidator . <br/>
The validation ended with 0 errors.

In [None]:
!npm install -g turtle-validator

In [None]:
!ttl https://raw.githubusercontent.com/SofiBar/IconologyDataset/main/data/icondataset.ttl



### Literals validity
We check the validity of dates by verifying that they comply with the ISO 8601 standard. To do so, we express the standard's date-time format as a regular expression and check that every date in the dataset matches it.

In [None]:
import re
# Sample timestamps for trying out the patterns: a four-digit year, a
# three-digit year, and a negative two-digit year.
example = ["1023-01-01T00:00:00", "102-01-01T00:00:00", "-20-01-01T00:00:00"]
# Stricter variant: month 01-12, day 00-31, hour 00-23, minutes/seconds 00-59,
# no fractional seconds and no time zone. It is defined but not used below.
expression = re.compile(r"^\d\d\d\d-(0[1-9]|1[012])-([012]\d|3[01])T([01]\d|2[0-3]):([0-5]\d):([0-5]\d)$")
# Looser variant: any digits in each date/time field, optional fractional
# seconds, and an optional "Z" or +hh[:mm] / -hh[:mm] offset.
ex3 = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([Zz]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?$", re.IGNORECASE)
# Smoke test: only the first (well-formed) sample is tried here.
match = re.search(ex3,example[0])
if match:
  print("ok")

In [None]:
def check_date_regex(input_list):
    """Print every string in ``input_list`` that is not a well-formed date-time.

    A value is accepted when it has the shape ``YYYY-MM-DDThh:mm:ss`` with
    optional fractional seconds and an optional ``Z`` or ``+hh[:mm]`` /
    ``-hh[:mm]`` offset. One leading ``-`` (negative year) is removed before
    matching. Only the shape is checked: the pattern does not restrict the
    month, day or time fields to valid ranges.

    Args:
        input_list: Iterable of date strings to check.

    Returns:
        The ``re.Match`` object (or ``None``) obtained for the LAST element of
        ``input_list``; ``None`` when ``input_list`` is empty. Values that do
        not match are reported through ``print`` rather than the return value.
    """
    # Compiled once: the pattern does not depend on the element being checked.
    pattern = re.compile(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?([Zz]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?$", re.IGNORECASE)

    # Initialised so that an empty input returns None instead of raising
    # UnboundLocalError at the return statement.
    mat = None
    for text in input_list:
        # negative dates: drop the single leading sign before matching
        input_text = text[1:] if text.startswith("-") else text

        mat = pattern.match(input_text)
        if not mat:
            print("not matched: ", input_text, text)

    return mat

# Try the checker on the sample timestamps: the three-digit-year and the
# negative two-digit-year samples are printed as "not matched".
t = check_date_regex(example)

Extract dates from the dataset

In [None]:
# Every distinct value found as the object of crm:P82a_begin_of_the_begin
# or crm:P82b_end_of_the_end.
q_dates = '''

PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT DISTINCT ?date WHERE {

 ?time crm:P82a_begin_of_the_begin | crm:P82b_end_of_the_end ?date.

}
'''

dateRes = g.query(q_dates)

# Keep the string form of each result, skipping rows whose value renders as "None".
date_strings = (str(row[0]) for row in dateRes)
date_list = [d for d in date_strings if d != "None"]
print(date_list)

In [7]:
# Validate the dates extracted from the dataset; values that do not match
# the pattern are printed by the function.
check = check_date_regex(date_list)

### Semantic Validity of Triples
Measure to determine if the meanings of triples with literal values in the object position of the Knowledge Graph are semantically sound. A triple is considered to be semantically valid if it is sourced from a reliable source (e.g. Name Authority File), if it is common knowledge, or if the stated property can be directly observed or measured.


Adapted metric: select the triples with literal values from the interpretations of 10 random artworks, and verify whether they hold true by comparing them with the textual source and the artwork's image.

In [None]:
# Sample up to 10 random (artwork, source, label) rows and collect the
# artworks, wrapped in angle brackets so they can be spliced into later queries.
art_list = []
# NOTE(review): the `?int a icon:InterpretationDescription` pattern is not
# joined to the other patterns; it only requires that at least one such
# instance exists. Confirm whether a join with ?rec was intended.
# NOTE(review): `rdfs:` is not declared in the query, so it presumably relies
# on the prefixes already bound on the graph.
q_art = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT DISTINCT ?art ?source ?label WHERE {

 ?int a icon:InterpretationDescription.
 ?rec icon:aboutWorkOfArt ?art.
 ?obj crm:P65_shows_visual_item ?art;
  crm:P67_refers_to ?source;
  rdfs:label ?label.

}ORDER BY RAND() LIMIT 10
'''

dateRes = g.query(q_art)

for row in dateRes:

  art = str(row[0])
  source = str(row[1])
  title = str(row[2])
  # The original printed an undefined name (`inter`), which raised NameError
  # on the first row; print the values actually read from the row instead.
  print(art, source, title)
  # DISTINCT applies to the whole (art, source, label) row, so one artwork can
  # come back several times; keep each artwork once so that the per-artwork
  # queries that consume this list do not produce duplicate rows.
  art_ref = "<"+art+">"
  if art_ref not in art_list:
    art_list.append(art_ref)

In [None]:
# metadata verification
# For every sampled artwork, collect the labels of the objects that show it,
# together with the sources those objects refer to, one row per label.
metadata_list = []
for art in art_list:

  # `art` is already wrapped in angle brackets, so it is spliced in as an IRI.
  # It is used in BOTH triple patterns: the original left `?art` as a free
  # variable in the second one, so the sources and labels of every object in
  # the graph were returned for each artwork. The unused `VALUES ?complies`
  # clause was dropped; it did not constrain any pattern.
  q_art_metadata =  '''
  PREFIX d: <https://w3id.org/icon/data/>
  PREFIX icon: <https://w3id.org/icon/ontology/>
  PREFIX sim: <https://w3id.org/simulation/ontology/>
  PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

  SELECT DISTINCT (group_concat(distinct ?source ;separator=", ") as ?s) ?label WHERE {

  ?rec icon:aboutWorkOfArt '''+art+'''.
  ?obj crm:P65_shows_visual_item '''+art+''';
    crm:P67_refers_to ?source;
    rdfs:label ?label.

  } GROUP BY ?label
  '''

  dateRes = g.query(q_art_metadata)

  for row in dateRes:

    # row[0] is the comma-separated list of sources, row[1] the label.
    source = str(row[0])
    title = str(row[1])
    print(art, source, title)
    metadata_list.append([art, source, title])




In [None]:
# Save the collected metadata rows as a CSV file with one column per value
# stored in each row (artwork, sources, label).
metadata_first_line = ["Artwork", "Source", "Title"]
metadata = store_csv("semantic_validity_metadata.csv", metadata_first_line, metadata_list)

In [None]:
  # content verification
# For every sampled artwork, collect the subjects it depicts or represents at
# the three levels listed in VALUES, together with the subjects' labels.
content_list = []
for art in art_list:
    # `art` is already wrapped in angle brackets, so it is spliced in as an IRI.
    # It is used as the subject of `?rel` as well: the original used a free
    # `?art` variable there, so the subjects of every artwork in the graph were
    # returned for each sampled artwork.
    q_art_detail = '''
    PREFIX d: <https://w3id.org/icon/data/>
    PREFIX icon: <https://w3id.org/icon/ontology/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

    SELECT DISTINCT  ?subject ?sLabel WHERE {
    VALUES ?rel {icon:preiconographicallyDepicts icon:iconographicallyDepicts icon:iconologicallyRepresents}

    ?rec icon:aboutWorkOfArt '''+art+'''.
    '''+art+''' ?rel ?subject.
    ?subject rdfs:label ?sLabel.


    }
    '''

    dateRes = g.query(q_art_detail)

    for row in dateRes:
      s = str(row[0])
      sLabel = str(row[1])
      print(art, s, sLabel)
      content_list.append([art, s, sLabel])

In [None]:
# Save the collected (artwork, subject, subject label) rows as a CSV file.
content_first_line = ["Artwork", "Subject", "Subject Label"]
content = store_csv("semantic_validity_content.csv", content_first_line, content_list)

In [None]:
# save as 2 csv, make the verification on them

## Contextual category

### Column Completeness
The degree to which the attributes of a class, which are defined at the schema level, exist at the instance level of the KG. <br/>
**Metric:** ratio of the number of instances that have both the class k and a value for the relation r to the total number of instances that have class k. We apply this metric to the core classes, namely cultural objects, artworks, and recognitions.


In [None]:
# Classes to assess, in evaluation order. For each class: the relations (or
# SPARQL property-path alternatives) whose presence is counted, and the total
# number of instances. All counts start as "" and are filled in by the queries.
# column_dict.update({"production": {"relations": {"crm:P14_carried_out_by" : "","crm:P17_was_motivated_by" : "", "crm:P4_has_time_span" : "", "crm:P10_falls_within" : "", "crm:P108_has_produced" : ""}, "total": ""}})
# column_dict.update({"preiconographical": {"relations": {"icon:preiconographicallyCompliesWith" : ""}, "total": ""}})
column_dict = {
    "crm:E22_Human_Made_Object": {
        "relations": dict.fromkeys([
            "foaf:depiction",
            "crm:P102_has_title",
            "crm:P2_has_type",
            "crm:P65_shows_visual_item",
            "crm:P53_has_former_or_current_location",
            "crm:P45_consists_of",
            "crm:P67_refers_to",
        ], ""),
        "total": "",
    },
    "icon:Artwork": {
        "relations": dict.fromkeys([
            "icon:preiconographicallyDepicts",
            "icon:iconographicallyDepicts",
            "icon:iconologicallyRepresents",
        ], ""),
        "total": "",
    },
    "icon:PreiconographicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "cito:citesAsEvidence",
            "cito:givesSupportTo",
            "icon:recognizedArtisticMotif | icon:recognizedComposition",
        ], ""),
        "total": "",
    },
    "icon:FormalMotifRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "cito:citesAsEvidence",
            "cito:givesSupportTo",
            "icon:hasCopiedMotif",
            "icon:hasPrototypicalMotif",
        ], ""),
        "total": "",
    },
    "icon:IconographicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "cito:citesAsEvidence",
            "cito:givesSupportTo",
            "icon:recognizedImage | icon:recognizedInvenzione",
        ], ""),
        "total": "",
    },
    "icon:IconologicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "cito:citesAsEvidence",
            "icon:recognizedIntrinsicMeaning",
        ], ""),
        "total": "",
    },
}


In [None]:
# Fill column_dict with instance counts: per class, the number of instances,
# and per relation, the number of instances having a value for it.
for key, class_stats in column_dict.items():
  # Denominator: distinct instances typed with the class.
  q_tot =''' PREFIX d: <https://w3id.org/icon/data/>
  PREFIX icon: <https://w3id.org/icon/ontology/>
  PREFIX sim: <https://w3id.org/simulation/ontology/>
  PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

  SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

  ?instance a '''+key+'''.
  }
  '''

  for total_row in g.query(q_tot):
    class_stats["total"] = int(total_row[0])

  relation_counts = class_stats["relations"]
  for rel in relation_counts:
    # Numerator: distinct instances of the class with at least one value for `rel`.
    # NOTE(review): `cito:` is not declared in this query, so it presumably
    # relies on the prefixes already bound on the graph.
    q_rel =''' PREFIX d: <https://w3id.org/icon/data/>
    PREFIX icon: <https://w3id.org/icon/ontology/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
    prefix foaf: <http://xmlns.com/foaf/spec/>
    SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

    ?instance a '''+key+'''; '''+rel+''' ?obj.
    }
    '''

    for count_row in g.query(q_rel):
      relation_counts[rel] = int(count_row[0])

column_dict

In [None]:
# Column completeness per class: the mean, over the class's relations, of
# (instances having the relation) / (instances of the class).
column_list = []
for entity in column_dict:

  tot = column_dict[entity]["total"]
  relation_counts = column_dict[entity]["relations"]
  if tot == 0 or not relation_counts:
    # No instances of the class (or no relations to assess): the ratio is
    # undefined. Record NaN instead of stopping with ZeroDivisionError.
    final = float("nan")
  else:
    # we calculate the partial score for every relation
    partial_scores = [int(count) / tot for count in relation_counts.values()]
    final = sum(partial_scores) / len(partial_scores)
  # we store the entity score in a list of lists
  column_list.append([entity, final])
column_list

In [None]:
# Save the per-class scores computed over the full set of relations.
column1 = store_csv("column_completeness_wide.csv", ["Entity", "Score"], column_list)

We repeat the analysis by including only the highly recommended relations

In [None]:
# Same layout as the wide analysis, restricted to a smaller set of relations
# per class. All counts start as "" and are filled in by the queries.
# column_dict.update({"production": {"relations": {"crm:P14_carried_out_by" : "","crm:P17_was_motivated_by" : "", "crm:P4_has_time_span" : "", "crm:P10_falls_within" : "", "crm:P108_has_produced" : ""}, "total": ""}})
# column_dict.update({"preiconographical": {"relations": {"icon:preiconographicallyCompliesWith" : ""}, "total": ""}})
column_dict2 = {
    "crm:E22_Human_Made_Object": {
        "relations": dict.fromkeys([
            "crm:P102_has_title",
            "crm:P65_shows_visual_item",
            "crm:P67_refers_to",
        ], ""),
        "total": "",
    },
    "icon:Artwork": {
        "relations": dict.fromkeys([
            "icon:preiconographicallyDepicts",
            "icon:iconographicallyDepicts",
            "icon:iconologicallyRepresents",
        ], ""),
        "total": "",
    },
    "icon:PreiconographicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "icon:recognizedArtisticMotif | icon:recognizedComposition",
        ], ""),
        "total": "",
    },
    "icon:FormalMotifRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "icon:hasCopiedMotif",
            "icon:hasPrototypicalMotif",
        ], ""),
        "total": "",
    },
    "icon:IconographicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "icon:recognizedImage | icon:recognizedInvenzione",
        ], ""),
        "total": "",
    },
    "icon:IconologicalRecognition": {
        "relations": dict.fromkeys([
            "icon:aboutWorkOfArt",
            "crm:P14_carried_out_by",
            "cito:citesForInformation",
            "icon:recognizedIntrinsicMeaning",
        ], ""),
        "total": "",
    },
}


In [None]:
# Fill column_dict2 with instance counts: per class, the number of instances,
# and per relation, the number of instances having a value for it.
for key, narrow_stats in column_dict2.items():
  # Denominator: distinct instances typed with the class.
  q_tot =''' PREFIX d: <https://w3id.org/icon/data/>
  PREFIX icon: <https://w3id.org/icon/ontology/>
  PREFIX sim: <https://w3id.org/simulation/ontology/>
  PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
  PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

  SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

  ?instance a '''+key+'''.
  }
  '''

  for total_row in g.query(q_tot):
    narrow_stats["total"] = int(total_row[0])

  narrow_counts = narrow_stats["relations"]
  for rel in narrow_counts:
    # Numerator: distinct instances of the class with at least one value for `rel`.
    # NOTE(review): `cito:` is not declared in this query, so it presumably
    # relies on the prefixes already bound on the graph.
    q_rel =''' PREFIX d: <https://w3id.org/icon/data/>
    PREFIX icon: <https://w3id.org/icon/ontology/>
    PREFIX sim: <https://w3id.org/simulation/ontology/>
    PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
    PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
    prefix foaf: <http://xmlns.com/foaf/spec/>
    SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

    ?instance a '''+key+'''; '''+rel+''' ?obj.
    }
    '''

    for count_row in g.query(q_rel):
      narrow_counts[rel] = int(count_row[0])

column_dict2

In [None]:
# Column completeness per class over the reduced relation set: the mean, over
# the class's relations, of (instances having the relation) / (instances of the class).
column_list2 = []
for entity in column_dict2:

  tot = column_dict2[entity]["total"]
  relation_counts = column_dict2[entity]["relations"]
  if tot == 0 or not relation_counts:
    # No instances of the class (or no relations to assess): the ratio is
    # undefined. Record NaN instead of stopping with ZeroDivisionError.
    final = float("nan")
  else:
    # we calculate the partial score for every relation
    partial_scores = [int(count) / tot for count in relation_counts.values()]
    final = sum(partial_scores) / len(partial_scores)
  # we store the entity score in a list of lists
  column_list2.append([entity, final])
column_list2

In [None]:
# Save the per-class scores computed over the reduced set of relations.
column2 = store_csv("column_completeness_narrow.csv", ["Entity", "Score"], column_list2)

## Ease of understanding

### Description of resources
Describing resources in a human-understandable way, e.g. via rdfs:label or rdfs:comment

Metric: number of resources described via rdfs:label or rdfs:comment / number of all considered resources. Applied only to actual instances, excluding reification classes.

In [None]:
# Denominator of the metric: distinct typed resources, with the listed classes
# filtered out.
# NOTE(review): each FILTER is applied per (?instance, ?type) pair, so a
# resource with several rdf:type values is still counted when at least one of
# its types is not in the list - confirm this is the intended exclusion.
q_inst = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {


?instance a ?type. # exclude literals

FILTER (?type != icon:ArtisticMotif)
FILTER (?type != icon:Image)
FILTER (?type != icon:IntrinsicMeaning)
FILTER (?type != icon:Composition)
FILTER (?type != icon:PreiconographicalRecognition)
FILTER (?type != icon:FormalMotifRecognition)
FILTER (?type != icon:IconographicalRecognition)
FILTER (?type != icon:IconologicalRecognition)
FILTER (?type != icon:InterpretationDescription)
FILTER (?type != crm:E12_Production)
FILTER (?type != crm:E52_Time_Span)
FILTER (?type != crm:E13_Attribute_Assignment)

}
'''

dateRes = g.query(q_inst)

# The count is read from the first column of each result row and kept in
# `inst_tot`, which later cells use as a denominator.
for row in dateRes:
  inst_tot = int(row[0])
  print(int(row[0]))

In [None]:
# Numerator of the metric: distinct typed resources (same class exclusions as
# the total count) that have at least one rdfs:label or rdfs:comment.
# The two VALUES clauses of the original (?rel, ?complies) were removed: the
# variables were not used by any pattern, so they only multiplied the
# intermediate solutions by 9 without changing the distinct count.
q_inst_desc = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>

SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

?instance a ?type; # exclude literals
  rdfs:label | rdfs:comment ?desc.

FILTER (?type != icon:ArtisticMotif)
FILTER (?type != icon:Image)
FILTER (?type != icon:IntrinsicMeaning)
FILTER (?type != icon:Composition)
FILTER (?type != icon:PreiconographicalRecognition)
FILTER (?type != icon:FormalMotifRecognition)
FILTER (?type != icon:IconographicalRecognition)
FILTER (?type != icon:IconologicalRecognition)
FILTER (?type != icon:InterpretationDescription)
FILTER (?type != crm:E12_Production)
FILTER (?type != crm:E52_Time_Span)
FILTER (?type != crm:E13_Attribute_Assignment)

}
'''

dateRes = g.query(q_inst_desc)

# The count is read from the first column of each result row and kept in
# `inst_desc` for the ratio computed in the next cell.
for row in dateRes:
  inst_desc = int(row[0])
  print(int(row[0]))

In [None]:
# Result: share of the considered instances that have an rdfs:label or
# rdfs:comment (described instances / all considered instances).
tot_desc_inst = inst_desc / inst_tot
print(tot_desc_inst)
# recorded result: 0.82

# Accessibility



## Accessibility

### Dereferencing possibility of resources
URIs must be resolvable via HTTP requests and thus useful information should be returned in RDF. Metric: Over a sample of 20 URIs, we check if the URIs are accessible via HTTP requests and if they return useful RDF data by analyzing the HTTP status codes.

In [None]:
# Random sample of up to 10 distinct triple subjects.
q_uri = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>

SELECT DISTINCT ?uri WHERE {

?uri ?rel ?obj.

}ORDER BY RAND() LIMIT 10
'''

res = g.query(q_uri)

# Each sampled subject becomes a one-element row, ready to be written as a CSV line.
uri_list = [[row[0]] for row in res]
for (sampled_uri,) in uri_list:
  print(sampled_uri)

In [None]:
# Second random sample of up to 10 distinct triple subjects, restricted to
# subjects whose IRI does not contain "icon" (case-insensitive) and ignoring
# triples whose predicate is dcterms:license, dc:source or dcterms:rights.
q_uri2 = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>

SELECT DISTINCT ?uri WHERE {

?uri ?rel ?obj.
FILTER NOT EXISTS {FILTER (regex(str(?uri), "icon", "i")) }
FILTER (?rel !=	<http://purl.org/dc/terms/license>)
FILTER (?rel !=	<http://purl.org/dc/elements/1.1/source>)
FILTER (?rel !=	<http://purl.org/dc/terms/rights>)
}ORDER BY RAND() LIMIT 10
'''

res = g.query(q_uri2)

# The rows are appended to the `uri_list` created by the previous cell, so
# running this cell more than once adds the new sample on top of the old one.
for row in res:
  print(row[0])
  uri_list.append([row[0]])

In [None]:
# Save the sampled URIs, one per row, under a single "URI" column.
uri = store_csv("deferencing_validation.csv", ["URI"], uri_list)

## Interlinking

### Interlinking via owl:sameAs


Ratio of instances that have at least one owl:sameAs link to
an external knowledge graph. We calculate it for actual instances.



In [None]:
# Distinct typed resources that have at least one owl:sameAs value, with the
# listed classes filtered out.
# NOTE(review): each FILTER is applied per (?instance, ?type) pair, so a
# resource with several rdf:type values is still counted when at least one of
# its types is not in the list - confirm this is the intended exclusion.
q_same = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT DISTINCT (count(distinct ?instance) as ?tot) WHERE {

?instance a ?type; owl:sameAs ?ext.

FILTER (?type != icon:ArtisticMotif)
FILTER (?type != icon:Image)
FILTER (?type != icon:IntrinsicMeaning)
FILTER (?type != icon:Composition)
FILTER (?type != icon:PreiconographicalRecognition)
FILTER (?type != icon:FormalMotifRecognition)
FILTER (?type != icon:IconographicalRecognition)
FILTER (?type != icon:IconologicalRecognition)
FILTER (?type != icon:InterpretationDescription)
FILTER (?type != crm:E12_Production)
FILTER (?type != crm:E52_Time_Span)
FILTER (?type != crm:E13_Attribute_Assignment)

}
'''

dateRes = g.query(q_same)

# The count is read from the first column of each result row and kept in
# `same_desc` for the ratio computed in the next cell.
for row in dateRes:
  same_desc = int(row[0])
  print(int(row[0]))

In [None]:
# Share of the considered instances with an owl:sameAs link. The denominator
# `inst_tot` comes from the instance count in the "Description of resources" section.
tot_same_as = same_desc / inst_tot
tot_same_as

### Validity of external URIs

We measure the validity of external URIs by evaluating the
URIs from a URI sample set w.r.t. whether there is a timeout,
a client error (HTTP response 4xx) or a server error (HTTP
response 5xx).
Metrics: Random sample of 5 external URIs for artwork pictures, 5 for
Iconclass, 5 for Getty Vocabularies

In [None]:
# Collects the sampled external URLs; the following cells append to this list.
url_list = []
# Random sample of distinct foaf:depiction values.
# NOTE(review): the accompanying text announces 5 picture URLs, but the query
# uses LIMIT 10 - confirm which one is intended.
# NOTE(review): `foaf:` is bound here to <http://xmlns.com/foaf/spec/>; the
# FOAF vocabulary namespace is usually <http://xmlns.com/foaf/0.1/> - confirm
# which IRI the dataset actually uses, otherwise this query matches nothing.
q_depiction = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>

SELECT DISTINCT ?url WHERE {

?art foaf:depiction ?url.

}ORDER BY RAND() LIMIT 10
'''

dateRes = g.query(q_depiction)

# Each URL is stored as a one-element row so it can be written as a CSV line.
for row in dateRes:
  print(row[0])
  url_list.append([row[0]])

In [None]:

# Random sample of up to 5 distinct terms, taken from either the subject or the
# object position, whose string form contains "iconclass" (case-insensitive).
q_iconclass = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>

SELECT DISTINCT ?url WHERE {

{?s ?r ?url. } UNION {?url ?r ?o}

FILTER regex(str(?url), "iconclass", "i")
}ORDER BY RAND() LIMIT 5
'''

dateRes = g.query(q_iconclass)

# One-element rows, printed and then added to the shared sample list.
iconclass_rows = [[row[0]] for row in dateRes]
for iconclass_row in iconclass_rows:
  print(iconclass_row[0])
url_list.extend(iconclass_rows)

In [None]:
# Random sample of up to 5 distinct triple objects whose string form matches
# the pattern "vocab.getty" (case-insensitive).
q_getty = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>

SELECT DISTINCT ?url WHERE {

?s ?r ?url.

FILTER regex(str(?url), "vocab.getty", "i")
}ORDER BY RAND() LIMIT 5
'''

dateRes = g.query(q_getty)

# One-element rows, printed and then added to the shared sample list.
getty_rows = [[row[0]] for row in dateRes]
for getty_row in getty_rows:
  print(getty_row[0])
url_list += getty_rows

In [None]:
# Random sample of up to 5 distinct owl:sameAs targets.
q_same_as = '''
PREFIX d: <https://w3id.org/icon/data/>
PREFIX icon: <https://w3id.org/icon/ontology/>
PREFIX sim: <https://w3id.org/simulation/ontology/>
PREFIX dul: <http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#>
PREFIX crm: <http://www.cidoc-crm.org/cidoc-crm/>
prefix foaf: <http://xmlns.com/foaf/spec/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

SELECT DISTINCT ?url WHERE {

?s owl:sameAs ?url.

}ORDER BY RAND() LIMIT 5
'''

res = g.query(q_same_as)

# One-element rows, printed and then added to the shared sample list.
same_as_rows = [[row[0]] for row in res]
for same_as_row in same_as_rows:
  print(same_as_row[0])
url_list.extend(same_as_rows)

In [None]:
# Store the sampled external URLs in a CSV file, one per row, under a single
# "URL" column.

url = store_csv("validity_ext_uri.csv", ["URL"], url_list)

# SHACL Validation


In [None]:
# Ontology URIs used to create the SHACL shapes on the Astrea website
# (https://astrea.linkeddata.es/), one URI per line. The string is kept here
# as a record; no code in this cell reads it.
ont_shacl= '''
https://w3id.org/icon/ontology/
http://www.cidoc-crm.org/cidoc-crm/
https://w3id.org/simulation/ontology/
http://purl.org/spar/cito
http://www.w3.org/ns/dcat#
http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#
http://www.w3.org/2002/07/owl#
http://purl.org/spar/pro
http://www.w3.org/ns/prov#
http://www.w3.org/2000/01/rdf-schema#
http://www.w3.org/2004/02/skos/core#
'''


In [None]:
# Ontology URIs that caused errors during the SHACL generation and were
# therefore left out, one URI per line.
ont_removed_shacl = '''
http://xmlns.com/foaf/spec/
http://www.w3.org/2001/XMLSchema
http://dati.gov.it/onto/dcatapit
'''