# Interactive notebook supporting quality assesment of the EDAM ontology

Constrainsts are written through the SHACL standard. 
W3C standard: 
Book: https://book.validatingrdf.com

Author: Alban Gaignard

In [16]:
from rdflib import ConjunctiveGraph, URIRef
from rdflib.namespace import RDF
from pyshacl import validate
from jinja2 import Template
import requests
import json
#import warnings

In [17]:
#!wget "https://edamontology.org/EDAM_1.25.owl"

#warnings.filterwarnings('error')

#try:
kg = ConjunctiveGraph()
kg.parse("EDAM_1.25.owl")
print(len(kg))
#except Warning as w:
#    print('WARNNN')

36884


In [71]:
shape = """
        @prefix ns: <https://edamontology.org/> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix owl: <http://www.w3.org/2002/07/owl#> .
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

        ns:shape_deprecated a sh:NodeShape ;
            sh:targetClass  owl:Class ;

            sh:property [
                sh:path owl:deprecated ;
                sh:datatype xsd:boolean ;
                sh:nodeKind sh:Literal ; 
                sh:severity sh:Violation ; 
                sh:in   ( 0 1 "true" "false" ) ;
                sh:message "owl:deprecated is not well formed"
            ] ;
        .
    """

shape_graph = ConjunctiveGraph()
shape_graph.parse(data=shape, format="turtle")

r = validate(
        data_graph=kg,
        data_graph_format="turtle",
        shacl_graph=shape_graph,
        shacl_graph_format="turtle",
    )

conforms, results_graph, results_text = r

print(results_text)

Validation Report
Conforms: False
Results (1116):
Constraint Violation in DatatypeConstraintComponent (http://www.w3.org/ns/shacl#DatatypeConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:datatype xsd:boolean ; sh:in ( Literal("0", datatype=xsd:integer) Literal("1", datatype=xsd:integer) Literal("true") Literal("false") ) ; sh:message Literal("owl:deprecated is not well formed") ; sh:nodeKind sh:Literal ; sh:path owl:deprecated ; sh:severity sh:Violation ]
	Focus Node: :operation_2502
	Value Node: Literal("true")
	Result Path: owl:deprecated
	Message: owl:deprecated is not well formed
Constraint Violation in DatatypeConstraintComponent (http://www.w3.org/ns/shacl#DatatypeConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:datatype xsd:boolean ; sh:in ( Literal("0", datatype=xsd:integer) Literal("1", datatype=xsd:integer) Literal("true") Literal("false") ) ; sh:message Literal("owl:deprecated is not well formed") ; sh:nodeKind sh:Literal ; sh:path owl:depre

We can obtain a human-readable report. 

What about a machine-readable report ? 

In [16]:
print(results_graph.serialize(format="turtle"))

@prefix edam: <http://edamontology.org/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode edam:data_2028 ;
            sh:resultMessage "Value is not Literal with datatype xsd:boolean" ;
            sh:resultPath owl:deprecated ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:DatatypeConstraintComponent ;
            sh:sourceShape _:nbf11a0b808564eff969103666372d036b1 ;
            sh:value "true" ],
        [ a sh:ValidationResult ;
            sh:focusNode edam:data_1865 ;
            sh:resultMessage "Value is not Literal with datatype xsd:boolean" ;
            sh:resultPath owl:deprecated ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComp

We can query it and generate another a human-oriented representation. 

In [17]:
report_query = """
    SELECT ?node ?path ?severity WHERE {
        ?v rdf:type sh:ValidationReport ;
            sh:result ?r .
        ?r sh:focusNode ?node ;
            sh:sourceShape ?s .
                ?s sh:path ?path ;
                   sh:severity ?severity .
            }
        """

results = results_graph.query(report_query)

In [19]:
for r in results:
    if "#Warning" in r["severity"]:
        print(f'WARNING: Property {r["path"]} should be fixed for {r["node"]}')
    if "#Violation" in r["severity"]:
        print(f'ERROR: Property {r["path"]} must be fixed for {r["node"]}')

ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/operation_2502
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/topic_0783
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/topic_3533
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/data_1388
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/data_3496
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/operation_0351
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/topic_0172
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/data_1330
ERROR: Property http://www.w3.org/2002/07/owl#deprecated must be fixed for http://edamontology.org/data_2396
ERROR:

In [18]:
q = """
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>
PREFIX edam:<http://edamontology.org/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?entity ?value WHERE {
  
  ?entity owl:deprecated ?value .
  FILTER ( 
        ?value NOT IN ( 0, 1, false, true)
        #?value IN (false, true)
  )      
}
ORDER BY ?entity
"""

results = kg.query(q)
print(len(results))

for r in results:
    print(r)

1104
(rdflib.term.URIRef('http://edamontology.org/data_0581'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0583'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0831'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0832'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0835'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0843'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0848'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0851'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0852'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0853'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontology.org/data_0854'), rdflib.term.Literal('true'))
(rdflib.term.URIRef('http://edamontolo