In [16]:
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import ConjunctiveGraph, URIRef
from rdflib.namespace import RDF
from pyshacl import validate
from jinja2 import Template
import requests
import json
import pandas as pd

# Step 1
## loading Bioschemas markup into an RDF Knowledge Graph

In [57]:
# Sample Bioschemas annotation, written in Turtle, describing a single tool
# typed as both schema:SoftwareApplication and prov:SoftwareAgent.
bioschemas_markup = """
@prefix ex: <http://bioschemas.validation.tutorial/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix schema: <http://schema.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .

ex:myTool rdf:type schema:SoftwareApplication, prov:SoftwareAgent ;
    schema:description "This tool does ... " ;
    schema:license <https://spdx.org/licenses/MIT.html> ;
    schema:codeRepository <http://github.com/> .
"""

# Load the Turtle snippet into an in-memory knowledge graph.
kg = ConjunctiveGraph()
kg.parse(format="turtle", data=bioschemas_markup)

# Write the graph back out as Turtle to check what was actually loaded.
kg_as_turtle = kg.serialize(format="turtle")
print(kg_as_turtle)

@prefix ex: <http://bioschemas.validation.tutorial/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix schema: <http://schema.org/> .

ex:myTool a schema:SoftwareApplication,
        prov:SoftwareAgent ;
    schema:codeRepository <http://github.com/> ;
    schema:description "This tool does ... " ;
    schema:license <https://spdx.org/licenses/MIT.html> .




In [58]:
# Walk over every statement in the knowledge graph (a pattern of three
# wildcards matches everything) and print it as "subject --> predicate --> object".
all_triples = kg.triples((None, None, None))
for subject, predicate, obj in all_triples:
    print(f'{subject} --> {predicate} --> {obj}')

http://bioschemas.validation.tutorial/myTool --> http://www.w3.org/1999/02/22-rdf-syntax-ns#type --> http://www.w3.org/ns/prov#SoftwareAgent
http://bioschemas.validation.tutorial/myTool --> http://www.w3.org/1999/02/22-rdf-syntax-ns#type --> http://schema.org/SoftwareApplication
http://bioschemas.validation.tutorial/myTool --> http://schema.org/license --> https://spdx.org/licenses/MIT.html
http://bioschemas.validation.tutorial/myTool --> http://schema.org/description --> This tool does ... 
http://bioschemas.validation.tutorial/myTool --> http://schema.org/codeRepository --> http://github.com/


# Step 2
## writing and executing a simple SHACL shape

A SHACL shape checking that a `schema:description` is provided.

In [47]:
# SHACL shape, written in Turtle, applied to every instance of
# sc:SoftwareApplication (sh:targetClass). It holds a single property
# constraint: at least one sc:description value must be present
# (sh:minCount 1), and a failure is reported with severity sh:Violation.
# Only the ns:, sc: and sh: prefixes are used by the shape body; the other
# declarations are unused here.
shape_ok = """
        @prefix ns: <https://fair-checker.france-bioinformatique.fr#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix sc: <http://schema.org/> .
        @prefix bsc: <https://bioschemas.org/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix edam: <http://edamontology.org/> .
        @prefix biotools: <https://bio.tools/ontology/> .

        ns:shape_1 a sh:NodeShape ;
            sh:targetClass  sc:SoftwareApplication ;

            sh:property [
                sh:path sc:description ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
        .
    """

In [48]:
# Parse the Turtle shape into its own graph, kept separate from the data graph.
shape_graph = ConjunctiveGraph()
shape_graph.parse(format="turtle", data=shape_ok)

# Run the SHACL validation of the knowledge graph against the shape.
r = validate(
    kg,
    shacl_graph=shape_graph,
    data_graph_format="turtle",
    shacl_graph_format="turtle",
)

# The result unpacks into: conformance flag, report graph, report text.
(conforms, results_graph, results_text) = r

print(results_text)

Validation Report
Conforms: True



Now, we verify that 
- a `schema:description`, AND a `schema:name` MUST BE present
- a `schema:citation` SHOULD BE present

In [49]:
# Stricter SHACL shape, written in Turtle, applied to every instance of
# sc:SoftwareApplication (sh:targetClass). It holds three property constraints,
# each requiring at least one value (sh:minCount 1):
#   - sc:description -> reported as sh:Violation when missing
#   - sc:name        -> reported as sh:Violation when missing
#   - sc:citation    -> reported as sh:Warning when missing
shape_more_strict = """
        @prefix ns: <https://fair-checker.france-bioinformatique.fr#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix sc: <http://schema.org/> .
        @prefix bsc: <https://bioschemas.org/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix edam: <http://edamontology.org/> .
        @prefix biotools: <https://bio.tools/ontology/> .

        ns:shape_1 a sh:NodeShape ;
            sh:targetClass  sc:SoftwareApplication ;

            sh:property [
                sh:path sc:description ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
            
            sh:property [
                sh:path sc:name ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
            
            sh:property [
                sh:path sc:citation ;
                sh:minCount 1 ;
                sh:severity sh:Warning
            ] ;
        .
    """

In [50]:
# Start from a fresh shape graph so the earlier shape does not linger,
# then load the stricter shape into it.
shape_graph = ConjunctiveGraph()
shape_graph.parse(format="turtle", data=shape_more_strict)

# Validate the same knowledge graph against the stricter shape.
r = validate(
    kg,
    shacl_graph=shape_graph,
    shacl_graph_format="turtle",
    data_graph_format="turtle",
)

# Unpack the outcome: conformance flag, report graph, report text.
(conforms, results_graph, results_text) = r

print(results_text)

Validation Report
Conforms: False
Results (2):
Constraint Violation in MinCountConstraintComponent (http://www.w3.org/ns/shacl#MinCountConstraintComponent):
	Severity: sh:Violation
	Source Shape: [ sh:minCount Literal("1", datatype=xsd:integer) ; sh:path sc:name ; sh:severity sh:Violation ]
	Focus Node: ex:myTool
	Result Path: sc:name
	Message: Less than 1 values on ex:myTool->sc:name
Validation Result in MinCountConstraintComponent (http://www.w3.org/ns/shacl#MinCountConstraintComponent):
	Focus Node: ex:myTool
	Result Path: sc:citation
	Message: Less than 1 values on ex:myTool->sc:citation



We can obtain a human-readable report. 

What about a machine-readable report?

In [51]:
# The validation report is itself an RDF graph: serialize it to Turtle
# to get the machine-readable form of the report.
report_turtle = results_graph.serialize(format="turtle")
print(report_turtle)

@prefix sc: <http://schema.org/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a sh:ValidationReport ;
    sh:conforms false ;
    sh:result [ a sh:ValidationResult ;
            sh:focusNode <http://bioschemas.validation.tutorial/myTool> ;
            sh:resultMessage "Less than 1 values on ex:myTool->sc:citation" ;
            sh:resultPath sc:citation ;
            sh:sourceConstraintComponent sh:MinCountConstraintComponent ;
            sh:sourceShape [ sh:minCount 1 ;
                    sh:path sc:citation ;
        [ a sh:ValidationResult ;
            sh:focusNode <http://bioschemas.validation.tutorial/myTool> ;
            sh:resultMessage "Less than 1 values on ex:myTool->sc:name" ;
            sh:resultPath sc:name ;
            sh:resultSeverity sh:Violation ;
            sh:sourceConstraintComponent sh:MinCountConstraintComponent ;
            sh:sourceShape [ sh:minCount 1 ;
                    sh:path sc:name ;
     

We can query it and generate another human-oriented representation.

In [55]:
# SPARQL query listing, for every result of the validation report, the focus
# node, the offending property path and the severity.
#
# The severity and path are read from the validation result itself
# (sh:resultSeverity / sh:resultPath) rather than from the source shape:
# a shape is allowed to omit sh:severity, and going through sh:sourceShape
# would then return no row at all for that result, hiding it from the summary.
# Prefixes are declared explicitly so the query does not depend on whatever
# namespace bindings the report graph happens to carry.
report_query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX sh: <http://www.w3.org/ns/shacl#>

    SELECT ?node ?path ?severity WHERE {
        ?report rdf:type sh:ValidationReport ;
            sh:result ?result .
        ?result sh:focusNode ?node ;
            sh:resultPath ?path ;
            sh:resultSeverity ?severity .
    }
"""

# Each row exposes the ?node, ?path and ?severity bindings by name.
results = results_graph.query(report_query)

In [56]:
# Turn each row of the report query into a one-line, human-readable message.
# The loop variable is deliberately not called `r`, so the validation tuple
# stored under that name by the earlier validation cell is not overwritten.
for row in results:
    severity = row["severity"]
    path, node = row["path"], row["node"]
    if "#Violation" in severity:
        print(f'ERROR: Property {path} must be provided for {node}')
    elif "#Warning" in severity:
        print(f'WARNING: Property {path} should be provided for {node}')
    else:
        # Any other severity (e.g. sh:Info) is still reported instead of
        # being silently dropped.
        print(f'INFO: Property {path} could be provided for {node} (severity: {severity})')

ERROR: Property http://schema.org/name must be provided for http://bioschemas.validation.tutorial/myTool


# Step 3
## Generate shapes based on templates

In [60]:
# Properties that MUST be present (rendered as sh:Violation constraints).
minimal_properties = ["sc:name", "sc:description"]
# Properties that SHOULD be present (rendered as sh:Warning constraints).
# Every entry has to be a complete prefixed name: a bare prefix such as "sc:"
# would expand to the schema.org namespace IRI itself and yield a constraint
# on that IRI rather than on an actual property.
# NOTE(review): "sc:license" replaces the incomplete "sc:" entry - confirm this
# is the property that was intended.
recommended_properties = ["sc:citation", "sc:license"]

# Jinja2 template of a SHACL node shape targeting sc:SoftwareApplication.
# One sh:property block (sh:minCount 1) is emitted per listed property.
shape_template = """
        @prefix ns: <https://fair-checker.france-bioinformatique.fr#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix sc: <http://schema.org/> .
        @prefix bsc: <https://bioschemas.org/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix edam: <http://edamontology.org/> .
        @prefix biotools: <https://bio.tools/ontology/> .

        ns:shape_1 a sh:NodeShape ;
            sh:targetClass  sc:SoftwareApplication ;

            {% for min_prop in minimal_properties %}
            sh:property [
                sh:path {{min_prop}} ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
            {% endfor %}

            {% for rec_prop in recommended_properties %}
            sh:property [
                sh:path {{rec_prop}} ;
                sh:minCount 1 ;
                sh:severity sh:Warning
            ] ;
            {% endfor %}
        .
"""

# Render the template with both property lists to obtain the final Turtle shape.
template = Template(shape_template)
shape = template.render(
        minimal_properties=minimal_properties,
        recommended_properties=recommended_properties,
)

print(shape)


        @prefix ns: <https://fair-checker.france-bioinformatique.fr#> .
        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
        @prefix sc: <http://schema.org/> .
        @prefix bsc: <https://bioschemas.org/> .
        @prefix dct: <http://purl.org/dc/terms/> .
        @prefix sh: <http://www.w3.org/ns/shacl#> .
        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
        @prefix edam: <http://edamontology.org/> .
        @prefix biotools: <https://bio.tools/ontology/> .

        ns:shape_1 a sh:NodeShape ;
            sh:targetClass  sc:SoftwareApplication ;

            
            sh:property [
                sh:path sc:name ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
            
            sh:property [
                sh:path sc:description ;
                sh:minCount 1 ;
                sh:severity sh:Violation
            ] ;
         

# Step 4
## trying the bioschemas-shacl-validator

In [66]:
# Move into the sibling `bioschemas_shacl` directory, then run its `main.py`
# script on the bio.tools JASPAR page.
# NOTE(review): the `%cd` path is relative, so re-running this cell without
# restarting the kernel resolves it from the new location - confirm.
# NOTE(review): `-u` presumably stands for the URL to validate - verify
# against main.py.
%cd ../bioschemas_shacl
!python main.py -u "https://bio.tools/jaspar"

/Users/gaignard-a/Documents/Dev/bioschemas/bioschemas-validation/bioschemas_shacl


Current google-chrome version is 96.0.4664
Get LATEST chromedriver version for 96.0.4664 google-chrome
Driver [/Users/gaignard-a/.wdm/drivers/chromedriver/mac64/96.0.4664.45/chromedriver] found in cache
[92m──────────── [0m[1;31mBioschemas validation for URL [0m[1;4;31mhttps://bio.tools/jaspar[0m[92m ────────────[0m
95 retrieved triples in KG
@prefix biotools: <https://bio.tools/ontology/> .
@prefix bsc: <http://bioschemas.org/> .
@prefix bsct: <http://bioschemas.org/types/> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix edam: <http://edamontology.org/> .
@prefix ns1: <http://ogp.me/ns#> .
@prefix sc: <http://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<file:///Users/gaignard-a/Documents/Dev/bioschemas/bioschemas-validation/bioschemas_shacl/> ns1:description "{{ngMeta['og:description']}}" ;
    ns1:image "{{ngMeta['og:image']}}" ;
    ns1:title "{{ngMeta['og:title']}

ERROR: Property http://schema.org/name must be provided for https://bio.tools/jaspar
ERROR: Property http://schema.org/description must be provided for https://bio.tools/jaspar
ERROR: Property http://schema.org/url must be provided for https://bio.tools/jaspar
[1m{[0m
    [32m'https://bio.tools/jaspar'[0m: [1m{[0m
        [32m'type'[0m: [32m'http://schema.org/SoftwareApplication'[0m,
            [32m'http://schema.org/additionalType for https://bio.tools/jaspar'[0m,
            [32m'http://schema.org/applicationCategory for [0m
[32mhttps://bio.tools/jaspar'[0m,
            [32m'http://schema.org/author for https://bio.tools/jaspar'[0m,
            [32m'http://schema.org/license for https://bio.tools/jaspar'[0m,
            [32m'http://schema.org/softwareVersion for https://bio.tools/jaspar'[0m
        [1m][0m,
        [32m'errors'[0m: [1m[[0m
            [32m'http://schema.org/name for https://bio.tools/jaspar'[0m,
            [32m'http://schema.org/descr

In [67]:
# Run the same `main.py` script on a Bgee gene page.
# NOTE(review): this presumably relies on the working directory already being
# the `bioschemas_shacl` folder (set by an earlier `%cd`) - confirm.
!python main.py -u "https://bgee.org/?page=gene&gene_id=ENSG00000274928"



Current google-chrome version is 96.0.4664
Get LATEST chromedriver version for 96.0.4664 google-chrome
Driver [/Users/gaignard-a/.wdm/drivers/chromedriver/mac64/96.0.4664.45/chromedriver] found in cache
[92m─ [0m[1;31mBioschemas validation for URL [0m[1;4;31mhttps://bgee.org/?[0m[1;4;31mpage[0m[1;4;31m=[0m[1;4;31mgene[0m[1;4;31m&[0m[1;4;31mgene_id[0m[1;4;31m=[0m[1;4;31mENSG00000…[0m[92m ─[0m
@prefix bsc: <https://bioschemas.org/> .
@prefix ns2: <http://www.w3.org/1999/xhtml/vocab#> .
@prefix sc: <http://schema.org/> .

<file:///Users/gaignard-a/Documents/Dev/bioschemas/bioschemas-validation/bioschemas_shacl/> ns2:license <https://creativecommons.org/publicdomain/zero/1.0/> .

<http://mar2016.archive.ensembl.org/Homo_sapiens/Gene/Summary?g=ENSG00000274928> a bsc:Gene .

<https://bgee.org/?page=species&species_id=9606> bsc:name "Homo sapiens" .

<https://oncomx.org/searchview/?gene=KRT89P> a bsc:Gene .

<https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id