diff --git a/pyshacl/cli.py b/pyshacl/cli.py
index c28d62e..b8ac6c1 100644
--- a/pyshacl/cli.py
+++ b/pyshacl/cli.py
@@ -76,6 +76,13 @@ def __call__(self, parser, namespace, values, option_string=None):
     default=False,
     help='Enable features from the SHACL-JS Specification.',
 )
+parser.add_argument(
+    '--iterate-rules',
+    dest='iterate_rules',
+    action='store_true',
+    default=False,
+    help="Run Shape's SHACL Rules iteratively until the data_graph reaches a steady state.",
+)
 parser.add_argument('--abort', dest='abort', action='store_true', default=False, help='Abort on first error.')
 parser.add_argument(
     '-d', '--debug', dest='debug', action='store_true', default=False, help='Output additional runtime messages.'
@@ -151,6 +158,11 @@ def main():
         validator_kwargs['advanced'] = True
     if args.js:
         validator_kwargs['js'] = True
+    if args.iterate_rules:
+        if not args.advanced:
+            sys.stderr.write("Iterate-Rules option only works when you enable Advanced Mode.\n")
+        else:
+            validator_kwargs['iterate_rules'] = True
     if args.abort:
         validator_kwargs['abort_on_error'] = True
     if args.shacl_file_format:
diff --git a/pyshacl/extras/js/rules.py b/pyshacl/extras/js/rules.py
index e119080..8498219 100644
--- a/pyshacl/extras/js/rules.py
+++ b/pyshacl/extras/js/rules.py
@@ -3,12 +3,14 @@
 import typing
 
 from pyshacl.consts import SH
+from pyshacl.errors import ReportableRuntimeError
 from pyshacl.rules.shacl_rule import SHACLRule
 
 from .js_executable import JSExecutable
 
 
 if typing.TYPE_CHECKING:
+    from pyshacl.pytypes import GraphLike
     from pyshacl.shape import Shape
     from pyshacl.shapes_graph import ShapesGraph
 
@@ -18,26 +20,46 @@
 class JSRule(SHACLRule):
     __slots__ = ('js_exe',)
 
-    def __init__(self, shape: 'Shape', rule_node):
-        super(JSRule, self).__init__(shape, rule_node)
+    def __init__(self, shape: 'Shape', rule_node, **kwargs):
+        super(JSRule, self).__init__(shape, rule_node, **kwargs)
         shapes_graph = shape.sg  # type: ShapesGraph
         self.js_exe = JSExecutable(shapes_graph, rule_node)
 
-    def apply(self, data_graph):
+    def apply(self, data_graph: 'GraphLike') -> int:
+        """Add this rule's triples to data_graph; return how many node runs produced a new triple."""
         focus_nodes = self.shape.focus_nodes(data_graph)  # uses target nodes to find focus nodes
-        applicable_nodes = self.filter_conditions(focus_nodes, data_graph)
-        sets_to_add = []
-        for a in applicable_nodes:
-            args_map = {"this": a}
-            results = self.js_exe.execute(data_graph, args_map, mode="construct")
-            triples = results['_result']
-            if triples is not None and isinstance(triples, (list, tuple)):
-                set_to_add = set()
-                for t in triples:
-                    s, p, o = t[:3]
-                    set_to_add.add((s, p, o))
-                sets_to_add.append(set_to_add)
-        for s in sets_to_add:
-            for t in s:
-                data_graph.add(t)
-        return
+        all_added = 0
+        iterate_limit = 100
+        while True:
+            if iterate_limit < 1:
+                raise ReportableRuntimeError("Local rule iteration exceeded iteration limit of 100.")
+            iterate_limit -= 1
+            added = 0
+            applicable_nodes = self.filter_conditions(focus_nodes, data_graph)
+            sets_to_add = []
+            for a in applicable_nodes:
+                args_map = {"this": a}
+                results = self.js_exe.execute(data_graph, args_map, mode="construct")
+                triples = results['_result']
+                this_added = False
+                if triples is not None and isinstance(triples, (list, tuple)):
+                    set_to_add = set()
+                    for t in triples:
+                        s, p, o = tr = tuple(t[:3])  # a tuple, so it stays hashable even when t is a list
+                        if not this_added and tr not in data_graph:
+                            this_added = True
+                        set_to_add.add(tr)
+                    sets_to_add.append(set_to_add)
+                if this_added:
+                    added += 1
+            if added > 0:
+                all_added += added
+                for s in sets_to_add:
+                    for t in s:
+                        data_graph.add(t)
+                if self.iterate:
+                    continue  # Jump up to iterate
+                else:
+                    break  # Don't iterate
+            break
+        return all_added
diff --git a/pyshacl/rules/__init__.py b/pyshacl/rules/__init__.py
index 4ffa6b0..a32400e 100644
--- a/pyshacl/rules/__init__.py
+++ b/pyshacl/rules/__init__.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union
 
 from pyshacl.consts import RDF_type, SH_rule, SH_SPARQLRule, SH_TripleRule
-from pyshacl.errors import RuleLoadError
+from pyshacl.errors import ReportableRuntimeError, RuleLoadError
 from pyshacl.pytypes import GraphLike
 from pyshacl.rules.sparql import SPARQLRule
 from pyshacl.rules.triple import TripleRule
@@ -16,7 +16,7 @@
     from .shacl_rule import SHACLRule
 
 
-def gather_rules(shacl_graph: 'ShapesGraph') -> Dict['Shape', List['SHACLRule']]:
+def gather_rules(shacl_graph: 'ShapesGraph', iterate_rules=False) -> Dict['Shape', List['SHACLRule']]:
     """
 
     :param shacl_graph:
@@ -63,7 +63,7 @@ def gather_rules(shacl_graph: 'ShapesGraph') -> Dict['Shape', List['SHACLRule']]
                     "https://www.w3.org/TR/shacl-af/#rules-syntax",
                 )
             if obj in triple_rule_nodes:
-                rule: SHACLRule = TripleRule(shape, obj)
+                rule: SHACLRule = TripleRule(shape, obj, iterate=iterate_rules)
             elif obj in sparql_rule_nodes:
                 rule = SPARQLRule(shape, obj)
             elif use_JSRule and callable(use_JSRule) and obj in js_rule_nodes:
@@ -77,13 +77,38 @@ def gather_rules(shacl_graph: 'ShapesGraph') -> Dict['Shape', List['SHACLRule']]
     return ret_rules
 
 
-def apply_rules(shapes_rules: Dict, data_graph: GraphLike):
+def apply_rules(shapes_rules: Dict, data_graph: GraphLike, iterate=False) -> int:
+    """
+    Apply the rules of every shape to data_graph, one shape at a time in sh:order.
+
+    :param shapes_rules: mapping of Shape -> list of SHACLRule, as returned by gather_rules
+    :param data_graph: the graph the rules read from and add triples to
+    :param iterate: when True, re-run a shape's rules until a pass reports no modifications
+    :return: the sum of the counts returned by every rule application
+    :raises ReportableRuntimeError: if a shape's rules still report modifications after 100 passes
+    """
     # short the shapes dict by shapes sh:order before execution
     sorted_shapes_rules: List[Tuple[Any, Any]] = sorted(shapes_rules.items(), key=lambda x: x[0].order)
+    total_modified = 0
     for shape, rules in sorted_shapes_rules:
         # sort the rules by the sh:order before execution
         rules = sorted(rules, key=lambda x: x.order)
-        for r in rules:
-            if r.deactivated:
-                continue
-            r.apply(data_graph)
+        iterate_limit = 100
+        while True:
+            if iterate_limit < 1:
+                raise ReportableRuntimeError("SHACL Shape Rule iteration exceeded iteration limit of 100.")
+            iterate_limit -= 1
+            this_modified = 0
+            for r in rules:
+                if r.deactivated:
+                    continue
+                n_modified = r.apply(data_graph)
+                this_modified += n_modified
+            if this_modified > 0:
+                total_modified += this_modified
+                if iterate:
+                    continue
+                else:
+                    break
+            break
+    return total_modified
diff --git a/pyshacl/rules/shacl_rule.py b/pyshacl/rules/shacl_rule.py
index f366554..dbdcfcf 100644
--- a/pyshacl/rules/shacl_rule.py
+++ b/pyshacl/rules/shacl_rule.py
@@ -25,9 +25,9 @@ def validate_condition(self, data_graph, focus_node):
 
 
 class SHACLRule(object):
-    __slots__ = ("shape", "node", "_deactivated")
+    __slots__ = ("shape", "node", "iterate", "_deactivated")
 
-    def __init__(self, shape, rule_node):
+    def __init__(self, shape, rule_node, iterate=False):
         """
 
         :param shape:
@@ -38,6 +38,8 @@ def __init__(self, shape, rule_node):
         super(SHACLRule, self).__init__()
         self.shape = shape
         self.node = rule_node
+        self.iterate = iterate  # read by the subclasses' apply() to decide whether to loop locally
+
         deactivated_nodes = list(self.shape.sg.objects(self.node, SH_deactivated))
         self._deactivated = len(deactivated_nodes) > 0 and bool(deactivated_nodes[0])
 
diff --git a/pyshacl/rules/sparql/__init__.py b/pyshacl/rules/sparql/__init__.py
index 85b7a83..bac6d79 100644
--- a/pyshacl/rules/sparql/__init__.py
+++ b/pyshacl/rules/sparql/__init__.py
@@ -15,6 +15,7 @@
 
 
 if TYPE_CHECKING:
+    from pyshacl.pytypes import GraphLike
     from pyshacl.shape import Shape
 
 XSD_string = XSD.term('string')
@@ -23,7 +24,7 @@
 class SPARQLRule(SHACLRule):
     __slots__ = ("_constructs", "_qh")
 
-    def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier'):
+    def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier', **kwargs):
         """
 
         :param shape:
@@ -31,7 +32,7 @@ def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier'):
         :param rule_node:
         :type rule_node: rdflib.term.Identifier
         """
-        super(SPARQLRule, self).__init__(shape, rule_node)
+        super(SPARQLRule, self).__init__(shape, rule_node, **kwargs)
         construct_nodes = set(self.shape.sg.objects(self.node, SH_construct))
         if len(construct_nodes) < 1:
             raise RuleLoadError("No sh:construct on SPARQLRule", "https://www.w3.org/TR/shacl-af/#SPARQLRule")
@@ -49,21 +50,45 @@ def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier'):
         query_helper.collect_prefixes()
         self._qh = query_helper
 
-    def apply(self, data_graph):
+    def apply(self, data_graph: 'GraphLike') -> int:
+        """Add this rule's CONSTRUCT results to data_graph; return how many queries produced a new triple."""
         focus_nodes = self.shape.focus_nodes(data_graph)  # uses target nodes to find focus nodes
-        applicable_nodes = self.filter_conditions(focus_nodes, data_graph)
-        construct_graphs = set()
+        all_added = 0
         SPARQLQueryHelper = get_query_helper_cls()
-        for a in applicable_nodes:
-            for c in self._constructs:
-                init_bindings = {}
-                found_this = SPARQLQueryHelper.bind_this_regex.search(c)
-                if found_this:
-                    init_bindings['this'] = a
-                c = self._qh.apply_prefixes(c)
-                results = data_graph.query(c, initBindings=init_bindings)
-                if results.type != "CONSTRUCT":
-                    raise ReportableRuntimeError("Query executed by a SHACL SPARQLRule must be CONSTRUCT query.")
-                construct_graphs.add(results.graph)
-        for g in construct_graphs:
-            data_graph = clone_graph(g, target_graph=data_graph)
+        iterate_limit = 100
+        while True:
+            if iterate_limit < 1:
+                raise ReportableRuntimeError("Local rule iteration exceeded iteration limit of 100.")
+            iterate_limit -= 1
+            added = 0
+            applicable_nodes = self.filter_conditions(focus_nodes, data_graph)
+            construct_graphs = set()
+            for a in applicable_nodes:
+                for c in self._constructs:
+                    init_bindings = {}
+                    found_this = SPARQLQueryHelper.bind_this_regex.search(c)
+                    if found_this:
+                        init_bindings['this'] = a
+                    c = self._qh.apply_prefixes(c)
+                    results = data_graph.query(c, initBindings=init_bindings)
+                    if results.type != "CONSTRUCT":
+                        raise ReportableRuntimeError("Query executed by a SHACL SPARQLRule must be CONSTRUCT query.")
+                    this_added = False
+                    for i in results.graph:
+                        if not this_added and i not in data_graph:
+                            this_added = True
+                            # We only need to know at least one triple was added, then break!
+                            break
+                    if this_added:
+                        added += 1
+                        construct_graphs.add(results.graph)
+            if added > 0:
+                for g in construct_graphs:
+                    data_graph = clone_graph(g, target_graph=data_graph)
+                all_added += added
+                if self.iterate:
+                    continue  # Jump up to iterate
+                else:
+                    break  # Don't iterate
+            break  # We've reached a local steady state
+        return all_added
diff --git a/pyshacl/rules/triple/__init__.py b/pyshacl/rules/triple/__init__.py
index f5cbf5c..76fcb67 100644
--- a/pyshacl/rules/triple/__init__.py
+++ b/pyshacl/rules/triple/__init__.py
@@ -31,7 +31,7 @@
 class TripleRule(SHACLRule):
     __slots__ = ("s", "p", "o")
 
-    def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier'):
+    def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier', **kwargs):
         """
 
         :param shape:
@@ -39,7 +39,7 @@ def __init__(self, shape: 'Shape', rule_node: 'rdflib.term.Identifier'):
         :param rule_node:
         :type rule_node: rdflib.term.Identifier
         """
-        super(TripleRule, self).__init__(shape, rule_node)
+        super(TripleRule, self).__init__(shape, rule_node, **kwargs)
         my_subject_nodes = set(self.shape.sg.objects(self.node, SH_subject))
         if len(my_subject_nodes) < 1:
             raise RuntimeError("No sh:subject")
@@ -183,13 +183,37 @@ def get_nodes_from_node_expression(
         else:
             raise NotImplementedError("Unsupported expression s, p, or o, in SHACL TripleRule")
 
-    def apply(self, data_graph):
+    def apply(self, data_graph: 'GraphLike') -> int:
+        """Add this rule's triples to data_graph; return how many focus nodes produced a new triple."""
         focus_nodes = self.shape.focus_nodes(data_graph)  # uses target nodes to find focus nodes
         applicable_nodes = self.filter_conditions(focus_nodes, data_graph)
-        for a in applicable_nodes:
-            s_set = self.get_nodes_from_node_expression(self.s, a, data_graph)
-            p_set = self.get_nodes_from_node_expression(self.p, a, data_graph)
-            o_set = self.get_nodes_from_node_expression(self.o, a, data_graph)
-            new_triples = itertools.product(s_set, p_set, o_set)
-            for i in iter(new_triples):
-                data_graph.add(i)
+        all_added = 0
+        iterate_limit = 100
+        while True:
+            if iterate_limit < 1:
+                raise ReportableRuntimeError("sh:rule iteration exceeded iteration limit of 100.")
+            iterate_limit -= 1
+            added = 0
+            to_add = []
+            for a in applicable_nodes:
+                s_set = self.get_nodes_from_node_expression(self.s, a, data_graph)
+                p_set = self.get_nodes_from_node_expression(self.p, a, data_graph)
+                o_set = self.get_nodes_from_node_expression(self.o, a, data_graph)
+                new_triples = itertools.product(s_set, p_set, o_set)
+                this_added = False
+                for i in iter(new_triples):
+                    if not this_added and i not in data_graph:
+                        this_added = True
+                    to_add.append(i)
+                if this_added:
+                    added += 1
+            if added > 0:
+                for i in to_add:
+                    data_graph.add(i)
+                all_added += added
+                if self.iterate:
+                    continue  # Jump up to iterate
+                else:
+                    break  # Don't iterate
+            break
+        return all_added
diff --git a/pyshacl/validate.py b/pyshacl/validate.py
index 01450fb..9065e8f 100644
--- a/pyshacl/validate.py
+++ b/pyshacl/validate.py
@@ -69,6 +69,7 @@ def _load_default_options(cls, options_dict: dict):
         options_dict.setdefault('inference', 'none')
         options_dict.setdefault('inplace', False)
         options_dict.setdefault('use_js', False)
+        options_dict.setdefault('iterate_rules', False)
         options_dict.setdefault('abort_on_error', False)
         if 'logger' not in options_dict:
             options_dict['logger'] = logging.getLogger(__name__)
@@ -221,10 +222,13 @@ def run(self):
             self._target_graph = the_target_graph
 
         shapes = self.shacl_graph.shapes  # This property getter triggers shapes harvest.
-
+        iterate_rules = self.options.get("iterate_rules", False)
         if self.options['advanced']:
             target_types = gather_target_types(self.shacl_graph)
-            advanced = {'functions': gather_functions(self.shacl_graph), 'rules': gather_rules(self.shacl_graph)}
+            advanced = {
+                'functions': gather_functions(self.shacl_graph),
+                'rules': gather_rules(self.shacl_graph, iterate_rules=iterate_rules),
+            }
             for s in shapes:
                 s.set_advanced(True)
             apply_target_types(target_types)
@@ -245,7 +249,7 @@ def run(self):
         for g in named_graphs:
             if advanced:
                 apply_functions(advanced['functions'], g)
-                apply_rules(advanced['rules'], g)
+                apply_rules(advanced['rules'], g, iterate=iterate_rules)
             for s in shapes:
                 _is_conform, _reports = s.validate(g)
                 non_conformant = non_conformant or (not _is_conform)
@@ -372,6 +376,7 @@ def validate(
         )
     rdflib_bool_unpatch()
     use_js = kwargs.pop('js', None)
+    iterate_rules = kwargs.pop('iterate_rules', False)
     validator = None
     try:
         validator = Validator(
@@ -383,6 +388,7 @@ def validate(
                 'inplace': inplace,
                 'abort_on_error': abort_on_error,
                 'advanced': advanced,
+                'iterate_rules': iterate_rules,
                 'use_js': use_js,
                 'logger': log,
             },
diff --git a/test/issues/test_076.py b/test/issues/test_076.py
new file mode 100644
index 0000000..e04c659
--- /dev/null
+++ b/test/issues/test_076.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+#
+"""
+https://github.com/RDFLib/pySHACL/issues/76
+"""
+import rdflib
+
+from pyshacl import validate
+
+shacl_file_text = '''
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix sh: <http://www.w3.org/ns/shacl#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix : <http://example.com/issue/076#> .
+
+<http://example.com/issue/076#>  # NOTE(review): subject IRI was lost in the reviewed text; assumed - confirm
+  rdf:type owl:Ontology ;
+  owl:imports <http://www.w3.org/ns/shacl#> ;  # NOTE(review): import target was lost; assumed - confirm
+  sh:declare [
+    sh:prefix "" ;
+    sh:namespace "http://example.com/issue/076#"^^xsd:anyURI ;
+  ] .
+
+:TopConceptRule
+  a sh:NodeShape ;
+  sh:property [
+    sh:path skos:topConceptOf ;
+    sh:minCount 1 ;
+  ] .
+
+:DepthRule
+  a sh:NodeShape ;
+  sh:targetClass skos:Concept ;
+  sh:rule [
+    a sh:SPARQLRule ;
+    sh:prefixes skos:, : ;
+    sh:order 1 ;
+    sh:condition :TopConceptRule ;
+    sh:construct """
+      CONSTRUCT {
+        $this :hasDepth 0 .
+      }
+      WHERE {
+      }
+    """ ;
+  ] ;
+  sh:rule [
+    a sh:SPARQLRule ;
+    sh:prefixes skos:, : ;
+    sh:order 2 ;
+    sh:construct """
+      CONSTRUCT {
+        $this :hasDepth ?plusOne .
+      }
+      WHERE {
+        $this skos:broader ?parent .
+        ?parent :hasDepth ?depth .
+        bind(?depth + 1 as ?plusOne)
+      }
+    """ ;
+  ] .
+'''
+
+data_file_text = """
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix dct: <http://purl.org/dc/terms/> .
+@prefix xml: <http://www.w3.org/XML/1998/namespace> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix ex: <http://example.com#> .
+
+ex:animalsVocabulary rdf:type skos:ConceptScheme;
+  dct:title "Animals Vocabulary"@en;
+  skos:hasTopConcept ex:animals .
+
+ex:animals rdf:type skos:Concept;
+  skos:prefLabel "animals"@en;
+  skos:inScheme ex:animalsVocabulary;
+  skos:topConceptOf ex:animalsVocabulary .
+
+ex:cat rdf:type skos:Concept;
+  skos:prefLabel "cat"@en;
+  skos:broader ex:animals ;
+  skos:inScheme ex:animalsVocabulary.
+
+ex:wildcat a skos:Concept;
+  skos:inScheme ex:animalsVocabulary;
+  skos:broader ex:cat .
+
+ex:europeanWildcat a skos:Concept;
+  skos:inScheme ex:animalsVocabulary;
+  skos:broader ex:wildcat .
+"""
+
+def test_076_positive():
+    data = rdflib.Graph()
+    data.parse(data=data_file_text, format="turtle")
+    res = validate(data, shacl_graph=shacl_file_text,
+                   data_graph_format='turtle', shacl_graph_format='turtle',
+                   inference='rdfs', inplace=True, advanced=True, iterate_rules=True, debug=True)
+    conforms, graph, string = res
+    find_s = rdflib.URIRef("http://example.com#europeanWildcat")
+    find_p = rdflib.URIRef("http://example.com/issue/076#hasDepth")
+    find_o = rdflib.Literal(3)
+    assert (find_s, find_p, find_o) in data
+
+def test_076_negative():
+    data = rdflib.Graph()
+    data.parse(data=data_file_text, format="turtle")
+    res = validate(data, shacl_graph=shacl_file_text,
+                   data_graph_format='turtle', shacl_graph_format='turtle',
+                   inference='rdfs', inplace=True, advanced=True, iterate_rules=False, debug=True)
+    conforms, graph, string = res
+    find_s = rdflib.URIRef("http://example.com#europeanWildcat")
+    find_p = rdflib.URIRef("http://example.com/issue/076#hasDepth")
+    find_o = rdflib.Literal(3)
+    assert (find_s, find_p, find_o) not in data
+
+if __name__ == "__main__":
+    test_076_positive()
+    test_076_negative()