From e8c3a0e5472c86183debb672f682f047edb47f5f Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Mon, 11 Apr 2022 18:50:03 +0200 Subject: [PATCH] Add two xfails related to Example 2 from RDF 1.1 TriG specification The first xfail occurs during round tripping, TriG seems to be making some mistake when encoding blank nodes, as it is encoding that "Bob" knows someone who does not exist. This was reported by @gjhiggins in https://github.com/RDFLib/rdflib/pull/1796#discussion_r846781069 The second xfail seems to be related to hextuple parsing, when comparing the hextuple parsed result of Example 2 with the TriG parsed graph of Example 2 the graphs are not isomorphic more than 70% of the time, but sometimes they are isomorphic. I noticed this while adding the xfail for the issue @gjhiggins noticed. Other changes: - Added `simple_quad` to variants tests with HexTuple and TriG format. - Added an additional exact_match assert for variants which can be used to sidestep some of the known issues with isomorphic graph detection. This is useful for graphs with no BNodes. - Also added round-tripping for `variants/simple_quad.trig`. - Various changes to ensure deterministic ordering so that it is easier to compare things visually and so that tests always do the exact same thing in the exact same order. 
--- .editorconfig | 2 +- test/test_graph/test_variants.py | 40 ++++++++++++++++--- test/test_roundtrip.py | 24 ++++++++++++ test/testutils.py | 22 ++++++----- test/variants/README.md | 2 +- test/variants/rdf11trig_eg2-asserts.json | 3 ++ test/variants/rdf11trig_eg2.hext | 7 ++++ test/variants/rdf11trig_eg2.jsonld | 43 +++++++++++++++++++++ test/variants/rdf11trig_eg2.nq | 7 ++++ test/variants/rdf11trig_eg2.trig | 26 +++++++++++++ test/variants/rdf_prefix-asserts.json | 3 ++ test/variants/schema_only_base-asserts.json | 3 +- test/variants/simple_quad-asserts.json | 4 ++ test/variants/simple_quad.hext | 1 + test/variants/simple_quad.jsonld | 15 +++++++ test/variants/simple_quad.nq | 1 + test/variants/simple_quad.trig | 5 +++ test/variants/simple_triple-asserts.json | 3 +- test/variants/simple_triple.nq | 1 + test/variants/simple_triple.nt | 1 + test/variants/special_chars-asserts.json | 3 +- test/variants/xml_literal-asserts.json | 3 +- 22 files changed, 198 insertions(+), 21 deletions(-) create mode 100644 test/variants/rdf11trig_eg2-asserts.json create mode 100644 test/variants/rdf11trig_eg2.hext create mode 100644 test/variants/rdf11trig_eg2.jsonld create mode 100644 test/variants/rdf11trig_eg2.nq create mode 100644 test/variants/rdf11trig_eg2.trig create mode 100644 test/variants/rdf_prefix-asserts.json create mode 100644 test/variants/simple_quad-asserts.json create mode 100644 test/variants/simple_quad.hext create mode 100644 test/variants/simple_quad.jsonld create mode 100644 test/variants/simple_quad.nq create mode 100644 test/variants/simple_quad.trig create mode 100644 test/variants/simple_triple.nq create mode 100644 test/variants/simple_triple.nt diff --git a/.editorconfig b/.editorconfig index e18c0788d..a044bc5c9 100644 --- a/.editorconfig +++ b/.editorconfig @@ -20,7 +20,7 @@ trim_trailing_whitespace = false [*.{js,py,pyi,toml,yml,yaml}] charset = utf-8 -[*.{yaml,yml,json}] +[*.{yaml,yml,json,jsonld}] indent_style = space indent_size = 2 diff --git 
a/test/test_graph/test_variants.py b/test/test_graph/test_variants.py index 1a291e0dd..57221d5bb 100644 --- a/test/test_graph/test_variants.py +++ b/test/test_graph/test_variants.py @@ -12,6 +12,7 @@ Iterable, List, Optional, + OrderedDict, Pattern, Tuple, Union, @@ -45,10 +46,13 @@ class GraphAsserts: """ quad_count: Optional[int] = None + exact_match: bool = False - def check(self, graph: ConjunctiveGraph) -> None: + def check(self, first_graph: Optional[ConjunctiveGraph], graph: ConjunctiveGraph) -> None: if self.quad_count is not None: assert self.quad_count == len(list(graph.quads())) + if first_graph is not None and self.exact_match: + GraphHelper.assert_quad_sets_equals(first_graph, graph) @dataclass(order=True) @@ -58,7 +62,7 @@ class GraphVariants: """ key: str - variants: Dict[str, Path] = field(default_factory=dict) + variants: Dict[str, Path] = field(default_factory=OrderedDict) asserts: GraphAsserts = field(default_factory=lambda: GraphAsserts()) _variant_regex: ClassVar[Pattern[str]] = re.compile( @@ -135,6 +139,29 @@ def for_directory( reason="Some issue with handling base URI that does not end with a slash", raises=ValueError, ), + ("variants/rdf11trig_eg2"): pytest.mark.xfail( + reason=""" + This fails randomly, passing less than 10% of the time, and always failing + with comparing hext against trig. Not clear why, it may be a big with hext + parsing. 
+ + AssertionError: checking rdf11trig_eg2.hext against rdf11trig_eg2.trig + in both: + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:bob@oldcorp.example.org')) + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Bob')) + (rdflib.term.URIRef('http://example.org/bob'), rdflib.term.URIRef('http://purl.org/dc/terms/publisher'), rdflib.term.Literal('Bob')) + (rdflib.term.URIRef('http://example.org/alice'), rdflib.term.URIRef('http://purl.org/dc/terms/publisher'), rdflib.term.Literal('Alice')) + only in first: + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/knows'), rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd')) + (rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:alice@work.example.org')) + (rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Alice')) + only in second: + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/knows'), rdflib.term.BNode('cbcd41774964510991c01701d8430149bc373e1f23734d9c938c81a40b1429aa33')) + (rdflib.term.BNode('cbcd41774964510991c01701d8430149bc373e1f23734d9c938c81a40b1429aa33'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:alice@work.example.org')) + (rdflib.term.BNode('cbcd41774964510991c01701d8430149bc373e1f23734d9c938c81a40b1429aa33'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Alice')) + """, + raises=AssertionError, + ), } @@ -164,7 +191,8 @@ def test_variants(graph_variant: GraphVariants) -> None: logging.debug("graph_variant = %s", graph_variant) public_id = URIRef(f"example:{graph_variant.key}") assert len(graph_variant.variants) > 0 - 
first_graph: Optional[Graph] = None + first_graph: Optional[ConjunctiveGraph] = None + first_path: Optional[Path] = None for variant_key, variant_path in graph_variant.variants.items(): logging.debug("variant_path = %s", variant_path) format = guess_format(variant_path.name, fmap=SUFFIX_FORMAT_MAP) @@ -175,8 +203,10 @@ def test_variants(graph_variant: GraphVariants) -> None: # opinions of when a bare string is of datatype XSD.string or not. # Probably something that needs more investigation. GraphHelper.strip_literal_datatypes(graph, {XSD.string}) - graph_variant.asserts.check(graph) + graph_variant.asserts.check(first_graph, graph) if first_graph is None: first_graph = graph + first_path = variant_path else: - GraphHelper.assert_isomorphic(first_graph, graph) + assert first_path is not None + GraphHelper.assert_isomorphic(first_graph, graph, f"checking {variant_path.relative_to(VARIANTS_DIR)} against {first_path.relative_to(VARIANTS_DIR)}") diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index 16daec7eb..697dd6105 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -142,6 +142,28 @@ reason="results in invalid xml element name: ", raises=SAXParseException, ), + ("trig", "rdf11trig_eg2.trig"): pytest.mark.xfail( + reason=""" + Something is going wrong here with blank node serialization. In the second + graph below bob knows someone who does not exist, while in first he knows + someone that does exist and has the name Alice. 
+ + AssertionError: in both: + (rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:alice@work.example.org')) + (rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Alice')) + (rdflib.term.URIRef('http://example.org/alice'), rdflib.term.URIRef('http://purl.org/dc/terms/publisher'), rdflib.term.Literal('Alice')) + (rdflib.term.URIRef('http://example.org/bob'), rdflib.term.URIRef('http://purl.org/dc/terms/publisher'), rdflib.term.Literal('Bob')) + only in first: + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/knows'), rdflib.term.BNode('cbb5eb12b5dcf688537b0298cce144c6dd68cf047530d0b4a455a8f31f314244fd')) + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:bob@oldcorp.example.org')) + (rdflib.term.BNode('cb0'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Bob')) + only in second: + (rdflib.term.BNode('cb7be1d0397a49ddd4ae8aa96acc7b6135903c5f3fa5e47bf619c0e4b438aafcc1'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/knows'), rdflib.term.BNode('cb0')) + (rdflib.term.BNode('cb7be1d0397a49ddd4ae8aa96acc7b6135903c5f3fa5e47bf619c0e4b438aafcc1'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/mbox'), rdflib.term.URIRef('mailto:bob@oldcorp.example.org')) + (rdflib.term.BNode('cb7be1d0397a49ddd4ae8aa96acc7b6135903c5f3fa5e47bf619c0e4b438aafcc1'), rdflib.term.URIRef('http://xmlns.com/foaf/0.1/name'), rdflib.term.Literal('Bob')) + """, + raises=AssertionError, + ), } # This is for files which can only be represented properly in one format @@ -267,6 +289,8 @@ def test_n3(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Pat (TEST_DIR / "variants" / "special_chars.nt", "ntriples"), (TEST_DIR / "variants" / "xml_literal.rdf", 
"xml"), (TEST_DIR / "variants" / "rdf_prefix.jsonld", "json-ld"), + (TEST_DIR / "variants" / "simple_quad.trig", "trig"), + (TEST_DIR / "variants" / "rdf11trig_eg2.trig", "trig"), ] diff --git a/test/testutils.py b/test/testutils.py index 2d4500700..ec8324121 100644 --- a/test/testutils.py +++ b/test/testutils.py @@ -191,36 +191,38 @@ def assert_sets_equals( @classmethod def format_set( - cls, item_set: Union[IdentifierQuadSet, IdentifierTripleSet], prefix: str = " " + cls, item_set: Union[IdentifierQuadSet, IdentifierTripleSet], prefix: str = " ", sort: bool = False ) -> str: items = [] - for item in item_set: + use_item_set = sorted(item_set) if sort else item_set + for item in use_item_set: items.append(f"{prefix}{item}") return "\n".join(items) @classmethod - def format_graph_set(cls, graph: Graph, prefix: str = " ") -> str: - return cls.format_set(cls.triple_or_quad_set(graph), prefix) + def format_graph_set(cls, graph: Graph, prefix: str = " ", sort: bool = False) -> str: + return cls.format_set(cls.triple_or_quad_set(graph), prefix, sort) @classmethod - def assert_isomorphic(cls, lhs: Graph, rhs: Graph) -> None: + def assert_isomorphic(cls, lhs: Graph, rhs: Graph, message: Optional[str] = None) -> None: """ This asserts that the two graphs are isomorphic, providing a nicely formatted error message if they are not. 
""" - def format_report() -> str: + def format_report(message: Optional[str] = None) -> str: in_both, in_lhs, in_rhs = rdflib.compare.graph_diff(lhs, rhs) + preamle = "" if message is None else f"{message}\n" return ( - "in both:\n" + f"{preamle}in both:\n" f"{cls.format_graph_set(in_both)}" "\nonly in first:\n" - f"{cls.format_graph_set(in_lhs)}" + f"{cls.format_graph_set(in_lhs, sort = True)}" "\nonly in second:\n" - f"{cls.format_graph_set(in_rhs)}" + f"{cls.format_graph_set(in_rhs, sort = True)}" ) - assert rdflib.compare.isomorphic(lhs, rhs), format_report() + assert rdflib.compare.isomorphic(lhs, rhs), format_report(message) @classmethod def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None: diff --git a/test/variants/README.md b/test/variants/README.md index dce13c0e9..28eea67db 100644 --- a/test/variants/README.md +++ b/test/variants/README.md @@ -18,4 +18,4 @@ test/variants/literal_with_lang.ttl Some additional assertions on graphs can be specified in file names that end with `-asserts.json`, for details on supported asserts see -`test/test_variants.py`. +`test/test_graph/test_variants.py`. 
diff --git a/test/variants/rdf11trig_eg2-asserts.json b/test/variants/rdf11trig_eg2-asserts.json new file mode 100644 index 000000000..4a62d909f --- /dev/null +++ b/test/variants/rdf11trig_eg2-asserts.json @@ -0,0 +1,3 @@ +{ + "quad_count": 7 +} diff --git a/test/variants/rdf11trig_eg2.hext b/test/variants/rdf11trig_eg2.hext new file mode 100644 index 000000000..d865ac0fd --- /dev/null +++ b/test/variants/rdf11trig_eg2.hext @@ -0,0 +1,7 @@ +["_:b", "http://xmlns.com/foaf/0.1/mbox", "mailto:alice@work.example.org", "globalId", "", "http://example.org/alice"] +["_:b", "http://xmlns.com/foaf/0.1/name", "Alice", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.org/alice"] +["_:a", "http://xmlns.com/foaf/0.1/mbox", "mailto:bob@oldcorp.example.org", "globalId", "", "http://example.org/bob"] +["_:a", "http://xmlns.com/foaf/0.1/name", "Bob", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.org/bob"] +["_:a", "http://xmlns.com/foaf/0.1/knows", "_:b", "localId", "", "http://example.org/bob"] +["http://example.org/bob", "http://purl.org/dc/terms/publisher", "Bob", "http://www.w3.org/2001/XMLSchema#string", "", ""] +["http://example.org/alice", "http://purl.org/dc/terms/publisher", "Alice", "http://www.w3.org/2001/XMLSchema#string", "", ""] diff --git a/test/variants/rdf11trig_eg2.jsonld b/test/variants/rdf11trig_eg2.jsonld new file mode 100644 index 000000000..2623e40ac --- /dev/null +++ b/test/variants/rdf11trig_eg2.jsonld @@ -0,0 +1,43 @@ +{ + "@graph": [ + { + "@graph": [ + { + "@id": "http://example.org/bob", + "http://purl.org/dc/terms/publisher": "Bob" + }, + { + "@id": "http://example.org/alice", + "http://purl.org/dc/terms/publisher": "Alice" + } + ] + }, + { + "@graph": [ + { + "@id": "_:a", + "http://xmlns.com/foaf/0.1/knows": { + "@id": "_:b" + }, + "http://xmlns.com/foaf/0.1/mbox": { + "@id": "mailto:bob@oldcorp.example.org" + }, + "http://xmlns.com/foaf/0.1/name": "Bob" + } + ], + "@id": "http://example.org/bob" + }, + { + "@graph": [ 
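+ { + "@id": "_:b", + "http://xmlns.com/foaf/0.1/mbox": { + "@id": "mailto:alice@work.example.org" + }, + "http://xmlns.com/foaf/0.1/name": "Alice" + } + ], + "@id": "http://example.org/alice" + } + ] +} diff --git a/test/variants/rdf11trig_eg2.nq b/test/variants/rdf11trig_eg2.nq new file mode 100644 index 000000000..c998bdd67 --- /dev/null +++ b/test/variants/rdf11trig_eg2.nq @@ -0,0 +1,7 @@ +_:a <http://xmlns.com/foaf/0.1/knows> _:b <http://example.org/bob> . +_:a <http://xmlns.com/foaf/0.1/mbox> <mailto:bob@oldcorp.example.org> <http://example.org/bob> . +_:a <http://xmlns.com/foaf/0.1/name> "Bob" <http://example.org/bob> . +_:b <http://xmlns.com/foaf/0.1/mbox> <mailto:alice@work.example.org> <http://example.org/alice> . +_:b <http://xmlns.com/foaf/0.1/name> "Alice" <http://example.org/alice> . +<http://example.org/alice> <http://purl.org/dc/terms/publisher> "Alice" . +<http://example.org/bob> <http://purl.org/dc/terms/publisher> "Bob" . diff --git a/test/variants/rdf11trig_eg2.trig b/test/variants/rdf11trig_eg2.trig new file mode 100644 index 000000000..ee201e864 --- /dev/null +++ b/test/variants/rdf11trig_eg2.trig @@ -0,0 +1,26 @@ +# from example 2 in https://www.w3.org/TR/trig/#sec-graph-statements + +# This document contains a default graph and two named graphs. + +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix dc: <http://purl.org/dc/terms/> . +@prefix foaf: <http://xmlns.com/foaf/0.1/> . + +# default graph + { + <http://example.org/bob> dc:publisher "Bob" . + <http://example.org/alice> dc:publisher "Alice" . + } + + <http://example.org/bob> + { + _:a foaf:name "Bob" . + _:a foaf:mbox <mailto:bob@oldcorp.example.org> . + _:a foaf:knows _:b . + } + + <http://example.org/alice> + { + _:b foaf:name "Alice" . + _:b foaf:mbox <mailto:alice@work.example.org> . 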
+ } diff --git a/test/variants/rdf_prefix-asserts.json b/test/variants/rdf_prefix-asserts.json new file mode 100644 index 000000000..cbcf60043 --- /dev/null +++ b/test/variants/rdf_prefix-asserts.json @@ -0,0 +1,3 @@ +{ + "quad_count": 3 +} diff --git a/test/variants/schema_only_base-asserts.json b/test/variants/schema_only_base-asserts.json index 6e4a630d8..c228d02c9 100644 --- a/test/variants/schema_only_base-asserts.json +++ b/test/variants/schema_only_base-asserts.json @@ -1,3 +1,4 @@ { - "quad_count": 4 + "quad_count": 4, + "exact_match": true } diff --git a/test/variants/simple_quad-asserts.json b/test/variants/simple_quad-asserts.json new file mode 100644 index 000000000..00fc24eb4 --- /dev/null +++ b/test/variants/simple_quad-asserts.json @@ -0,0 +1,4 @@ +{ + "quad_count": 1, + "exact_match": true +} diff --git a/test/variants/simple_quad.hext b/test/variants/simple_quad.hext new file mode 100644 index 000000000..b05702d34 --- /dev/null +++ b/test/variants/simple_quad.hext @@ -0,0 +1 @@ +["http://example.org/subject", "http://example.org/predicate", "http://example.org/object", "globalId", "", "http://example.org/graph"] diff --git a/test/variants/simple_quad.jsonld b/test/variants/simple_quad.jsonld new file mode 100644 index 000000000..643855817 --- /dev/null +++ b/test/variants/simple_quad.jsonld @@ -0,0 +1,15 @@ +[ + { + "@graph": [ + { + "@id": "http://example.org/subject", + "http://example.org/predicate": [ + { + "@id": "http://example.org/object" + } + ] + } + ], + "@id": "http://example.org/graph" + } +] diff --git a/test/variants/simple_quad.nq b/test/variants/simple_quad.nq new file mode 100644 index 000000000..47ef3d034 --- /dev/null +++ b/test/variants/simple_quad.nq @@ -0,0 +1 @@ +<http://example.org/subject> <http://example.org/predicate> <http://example.org/object> <http://example.org/graph> . 
diff --git a/test/variants/simple_quad.trig b/test/variants/simple_quad.trig new file mode 100644 index 000000000..04e8f8f9d --- /dev/null +++ b/test/variants/simple_quad.trig @@ -0,0 +1,5 @@ +@prefix example: <http://example.org/> . 
+ +example:graph { + example:subject example:predicate example:object . +} diff --git a/test/variants/simple_triple-asserts.json b/test/variants/simple_triple-asserts.json index be09e348b..00fc24eb4 100644 --- a/test/variants/simple_triple-asserts.json +++ b/test/variants/simple_triple-asserts.json @@ -1,3 +1,4 @@ { - "quad_count": 1 + "quad_count": 1, + "exact_match": true } diff --git a/test/variants/simple_triple.nq b/test/variants/simple_triple.nq new file mode 100644 index 000000000..0529c7857 --- /dev/null +++ b/test/variants/simple_triple.nq @@ -0,0 +1 @@ + . diff --git a/test/variants/simple_triple.nt b/test/variants/simple_triple.nt new file mode 100644 index 000000000..0529c7857 --- /dev/null +++ b/test/variants/simple_triple.nt @@ -0,0 +1 @@ + . diff --git a/test/variants/special_chars-asserts.json b/test/variants/special_chars-asserts.json index 4a62d909f..454f6529e 100644 --- a/test/variants/special_chars-asserts.json +++ b/test/variants/special_chars-asserts.json @@ -1,3 +1,4 @@ { - "quad_count": 7 + "quad_count": 7, + "exact_match": true } diff --git a/test/variants/xml_literal-asserts.json b/test/variants/xml_literal-asserts.json index be09e348b..00fc24eb4 100644 --- a/test/variants/xml_literal-asserts.json +++ b/test/variants/xml_literal-asserts.json @@ -1,3 +1,4 @@ { - "quad_count": 1 + "quad_count": 1, + "exact_match": true }