From 4f96f468451bac36e7f606d238376b4025dd6897 Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Thu, 23 Jun 2022 21:27:54 +0200 Subject: [PATCH] Fix SPARQL CSV result serialization of blank nodes (#1979) This patch fixes SPARQL CSV result serialization of blank nodes. Previously blank nodes were serialized the same as normal nodes, but blank nodes should be prefixed with `_:` as per the SPARQL 1.1 Query Results CSV and TSV Formats specification: > Blank nodes use the `_:label` form from Turtle and SPARQL. > Use of the same label indicates the same blank node within > the results but has no significance outside the results. --- rdflib/plugins/sparql/results/csvresults.py | 2 + test/test_sparql/test_result.py | 41 ++++++++++++++++++++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/rdflib/plugins/sparql/results/csvresults.py b/rdflib/plugins/sparql/results/csvresults.py index 9138cba6a..16273cbcd 100644 --- a/rdflib/plugins/sparql/results/csvresults.py +++ b/rdflib/plugins/sparql/results/csvresults.py @@ -83,5 +83,7 @@ def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs): def serializeTerm(self, term, encoding): if term is None: return "" + elif isinstance(term, BNode): + return f"_:{term}" else: return term diff --git a/test/test_sparql/test_result.py b/test/test_sparql/test_result.py index a861d91b1..0b301e6ee 100644 --- a/test/test_sparql/test_result.py +++ b/test/test_sparql/test_result.py @@ -16,6 +16,7 @@ Iterator, Mapping, Optional, + Pattern, Sequence, Set, TextIO, @@ -29,8 +30,9 @@ from pyparsing import ParseException from rdflib.graph import Graph +from rdflib.namespace import Namespace from rdflib.query import Result, ResultRow -from rdflib.term import Identifier, Literal, Node, Variable +from rdflib.term import BNode, Identifier, Literal, Node, Variable BindingsType = Sequence[Mapping[Variable, Identifier]] ParseOutcomeType = Union[BindingsType, Type[Exception]] @@ -75,6 +77,43 @@ def test_select_result_parse( assert parse_outcome == parsed_result.bindings +EGSCHEME = 
Namespace("example:") + + +@pytest.mark.parametrize( + ("node", "format", "expected_result"), + [ + (BNode(), "csv", re.compile(r"^_:.*$")), + (BNode("a"), "csv", "_:a"), + (Literal("x11"), "csv", "x11"), + ], +) +def test_xsv_serialize( + node: Identifier, format: str, expected_result: Union[Pattern[str], str] +) -> None: + graph = Graph() + graph.add((EGSCHEME.checkSubject, EGSCHEME.checkPredicate, node)) + result = graph.query( + f""" + PREFIX egscheme: <{EGSCHEME}> + SELECT ?o {{ + egscheme:checkSubject egscheme:checkPredicate ?o + }} + """ + ) + assert len(result.bindings) == 1 + with BytesIO() as bio: + result.serialize(bio, format=format) + result_text = bio.getvalue().decode("utf-8") + result_lines = result_text.splitlines() + assert len(result_lines) == 2 + logging.debug("result_lines[1] = %r", result_lines[1]) + if isinstance(expected_result, str): + assert expected_result == result_lines[1] + else: + assert expected_result.match(result_lines[1]) + + @pytest.fixture(scope="module") def select_result(rdfs_graph: Graph) -> Result: query = """