Fix SPARQL CSV result serialization of blank nodes (#1979)

This patch fixes SPARQL CSV result serialization of blank nodes. Previously, blank nodes were serialized the same way as any other term, but blank nodes should be prefixed with `_:` as per <https://www.w3.org/TR/sparql11-results-csv-tsv/#csv-terms>:

> Blank nodes use the `_:label` form from Turtle and SPARQL.
> Use of the same label indicates the same blank node within
> the results but has no significance outside the results.
RDFLib · Jun 23, 2022 · 4f96f46 · 4f96f46
1 parent 0198c0d
commit 4f96f46
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 1 deletion.
diff --git a/rdflib/plugins/sparql/results/csvresults.py b/rdflib/plugins/sparql/results/csvresults.py
@@ -83,5 +83,7 @@ def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs):
     def serializeTerm(self, term, encoding):
         if term is None:
             return ""
+        elif isinstance(term, BNode):
+            return f"_:{term}"
         else:
             return term
diff --git a/test/test_sparql/test_result.py b/test/test_sparql/test_result.py
@@ -16,6 +16,7 @@
     Iterator,
     Mapping,
     Optional,
+    Pattern,
     Sequence,
     Set,
     TextIO,
@@ -29,8 +30,9 @@
 from pyparsing import ParseException
 
 from rdflib.graph import Graph
+from rdflib.namespace import Namespace
 from rdflib.query import Result, ResultRow
-from rdflib.term import Identifier, Literal, Node, Variable
+from rdflib.term import BNode, Identifier, Literal, Node, Variable
 
 BindingsType = Sequence[Mapping[Variable, Identifier]]
 ParseOutcomeType = Union[BindingsType, Type[Exception]]
@@ -75,6 +77,43 @@ def test_select_result_parse(
         assert parse_outcome == parsed_result.bindings
 
 
+EGSCHEME = Namespace("example:")
+
+
+@pytest.mark.parametrize(
+    ("node", "format", "expected_result"),
+    [
+        (BNode(), "csv", re.compile(r"^_:.*$")),
+        (BNode("a"), "csv", "_:a"),
+        (Literal("x11"), "csv", "x11"),
+    ],
+)
+def test_xsv_serialize(
+    node: Identifier, format: str, expected_result: Union[Pattern[str], str]
+) -> None:
+    graph = Graph()
+    graph.add((EGSCHEME.checkSubject, EGSCHEME.checkPredicate, node))
+    result = graph.query(
+        f"""
+    PREFIX egscheme: <{EGSCHEME}>
+    SELECT ?o {{
+        egscheme:checkSubject egscheme:checkPredicate ?o
+    }}
+    """
+    )
+    assert len(result.bindings) == 1
+    with BytesIO() as bio:
+        result.serialize(bio, format=format)
+        result_text = bio.getvalue().decode("utf-8")
+    result_lines = result_text.splitlines()
+    assert len(result_lines) == 2
+    logging.debug("result_lines[1] = %r", result_lines[1])
+    if isinstance(expected_result, str):
+        assert expected_result == result_lines[1]
+    else:
+        assert expected_result.match(result_lines[1])
+
+
 @pytest.fixture(scope="module")
 def select_result(rdfs_graph: Graph) -> Result:
     query = """