fix: SPARQL LOAD ... INTO GRAPH handling (#2554)

`LOAD ... INTO GRAPH` stopped working correctly after the change to handling of the `publicID` `Graph.parse` parameter in RDFLib 7.0.0 (<#2406>). This is because `LOAD` evaluation relied on `publicID` to select the graph name. So after <#2406> data would be loaded into the default graph even if a named graph is specified. This change adds tests for `LOAD ... INTO GRAPH` and fixes the load evaluation. A consequence of this change is also that relative IRI lookup for graphs loaded with `LOAD ... INTO GRAPH` is now relative to the source document URI instead of the base URI of the graph being loaded into, which is more correct.
RDFLib · Aug 30, 2023 · 09354a5 · 09354a5
1 parent 077f4ac
commit 09354a5
Show file tree

Hide file tree

Showing 8 changed files with 199 additions and 8 deletions.
diff --git a/rdflib/plugins/sparql/sparql.py b/rdflib/plugins/sparql/sparql.py
@@ -311,14 +311,23 @@ def dataset(self) -> ConjunctiveGraph:
             )
         return self._dataset
 
-    def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
+    def load(
+        self,
+        source: URIRef,
+        default: bool = False,
+        into: Optional[Identifier] = None,
+        **kwargs: Any,
+    ) -> None:
         """
         Load data from the source into the query context's.
 
         :param source: The source to load from.
-        :param default: If `True`, triples from the source will be added to the
-            default graph, otherwise it will be loaded into a graph with
-            ``source`` URI as its name.
+        :param default: If `True`, triples from the source will be added
+            to the default graph, otherwise it will be loaded into a
+            graph with ``source`` URI as its name.
+        :param into: The name of the graph to load the data into. If
+            `None`, the source URI will be used as the name of the
+            graph.
         :param kwargs: Keyword arguments to pass to
             :meth:`rdflib.graph.Graph.parse`.
         """
@@ -353,7 +362,9 @@ def _load(graph, source):
             if default:
                 _load(self.graph, source)
             else:
-                _load(self.dataset.get_context(source), source)
+                if into is None:
+                    into = source
+                _load(self.dataset.get_context(into), source)
 
     def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
         # in SPARQL BNodes are just labels

diff --git a/rdflib/plugins/sparql/update.py b/rdflib/plugins/sparql/update.py
@@ -51,7 +51,7 @@ def evalLoad(ctx: QueryContext, u: CompValue) -> None:
         assert isinstance(u.iri, URIRef)
 
     if u.graphiri:
-        ctx.load(u.iri, default=False, publicID=u.graphiri)
+        ctx.load(u.iri, default=False, into=u.graphiri)
     else:
         ctx.load(u.iri, default=True)
 

diff --git a/test/data/variants/relative_triple-meta.json b/test/data/variants/relative_triple-meta.json
@@ -0,0 +1,8 @@
+{
+  "public_id": "http://example.org/variants/relative_triples",
+  "quad_count": 1,
+  "exact_match" : true,
+  "has_subject_iris": [
+    "http://example.org/variants/path/subject"
+  ]
+}
diff --git a/test/data/variants/relative_triple.nt b/test/data/variants/relative_triple.nt
@@ -0,0 +1 @@
+<http://example.org/variants/path/subject> <http://example.org/variants/path/predicate> <http://example.org/variants/path/object> .
diff --git a/test/data/variants/relative_triple.ttl b/test/data/variants/relative_triple.ttl
@@ -0,0 +1,3 @@
+@prefix anchor: <path/> .
+
+anchor:subject anchor:predicate anchor:object .
diff --git a/test/test_sparql/test_update.py b/test/test_sparql/test_update.py
@@ -0,0 +1,92 @@
+import itertools
+import logging
+from test.data import TEST_DATA_DIR
+from test.utils import GraphHelper
+from test.utils.graph import GraphSource
+from test.utils.namespace import EGDO
+from typing import Callable
+
+import pytest
+
+from rdflib.graph import ConjunctiveGraph, Dataset, Graph
+
+
+@pytest.mark.parametrize(
+    ("graph_factory", "source"),
+    # Every graph class is paired with every source document.
+    itertools.product(
+        [Graph, ConjunctiveGraph, Dataset],
+        GraphSource.from_paths(
+            TEST_DATA_DIR / "variants" / "simple_triple.ttl",
+            TEST_DATA_DIR / "variants" / "relative_triple.ttl",
+        ),
+    ),
+    # ``GraphSource.idfn`` only labels ``GraphSource`` values; it returns
+    # ``None`` for the graph classes.
+    ids=GraphSource.idfn,
+)
+def test_load_into_default(
+    graph_factory: Callable[[], Graph], source: GraphSource
+) -> None:
+    """
+    Evaluation of ``LOAD <source>`` into default graph works correctly.
+
+    The expected graph is populated by loading ``source`` directly with
+    ``GraphSource.load``; the actual graph is populated by running a
+    SPARQL ``LOAD`` update against the same source URI. Both graphs are
+    created with ``graph_factory`` and must end up with the same content.
+
+    :param graph_factory: Callable that creates the empty graph to test.
+    :param source: The source document to load.
+    """
+
+    expected_graph = graph_factory()
+    source.load(graph=expected_graph)
+
+    actual_graph = graph_factory()
+    actual_graph.update(f"LOAD <{source.public_id_or_path_uri()}>")
+
+    # Both graphs are only serialized when DEBUG logging is enabled.
+    if logging.getLogger().isEnabledFor(logging.DEBUG):
+        debug_format = (
+            "nquads" if isinstance(expected_graph, ConjunctiveGraph) else "ntriples"
+        )
+        logging.debug(
+            "expected_graph = \n%s", expected_graph.serialize(format=debug_format)
+        )
+        logging.debug(
+            "actual_graph = \n%s", actual_graph.serialize(format=debug_format)
+        )
+
+    # ``ConjunctiveGraph`` instances are compared graph by graph; any other
+    # graph is compared as a plain set of triples.
+    if isinstance(expected_graph, ConjunctiveGraph):
+        assert isinstance(actual_graph, ConjunctiveGraph)
+        GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)
+    else:
+        GraphHelper.assert_triple_sets_equals(expected_graph, actual_graph)
+
+
+@pytest.mark.parametrize(
+    ("graph_factory", "source"),
+    # Every graph class is paired with every source document.
+    itertools.product(
+        [ConjunctiveGraph, Dataset],
+        GraphSource.from_paths(
+            TEST_DATA_DIR / "variants" / "simple_triple.ttl",
+            TEST_DATA_DIR / "variants" / "relative_triple.ttl",
+        ),
+    ),
+    # ``GraphSource.idfn`` only labels ``GraphSource`` values; it returns
+    # ``None`` for the graph classes.
+    ids=GraphSource.idfn,
+)
+def test_load_into_named(
+    graph_factory: Callable[[], ConjunctiveGraph], source: GraphSource
+) -> None:
+    """
+    Evaluation of ``LOAD <source> INTO GRAPH <name>`` works correctly.
+
+    The expected container is populated by loading ``source`` directly into
+    its ``EGDO.graph`` context; the actual container is populated by running
+    a SPARQL ``LOAD ... INTO GRAPH`` update that names the same graph. Both
+    containers must then hold the same graphs.
+
+    :param graph_factory: Callable that creates the empty container to test.
+    :param source: The source document to load.
+    """
+
+    expected_graph = graph_factory()
+    source.load(graph=expected_graph.get_context(EGDO.graph))
+
+    actual_graph = graph_factory()
+
+    actual_graph.update(
+        f"LOAD <{source.public_id_or_path_uri()}> INTO GRAPH <{EGDO.graph}>"
+    )
+
+    # Both containers are only serialized when DEBUG logging is enabled.
+    if logging.getLogger().isEnabledFor(logging.DEBUG):
+        debug_format = "nquads"
+        logging.debug(
+            "expected_graph = \n%s", expected_graph.serialize(format=debug_format)
+        )
+        logging.debug(
+            "actual_graph = \n%s", actual_graph.serialize(format=debug_format)
+        )
+
+    GraphHelper.assert_collection_graphs_equal(expected_graph, actual_graph)
diff --git a/test/utils/__init__.py b/test/utils/__init__.py
@@ -19,6 +19,7 @@
     Iterable,
     List,
     Optional,
+    Sequence,
     Set,
     Tuple,
     Type,
@@ -35,7 +36,7 @@
 from rdflib import BNode, ConjunctiveGraph, Graph
 from rdflib.graph import Dataset
 from rdflib.plugin import Plugin
-from rdflib.term import Identifier, Literal, Node, URIRef
+from rdflib.term import IdentifiedNode, Identifier, Literal, Node, URIRef
 
 PluginT = TypeVar("PluginT")
 
@@ -257,6 +258,23 @@ def assert_quad_sets_equals(
         else:
             assert lhs_set != rhs_set
 
+    @classmethod
+    def assert_collection_graphs_equal(
+        cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph
+    ) -> None:
+        """
+        Assert that two graph collections hold the same graphs.
+
+        The default contexts are compared first. Then, for every graph
+        name found in either collection, the context with that name is
+        fetched from both sides and the two triple sets are compared.
+
+        :param lhs: The first collection.
+        :param rhs: The second collection.
+        """
+        cls.assert_triple_sets_equals(lhs.default_context, rhs.default_context)
+        lhs_names = cls.non_default_graph_names(lhs)
+        rhs_names = cls.non_default_graph_names(rhs)
+        # Use the union so a graph named on only one side is still checked.
+        for graph_name in lhs_names | rhs_names:
+            lhs_graph = lhs.get_context(graph_name)
+            rhs_graph = rhs.get_context(graph_name)
+            cls.assert_triple_sets_equals(lhs_graph, rhs_graph)
+
+
     @classmethod
     def assert_sets_equals(
         cls,
@@ -381,6 +399,21 @@ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None:
             if object.datatype in datatypes:
                 object._datatype = None
 
+    @classmethod
+    def non_default_graph_names(
+        cls, container: ConjunctiveGraph
+    ) -> Set[IdentifiedNode]:
+        """
+        Return the identifiers of all contexts in ``container`` except the
+        identifier of its default context.
+
+        :param container: The graph collection to inspect.
+        :return: The set of non-default context identifiers.
+        """
+        names = {context.identifier for context in container.contexts()}
+        names.discard(container.default_context.identifier)
+        return names
+
+    @classmethod
+    def non_default_graphs(cls, container: ConjunctiveGraph) -> Sequence[Graph]:
+        """
+        Return the context for every non-default graph name in ``container``.
+
+        :param container: The graph collection to inspect.
+        :return: A list with one context per name returned by
+            ``non_default_graph_names``.
+        """
+        return [
+            container.get_context(name)
+            for name in cls.non_default_graph_names(container)
+        ]
+
 
 def eq_(lhs, rhs, msg=None):
     """
@@ -455,4 +488,24 @@ def ensure_suffix(value: str, suffix: str) -> str:
     return value
 
 
+def idfns(*idfns: Callable[[Any], Optional[str]]) -> Callable[[Any], Optional[str]]:
+    """
+    Combine several ID functions into one.
+
+    The returned function calls each supplied ID function in the order
+    given and returns the first result that is not `None`.
+
+    :param idfns: The ID functions to try, in priority order.
+    :return: An ID function that yields the first non-`None` result, or
+        `None` if every supplied function returns `None`.
+    """
+
+    def _try_each(value: Any) -> Optional[str]:
+        for make_id in idfns:
+            candidate = make_id(value)
+            if candidate is None:
+                # This function does not handle the value; try the next.
+                continue
+            return candidate
+        return None
+
+    return _try_each
+
+
 from test.utils.iri import file_uri_to_path  # noqa: E402
diff --git a/test/utils/graph.py b/test/utils/graph.py
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
 import logging
+import test.data
 from dataclasses import dataclass
 from functools import lru_cache
 from pathlib import Path
 from runpy import run_path
-from typing import Optional, Tuple, Type, Union
+from typing import Any, Optional, Tuple, Type, Union
 
 import rdflib.util
 from rdflib.graph import Graph, _GraphT
@@ -57,6 +58,11 @@ def from_source(
             source = GraphSource.from_path(source)
         return source
 
+    def public_id_or_path_uri(self) -> str:
+        """
+        Return the URI that identifies this source.
+
+        :return: ``public_id`` when it is set, otherwise the URI produced
+            by ``path.as_uri()``.
+        """
+        if self.public_id is not None:
+            # An explicit public ID takes precedence over the path URI.
+            return self.public_id
+        return self.path.as_uri()
+
     def load(
         self,
         graph: Optional[_GraphT] = None,
@@ -77,6 +83,23 @@ def load(
             )
         return graph
 
+    @classmethod
+    def idfn(cls, val: Any) -> Optional[str]:
+        """
+        ID function for GraphSource objects.
+
+        The path is shown relative to ``test.data.TEST_DATA_DIR`` when it
+        lies under that directory, and unchanged otherwise.
+
+        :param val: The value to try to generate an identifier for.
+        :return: A string identifying the given value if the value is a
+            `GraphSource`, otherwise `None`.
+        """
+        if not isinstance(val, cls):
+            return None
+        try:
+            shown_path = val.path.relative_to(test.data.TEST_DATA_DIR)
+        except ValueError:
+            # ``relative_to`` raises when the path is outside the data dir.
+            shown_path = val.path
+        return f"GS({shown_path}, {val.format}, {val.public_id})"
+
 
 def load_sources(
     *sources: GraphSourceType,