Skip to content

Commit

Permalink
fix: SPARQL LOAD ... INTO GRAPH handling (#2554)
Browse files Browse the repository at this point in the history
`LOAD ... INTO GRAPH` stopped working correctly after the change to
handling of the `publicID` `Graph.parse` parameter in RDFLib 7.0.0
(<#2406>).

This is because `LOAD` evaluation relied on `publicID` to select the
graph name. So after <#2406> data
would be loaded into the default graph even if a named graph is
specified.

This change adds tests for `LOAD ... INTO GRAPH` and fixes the load
evaluation.

A consequence of this change is also that relative IRI lookup for graphs
loaded with `LOAD ... INTO GRAPH` is now relative to the source document
URI instead of the base URI of the graph being loaded into, which is
more correct.
  • Loading branch information
aucampia committed Aug 30, 2023
1 parent 077f4ac commit 09354a5
Show file tree
Hide file tree
Showing 8 changed files with 199 additions and 8 deletions.
21 changes: 16 additions & 5 deletions rdflib/plugins/sparql/sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,14 +311,23 @@ def dataset(self) -> ConjunctiveGraph:
)
return self._dataset

def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None:
def load(
self,
source: URIRef,
default: bool = False,
into: Optional[Identifier] = None,
**kwargs: Any,
) -> None:
"""
Load data from the source into the query context's.
:param source: The source to load from.
:param default: If `True`, triples from the source will be added to the
default graph, otherwise it will be loaded into a graph with
``source`` URI as its name.
:param default: If `True`, triples from the source will be added
to the default graph, otherwise it will be loaded into a
graph with ``source`` URI as its name.
:param into: The name of the graph to load the data into. If
`None`, the source URI will be used as the name of the
graph.
:param kwargs: Keyword arguments to pass to
:meth:`rdflib.graph.Graph.parse`.
"""
Expand Down Expand Up @@ -353,7 +362,9 @@ def _load(graph, source):
if default:
_load(self.graph, source)
else:
_load(self.dataset.get_context(source), source)
if into is None:
into = source
_load(self.dataset.get_context(into), source)

def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]:
# in SPARQL BNodes are just labels
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/sparql/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def evalLoad(ctx: QueryContext, u: CompValue) -> None:
assert isinstance(u.iri, URIRef)

if u.graphiri:
ctx.load(u.iri, default=False, publicID=u.graphiri)
ctx.load(u.iri, default=False, into=u.graphiri)
else:
ctx.load(u.iri, default=True)

Expand Down
8 changes: 8 additions & 0 deletions test/data/variants/relative_triple-meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"public_id": "http://example.org/variants/relative_triples",
"quad_count": 1,
"exact_match" : true,
"has_subject_iris": [
"http://example.org/variants/path/subject"
]
}
1 change: 1 addition & 0 deletions test/data/variants/relative_triple.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<http://example.org/variants/path/subject> <http://example.org/variants/path/predicate> <http://example.org/variants/path/object> .
3 changes: 3 additions & 0 deletions test/data/variants/relative_triple.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@prefix anchor: <path/> .

anchor:subject anchor:predicate anchor:object .
92 changes: 92 additions & 0 deletions test/test_sparql/test_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import itertools
import logging
from test.data import TEST_DATA_DIR
from test.utils import GraphHelper
from test.utils.graph import GraphSource
from test.utils.namespace import EGDO
from typing import Callable

import pytest

from rdflib.graph import ConjunctiveGraph, Dataset, Graph


@pytest.mark.parametrize(
    ("graph_factory", "source"),
    itertools.product(
        [Graph, ConjunctiveGraph, Dataset],
        GraphSource.from_paths(
            TEST_DATA_DIR / "variants" / "simple_triple.ttl",
            TEST_DATA_DIR / "variants" / "relative_triple.ttl",
        ),
    ),
    ids=GraphSource.idfn,
)
def test_load_into_default(
    graph_factory: Callable[[], Graph], source: GraphSource
) -> None:
    """
    ``LOAD <source>`` places the loaded triples in the default graph.

    The reference graph is populated by loading ``source`` directly; the
    graph under test is populated by running the SPARQL update. Both must
    end up with the same content.
    """
    # Reference result: load the source straight into a fresh graph.
    expected = graph_factory()
    source.load(graph=expected)

    # Result under test: let the SPARQL update engine perform the load.
    actual = graph_factory()
    actual.update(f"LOAD <{source.public_id_or_path_uri()}>")

    # Serialising is only worth the cost when debug output will be shown.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        if isinstance(expected, ConjunctiveGraph):
            serialization = "nquads"
        else:
            serialization = "ntriples"
        logging.debug("expected_graph = \n%s", expected.serialize(format=serialization))
        logging.debug("actual_graph = \n%s", actual.serialize(format=serialization))

    # Plain graphs only need a triple-level comparison.
    if not isinstance(expected, ConjunctiveGraph):
        GraphHelper.assert_triple_sets_equals(expected, actual)
        return

    # Graph collections are compared graph by graph.
    assert isinstance(actual, ConjunctiveGraph)
    GraphHelper.assert_collection_graphs_equal(expected, actual)


@pytest.mark.parametrize(
    ("graph_factory", "source"),
    itertools.product(
        [ConjunctiveGraph, Dataset],
        GraphSource.from_paths(
            TEST_DATA_DIR / "variants" / "simple_triple.ttl",
            TEST_DATA_DIR / "variants" / "relative_triple.ttl",
        ),
    ),
    ids=GraphSource.idfn,
)
def test_load_into_named(
    graph_factory: Callable[[], ConjunctiveGraph], source: GraphSource
) -> None:
    """
    ``LOAD <source> INTO GRAPH <name>`` places the triples in graph ``<name>``.

    The reference collection is populated by loading ``source`` directly
    into the ``EGDO.graph`` context; the collection under test is populated
    by running the SPARQL update. Both must end up with the same content.
    """
    # Reference result: load the source into the named context directly.
    expected = graph_factory()
    source.load(graph=expected.get_context(EGDO.graph))

    # Result under test: let the SPARQL update engine perform the load.
    actual = graph_factory()
    actual.update(
        f"LOAD <{source.public_id_or_path_uri()}> INTO GRAPH <{EGDO.graph}>"
    )

    # Serialising is only worth the cost when debug output will be shown.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        serialization = "nquads"
        logging.debug("expected_graph = \n%s", expected.serialize(format=serialization))
        logging.debug("actual_graph = \n%s", actual.serialize(format=serialization))

    GraphHelper.assert_collection_graphs_equal(expected, actual)
55 changes: 54 additions & 1 deletion test/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Iterable,
List,
Optional,
Sequence,
Set,
Tuple,
Type,
Expand All @@ -35,7 +36,7 @@
from rdflib import BNode, ConjunctiveGraph, Graph
from rdflib.graph import Dataset
from rdflib.plugin import Plugin
from rdflib.term import Identifier, Literal, Node, URIRef
from rdflib.term import IdentifiedNode, Identifier, Literal, Node, URIRef

PluginT = TypeVar("PluginT")

Expand Down Expand Up @@ -257,6 +258,23 @@ def assert_quad_sets_equals(
else:
assert lhs_set != rhs_set

@classmethod
def assert_collection_graphs_equal(
    cls, lhs: ConjunctiveGraph, rhs: ConjunctiveGraph
) -> None:
    """
    Check that two graph collections hold the same triples graph by graph.

    The default contexts are compared first. After that, every graph name
    that occurs in either collection (excluding the default graph) is
    looked up in both collections and the two graphs are compared.

    :param lhs: The first graph collection.
    :param rhs: The second graph collection.
    """
    cls.assert_triple_sets_equals(lhs.default_context, rhs.default_context)

    # Use names from both sides so a graph present in only one collection
    # is still compared against its counterpart in the other.
    lhs_names = cls.non_default_graph_names(lhs)
    rhs_names = cls.non_default_graph_names(rhs)
    for name in lhs_names.union(rhs_names):
        left_graph = lhs.get_context(name)
        right_graph = rhs.get_context(name)
        cls.assert_triple_sets_equals(left_graph, right_graph)

@classmethod
def assert_sets_equals(
cls,
Expand Down Expand Up @@ -381,6 +399,21 @@ def strip_literal_datatypes(cls, graph: Graph, datatypes: Set[URIRef]) -> None:
if object.datatype in datatypes:
object._datatype = None

@classmethod
def non_default_graph_names(
    cls, container: ConjunctiveGraph
) -> Set[IdentifiedNode]:
    """
    Return the identifiers of all contexts in ``container`` except the
    identifier of its default context.

    :param container: The graph collection to inspect.
    :return: The set of context identifiers, without the default one.
    """
    names = {graph.identifier for graph in container.contexts()}
    # discard() is a no-op when the default identifier is not listed.
    names.discard(container.default_context.identifier)
    return names

@classmethod
def non_default_graphs(cls, container: ConjunctiveGraph) -> Sequence[Graph]:
    """
    Return the graphs of ``container`` other than its default graph.

    :param container: The graph collection to inspect.
    :return: One graph per name reported by
        ``non_default_graph_names``, obtained via ``get_context``.
    """
    return [
        container.get_context(graph_name)
        for graph_name in cls.non_default_graph_names(container)
    ]


def eq_(lhs, rhs, msg=None):
"""
Expand Down Expand Up @@ -455,4 +488,24 @@ def ensure_suffix(value: str, suffix: str) -> str:
return value


def idfns(*idfns: Callable[[Any], Optional[str]]) -> Callable[[Any], Optional[str]]:
    """
    Combine several ID functions into a single one.

    The returned function calls the given ID functions in order and
    yields the first result that is not `None`.

    :param idfns: The ID functions to try, in priority order.
    :return: An ID function that returns the first non-`None` result of
        the provided functions, or `None` if none of them produce one.
    """

    def _idfns(value: Any) -> Optional[str]:
        # The generator is lazy, so functions after the first hit are
        # never called.
        candidates = (candidate_fn(value) for candidate_fn in idfns)
        return next((found for found in candidates if found is not None), None)

    return _idfns


from test.utils.iri import file_uri_to_path # noqa: E402
25 changes: 24 additions & 1 deletion test/utils/graph.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations

import logging
import test.data
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from runpy import run_path
from typing import Optional, Tuple, Type, Union
from typing import Any, Optional, Tuple, Type, Union

import rdflib.util
from rdflib.graph import Graph, _GraphT
Expand Down Expand Up @@ -57,6 +58,11 @@ def from_source(
source = GraphSource.from_path(source)
return source

def public_id_or_path_uri(self) -> str:
    """
    Return the URI that identifies this source.

    :return: ``self.public_id`` when it is set, otherwise the URI form of
        ``self.path`` as produced by ``Path.as_uri()``.
    """
    if self.public_id is not None:
        # The original evaluated ``self.public_id`` as a bare expression
        # without returning it, so the public ID was always ignored.
        return self.public_id
    # NOTE(review): ``as_uri()`` requires an absolute path per the pathlib
    # docs - confirm sources are always created from absolute paths.
    return self.path.as_uri()

def load(
self,
graph: Optional[_GraphT] = None,
Expand All @@ -77,6 +83,23 @@ def load(
)
return graph

@classmethod
def idfn(cls, val: Any) -> Optional[str]:
    """
    ID function for GraphSource objects.

    :param val: The value to try to generate an identifier for.
    :return: A string identifying the given value if the value is a
        `GraphSource`, otherwise `None`.
    """
    if not isinstance(val, cls):
        return None
    try:
        # Prefer the short path relative to the test data directory.
        shown_path = str(val.path.relative_to(test.data.TEST_DATA_DIR))
    except ValueError:
        # The path is outside the test data directory; show it in full.
        shown_path = str(val.path)
    return f"GS({shown_path}, {val.format}, {val.public_id})"


def load_sources(
*sources: GraphSourceType,
Expand Down

0 comments on commit 09354a5

Please sign in to comment.