diff --git a/rdflib/util.py b/rdflib/util.py
index 93954b4cb..3a8858717 100644
--- a/rdflib/util.py
+++ b/rdflib/util.py
@@ -53,7 +53,6 @@ if TYPE_CHECKING:
     from rdflib.graph import Graph
 
 
-
 __all__ = [
     "list2set",
     "first",
@@ -615,3 +614,33 @@ def _iri2uri(iri: str) -> str:
         uri += "#"
 
     return uri
+
+
+def _has_non_default_graphs(graph: rdflib.graph.ConjunctiveGraph) -> bool:
+    """
+    Check if the container passed as `graph` contains graphs other than the
+    default graph.
+
+    The intent of this is to detect if the value passed can be serialized using
+    formats which do not support named graphs like N-Triples and Turtle.
+
+    Ideally this function would check if the supplied value contains any named
+    graphs, but RDFLib assigns a name to the default graph, so the best that can
+    be done is to check if the supplied graph contains any graphs other than the
+    default graph.
+
+    If the supplied value contains only the default graph and no other graphs,
+    this function will return `False`, otherwise if the value passed contains at
+    least one graph other than the default graph it will return `True`.
+
+    :param graph: the container of graphs to check
+    :return: `True` if `graph` contains a graph other than the default graph,
+        otherwise `False`
+    """
+    default_identifier = graph.default_context.identifier
+    for context_index, context in enumerate(graph.contexts()):
+        # Any context after the first means more than one graph is present, so
+        # it counts as non-default even if it reports the default identifier.
+        if context_index > 0 or context.identifier != default_identifier:
+            return True
+    return False
diff --git a/test/data.py b/test/data/__init__.py
similarity index 100%
rename from test/data.py
rename to test/data/__init__.py
diff --git a/test/data/variants/README.md b/test/data/variants/README.md
index 28eea67db..dd37c6135 100644
--- a/test/data/variants/README.md
+++ b/test/data/variants/README.md
@@ -1,12 +1,12 @@
 # multi variant graphs
 
-This directory containts variants of the same graph encoded in different
+This directory contains variants of the same graph encoded in different
 formats, or differently in the same format.
 
-The graph that a specific file is a variant of is determined by it's filename.
-Files that differ only in file extention but have the same basename are
+The graph that a specific file is a variant of is determined by its filename.
+Files that differ only in file extension but have the same basename are
 considered variants of the same graph. Additionally, any suffix that matches
-`-variant-[^/]*` is excluded when determening the graph key, so the following
+`-variant-[^/]*` is excluded when determining the graph key, so the following
 files are all considered variants of the same graph:
 
 ```
diff --git a/test/data/variants/diverse_triples.nq b/test/data/variants/diverse_triples.nq
new file mode 100644
index 000000000..3b1129077
--- /dev/null
+++ b/test/data/variants/diverse_triples.nq
@@ -0,0 +1,5 @@
+ "日本語の表記体系"@jpx .
+ .
+ "XSD string" .
+ .
+ "12"^^ .
diff --git a/test/data/variants/diverse_triples.py b/test/data/variants/diverse_triples.py index 3167a78a6..48d18e0e3 100644 --- a/test/data/variants/diverse_triples.py +++ b/test/data/variants/diverse_triples.py @@ -14,5 +14,4 @@ def populate_graph(graph: Graph) -> None: graph.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object)) graph.add((EGSCHEME.subject, EGSCHEME.predicate, Literal(12))) - __all__ = ["populate_graph"] diff --git a/test/data/variants/diverse_triples.trig b/test/data/variants/diverse_triples.trig new file mode 100644 index 000000000..832794540 --- /dev/null +++ b/test/data/variants/diverse_triples.trig @@ -0,0 +1,15 @@ +@prefix eghttp: . +@prefix egurn: . +@prefix egschema: . +@prefix xsd: . + +{ + eghttp:subject eghttp:predicate "日本語の表記体系"@jpx . + + egurn:subject egschema:predicate egschema:subject . +} + +egschema:object eghttp:predicate "XSD string"^^xsd:string . + +egschema:subject egschema:predicate egschema:object, + 12 . diff --git a/test/test_dataset/test_dataset.py b/test/test_dataset/test_dataset.py index a6e005c08..95d469ec0 100644 --- a/test/test_dataset/test_dataset.py +++ b/test/test_dataset/test_dataset.py @@ -3,12 +3,20 @@ import tempfile from test.data import CONTEXT1, LIKES, PIZZA, TAREK from test.utils.namespace import EGSCHEME +import logging +import os +import shutil +import tempfile +from typing import Optional +from rdflib.term import Identifier, Literal import pytest from rdflib import URIRef, plugin from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph from rdflib.store import Store +from test.utils.namespace import EGDC, EGSCHEME, EGURN +from rdflib.namespace import XSD # Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if # available. 
This assumes SPARQL1.1 query/update endpoints running locally at
@@ -261,3 +269,52 @@ def test_subgraph_without_identifier() -> None:
     ) == ("genid", genid_prefix)
 
     assert f"{subgraph.identifier}".startswith(genid_prefix)
+
+
+def test_updating_datatype() -> None:
+    """
+    Check that a literal is still found in the same graph after its datatype
+    has been changed in place.
+
+    An `xsd:string` literal is added to a named graph; then every `xsd:string`
+    literal found while iterating the dataset's contexts has its private
+    `_datatype` attribute set to `None`, and the lookup is repeated.
+    """
+    dataset = Dataset()
+
+    dataset.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))
+
+    egurn_graph = dataset.get_context(EGURN.graph)
+    egurn_graph.add(
+        (EGSCHEME.subject, EGDC.predicate, Literal("XSD string", datatype=XSD.string))
+    )
+
+    def find_literal_obj_ctx_id(
+        dataset: Dataset, literal_value: str
+    ) -> Optional[Identifier]:
+        """
+        Return the graph component (`quad[3]`) of the first quad whose object is
+        a literal with the given value, or `None` if there is no such quad.
+        """
+        for quad in dataset.quads((None, None, None, None)):
+            if isinstance(quad[2], Literal) and quad[2].value == literal_value:
+                logging.debug("quad = %s", quad)
+                return quad[3]
+        return None
+
+    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph
+
+    for context in dataset.contexts():
+        logging.debug("context.identifier = %s", context.identifier)
+        for triple in context:
+            logging.debug("triple = %s", triple)
+            obj = triple[2]
+            if not isinstance(obj, Literal) or obj.datatype is None:
+                continue
+            logging.debug("object.datatype = %s", obj.datatype)
+            if obj.datatype == XSD.string:
+                # NOTE(review): assigns the private attribute directly rather
+                # than replacing the triple; presumably deliberate - confirm.
+                obj._datatype = None
+
+    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph
diff --git a/test/test_serializers/test_serialize_context_aware.py b/test/test_serializers/test_serialize_context_aware.py
new file mode 100644
index 000000000..9e2539ff1
--- /dev/null
+++ b/test/test_serializers/test_serialize_context_aware.py
@@ -0,0 +1,53 @@
+import itertools
+import logging
+from test.utils.graph import GRAPH_FORMATS, GraphType
+from
test.utils.variants import load_pyvariant
+from typing import Dict, Iterable, Type
+
+import pytest
+from _pytest.mark.structures import ParameterSet
+from _pytest.outcomes import Failed
+
+from rdflib.graph import ConjunctiveGraph, Dataset, Graph
+
+
+def make_quads_in_triples_cases() -> Iterable[ParameterSet]:
+    """
+    Build the parameters for `test_quads_in_triples`: each of
+    `ConjunctiveGraph` and `Dataset`, paired with every serializer of every
+    format whose graph types are triples only.
+
+    Every case is marked as an expected failure.
+    """
+    triple_only_formats = [
+        fmt for fmt in GRAPH_FORMATS if fmt.info.graph_types == {GraphType.TRIPLE}
+    ]
+    graph_types = (ConjunctiveGraph, Dataset)
+    for graph_type, fmt in itertools.product(graph_types, triple_only_formats):
+        for serializer in fmt.info.serializers:
+            xfail = pytest.mark.xfail(raises=Failed)
+            yield pytest.param(graph_type, serializer, marks=xfail)
+
+
+# The `diverse_quads` variant loaded into each container type under test.
+CONJUNCTIVE_GRAPH_WITH_QUADS = load_pyvariant("diverse_quads", ConjunctiveGraph)
+DATASET_WITH_QUADS = load_pyvariant("diverse_quads", Dataset)
+
+GRAPHS: Dict[Type[Graph], Graph] = {
+    ConjunctiveGraph: CONJUNCTIVE_GRAPH_WITH_QUADS,
+    Dataset: DATASET_WITH_QUADS,
+}
+
+
+@pytest.mark.parametrize(["graph_type", "serializer"], make_quads_in_triples_cases())
+def test_quads_in_triples(graph_type: Type[ConjunctiveGraph], serializer: str) -> None:
+    """
+    A `ConjunctiveGraph` or `Dataset` holding named graphs (i.e. quads) should
+    raise an exception when serialized to a format without named graph support.
+    """
+    quad_graph = GRAPHS[graph_type]
+    assert type(quad_graph) is graph_type
+    with pytest.raises(Exception) as caught:
+        quad_graph.serialize(format=serializer)
+
+    logging.debug("caught.value = %r", caught.value, exc_info=caught.value)
diff --git a/test/test_util.py b/test/test_util.py
index 7ed486f96..e8e744f33 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,21 +1,37 @@
 from __future__ import annotations
 
+import itertools
+from json import load
 import logging
 import time
 from contextlib import ExitStack
 from pathlib import Path
 from test.data import TEST_DATA_DIR
-from test.utils.graph import cached_graph
+from test.utils.graph import cached_graph, load_sources
 from test.utils.namespace import RDFT
-from typing import Any, Collection, List, Optional, Set, Tuple, Type, Union
+from test.utils.outcome import ExpectedOutcome, ValueOutcome
+from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type, Union
 
 import pytest
+from _pytest.mark.structures import ParameterSet
 
 from rdflib import XSD, util
-from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph
+from rdflib.graph import (
+    ConjunctiveGraph,
+    Dataset,
+    Graph,
+    QuotedGraph,
+    _ConjunctiveGraphT,
+)
 from rdflib.namespace import RDF, RDFS
 from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
-from rdflib.util import _coalesce, _iri2uri, find_roots, get_tree
+from rdflib.util import (
+    _coalesce,
+    _has_non_default_graphs,
+    _iri2uri,
+    find_roots,
+    get_tree,
+)
 
 n3source = """\
 @prefix : .
@@ -672,3 +688,56 @@ def test_iri2uri(iri: str, expected_result: Union[Set[str], Type[Exception]]) ->
     else:
         assert isinstance(expected_result, set)
         assert result in expected_result
+
+
+def make_has_non_default_graphs_cases() -> Iterable[ParameterSet]:
+    """
+    Generate the parameters for `test_has_non_default_graphs`.
+
+    Every listed variant file is paired with both `ConjunctiveGraph` and
+    `Dataset`, together with the value `_has_non_default_graphs` is expected to
+    return once the file has been loaded into a graph of that type.
+    """
+    variants_dir = TEST_DATA_DIR / "variants"
+    with_named_graphs = [
+        variants_dir / "diverse_quads.trig",
+        variants_dir / "diverse_quads.py",
+        variants_dir / "simple_quad.nq",
+        variants_dir / "simple_quad.py",
+    ]
+    without_named_graphs = [
+        variants_dir / "diverse_triples.ttl",
+        variants_dir / "diverse_triples.py",
+        variants_dir / "simple_triple.nt",
+        variants_dir / "simple_triple.trig",
+        variants_dir / "simple_triple.py",
+    ]
+    graph_types = (ConjunctiveGraph, Dataset)
+
+    expectations = ((with_named_graphs, True), (without_named_graphs, False))
+    for graph_paths, expected in expectations:
+        for graph_path, graph_type in itertools.product(graph_paths, graph_types):
+            relative_path = graph_path.relative_to(TEST_DATA_DIR)
+            yield pytest.param(
+                graph_path,
+                graph_type,
+                ValueOutcome(expected),
+                id=f"{relative_path}-{graph_type.__name__}-{expected}",
+            )
+
+
+@pytest.mark.parametrize(
+    ["source", "graph_type", "expected_outcome"], make_has_non_default_graphs_cases()
+)
+def test_has_non_default_graphs(
+    source: Path,
+    graph_type: Type[_ConjunctiveGraphT],
+    expected_outcome: ExpectedOutcome[bool],
+) -> None:
+    """
+    Check that `_has_non_default_graphs` gives the expected outcome for a graph
+    of type `graph_type` loaded from `source`.
+    """
+    with expected_outcome.check_raises():
+        graph = load_sources(source, graph_type=graph_type)
+        expected_outcome.check_value(_has_non_default_graphs(graph))
diff --git a/test/utils/variants.py b/test/utils/variants.py
new file mode
100644
index 000000000..d7ad1abb8
--- /dev/null
+++ b/test/utils/variants.py
@@ -0,0 +1,40 @@
+"""
+Functionality for interacting with graph variants in `test/data/variants`.
+"""
+
+from functools import lru_cache
+from importlib import import_module
+from typing import Type
+
+from rdflib.graph import Graph, _GraphT
+
+
+def parse_pyvariant(variant_name: str, target: Graph) -> None:
+    """
+    Populate `target` from the Python graph variant with the given name.
+
+    The variant is the module `test.data.variants.<variant_name>`, whose
+    `populate_graph` function is called with `target`.
+
+    :param variant_name: the name of the graph variant to parse
+    :param target: the graph to parse the variant into
+    """
+    variant_module = import_module(f"test.data.variants.{variant_name}")
+    variant_module.populate_graph(target)
+
+
+@lru_cache(maxsize=None)
+def load_pyvariant(variant_name: str, graph_type: Type[_GraphT]) -> _GraphT:
+    """
+    Create a graph of the given type and fill it with the named graph variant.
+
+    Results are cached, so calling this again with the same arguments returns
+    the same graph object rather than a fresh copy.
+
+    :param variant_name: the name of the graph variant to load
+    :param graph_type: the class of graph to create and populate
+    :return: the loaded graph variant
+    """
+    loaded = graph_type()
+    parse_pyvariant(variant_name, loaded)
+    return loaded