Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: raise an exception when serialization format does not support quads #2430

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 28 additions & 1 deletion rdflib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
if TYPE_CHECKING:
from rdflib.graph import Graph


__all__ = [
"list2set",
"first",
Expand Down Expand Up @@ -615,3 +614,31 @@ def _iri2uri(iri: str) -> str:
uri += "#"

return uri


def _has_non_default_graphs(graph: rdflib.graph.ConjunctiveGraph) -> bool:
    """
    Check if the container passed as `graph` contains graphs other than the
    default graph.

    The intent of this is to detect if the value passed can be serialized using
    formats which do not support named graphs like N-Triples and Turtle.

    Ideally this function would check if the supplied value contains any named
    graphs, but RDFLib assigns a name to the default graph, so the best that can
    be done is to check if the supplied graph contains any graphs other than the
    default graph.

    :param graph: the graph container whose contexts are inspected.
    :return: `False` if `graph` contains only the default graph and no other
        graphs (or yields no contexts at all), `True` if it contains at least
        one graph other than the default graph.
    """
    default_identifier = graph.default_context.identifier
    for context_index, context in enumerate(graph.contexts()):
        # A second context of any kind, or a first context that is not the
        # default one, means there is something besides the default graph.
        if context_index > 0 or context.identifier != default_identifier:
            return True
    return False
File renamed without changes.
8 changes: 4 additions & 4 deletions test/data/variants/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# multi variant graphs

This directory containts variants of the same graph encoded in different
This directory contains variants of the same graph encoded in different
formats, or differently in the same format.

The graph that a specific file is a variant of is determined by it's filename.
Files that differ only in file extention but have the same basename are
The graph that a specific file is a variant of is determined by its filename.
Files that differ only in file extension but have the same basename are
considered variants of the same graph. Additionally, any suffix that matches
`-variant-[^/]*` is excluded when determening the graph key, so the following
`-variant-[^/]*` is excluded when determining the graph key, so the following
files are all considered variants of the same graph:

```
Expand Down
5 changes: 5 additions & 0 deletions test/data/variants/diverse_triples.nq
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<http://example.com/subject> <http://example.com/predicate> "日本語の表記体系"@jpx .
<urn:example:subject> <example:predicate> <example:subject> .
<example:object> <http://example.com/predicate> "XSD string" .
<example:subject> <example:predicate> <example:object> .
<example:subject> <example:predicate> "12"^^<http://www.w3.org/2001/XMLSchema#integer> .
1 change: 0 additions & 1 deletion test/data/variants/diverse_triples.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ def populate_graph(graph: Graph) -> None:
graph.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))
graph.add((EGSCHEME.subject, EGSCHEME.predicate, Literal(12)))


__all__ = ["populate_graph"]
15 changes: 15 additions & 0 deletions test/data/variants/diverse_triples.trig
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
@prefix eghttp: <http://example.com/> .
@prefix egurn: <urn:example:> .
@prefix egschema: <example:> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

{
eghttp:subject eghttp:predicate "日本語の表記体系"@jpx .

egurn:subject egschema:predicate egschema:subject .
}

egschema:object eghttp:predicate "XSD string"^^xsd:string .

egschema:subject egschema:predicate egschema:object,
12 .
52 changes: 52 additions & 0 deletions test/test_dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,20 @@
import tempfile
from test.data import CONTEXT1, LIKES, PIZZA, TAREK
from test.utils.namespace import EGSCHEME
import logging
import os
import shutil
import tempfile
from typing import Optional
from rdflib.term import Identifier, Literal

import pytest

from rdflib import URIRef, plugin
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
from rdflib.store import Store
from test.utils.namespace import EGDC, EGSCHEME, EGURN
from rdflib.namespace import XSD

# Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if
# available. This assumes SPARQL1.1 query/update endpoints running locally at
Expand Down Expand Up @@ -261,3 +269,47 @@ def test_subgraph_without_identifier() -> None:
) == ("genid", genid_prefix)

assert f"{subgraph.identifier}".startswith(genid_prefix)


def test_updating_datatype() -> None:
    """
    Check that a literal is still reported in its named graph after its
    datatype has been cleared in place.

    A dataset is populated with one triple added directly to the dataset and
    one ``xsd:string`` literal triple added to the ``EGURN.graph`` context.
    The datatype of every ``xsd:string`` literal is then reset by writing to
    the literal's private ``_datatype`` attribute, and the literal must still
    be found under the ``EGURN.graph`` context identifier afterwards.
    """
    dataset = Dataset()

    dataset.add((EGSCHEME.subject, EGSCHEME.predicate, EGSCHEME.object))

    egurn_graph = dataset.get_context(EGURN.graph)
    egurn_graph.add(
        (EGSCHEME.subject, EGDC.predicate, Literal("XSD string", datatype=XSD.string))
    )

    def find_literal_obj_ctx_id(
        dataset: Dataset, literal_value: str
    ) -> Optional[Identifier]:
        """
        Return the fourth element (the context) of the first quad whose object
        is a literal with the given value, or `None` if there is no such quad.
        """
        for quad in dataset.quads((None, None, None, None)):
            if isinstance(quad[2], Literal) and quad[2].value == literal_value:
                logging.debug("quad = %s", quad)
                return quad[3]
        return None

    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph

    for context in dataset.contexts():
        logging.debug("context.identifier = %s", context.identifier)
        for triple in context:
            logging.debug("triple = %s", triple)
            obj = triple[2]
            if not isinstance(obj, Literal):
                continue
            if obj.datatype is None:
                continue
            logging.debug("object.datatype = %s", obj.datatype)
            if obj.datatype == XSD.string:
                # Deliberately mutate the private attribute: the test is about
                # what happens to a stored literal when its datatype changes
                # underneath the store.
                obj._datatype = None

    assert find_literal_obj_ctx_id(dataset, "XSD string") == EGURN.graph


# found = False
# for quad in (quad for quad in dataset.quads((None, None, None, None))):
# if isinstance(quad[2], Literal) and quad[2].value == "XSD string":
# logging.debug("quad = %s", quad)
# # found = True
# # break
# # assert found is True
53 changes: 53 additions & 0 deletions test/test_serializers/test_serialize_context_aware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import itertools
import logging
from test.utils.graph import GRAPH_FORMATS, GraphType
from test.utils.variants import load_pyvariant
from typing import Dict, Iterable, Type

import pytest
from _pytest.mark.structures import ParameterSet
from _pytest.outcomes import Failed

from rdflib.graph import ConjunctiveGraph, Dataset, Graph


def make_quads_in_triples_cases() -> Iterable[ParameterSet]:
    """
    Yield one parametrized case per (container type, serializer) pair, where
    the serializer belongs to a format that only supports triples.

    Every case is marked ``xfail(raises=Failed)``.
    """
    formats_without_quads = []
    for candidate in GRAPH_FORMATS:
        # Keep only formats whose supported graph types are exactly {TRIPLE}.
        if candidate.info.graph_types == {GraphType.TRIPLE}:
            formats_without_quads.append(candidate)

    # Same ordering as a cartesian product: container type varies slowest.
    for container_type in (ConjunctiveGraph, Dataset):
        for triple_format in formats_without_quads:
            for serializer_name in triple_format.info.serializers:
                yield pytest.param(
                    container_type,
                    serializer_name,
                    marks=pytest.mark.xfail(raises=Failed),
                )


# The "diverse_quads" Python variant loaded into each of the two container
# types under test. These are built once, when this module is imported.
CONJUNCTIVE_GRAPH_WITH_QUADS = load_pyvariant("diverse_quads", ConjunctiveGraph)
DATASET_WITH_QUADS = load_pyvariant("diverse_quads", Dataset)

# Lookup from container type to its pre-loaded instance, so parametrized tests
# can be keyed on the type alone.
GRAPHS: Dict[Type[Graph], Graph] = {
    ConjunctiveGraph: CONJUNCTIVE_GRAPH_WITH_QUADS,
    Dataset: DATASET_WITH_QUADS,
}


@pytest.mark.parametrize(["graph_type", "serializer"], make_quads_in_triples_cases())
def test_quads_in_triples(graph_type: Type[ConjunctiveGraph], serializer: str) -> None:
    """
    A container holding named graphs (i.e. quads) must raise when it is
    serialized with a serializer for a format without named graph support.
    """
    quad_container = GRAPHS[graph_type]
    # Identity check on the type: an instance of a subclass would not pass.
    assert type(quad_container) is graph_type

    with pytest.raises(Exception) as caught:
        quad_container.serialize(format=serializer)

    # Log the captured exception together with its traceback.
    raised = caught.value
    logging.debug("caught.value = %r", raised, exc_info=raised)
78 changes: 74 additions & 4 deletions test/test_util.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
from __future__ import annotations

import itertools
from json import load
import logging
import time
from contextlib import ExitStack
from pathlib import Path
from test.data import TEST_DATA_DIR
from test.utils.graph import cached_graph
from test.utils.graph import cached_graph, load_sources
from test.utils.namespace import RDFT
from typing import Any, Collection, List, Optional, Set, Tuple, Type, Union
from test.utils.outcome import ExpectedOutcome, ValueOutcome
from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type, Union

import pytest
from _pytest.mark.structures import ParameterSet

from rdflib import XSD, util
from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph
from rdflib.graph import (
ConjunctiveGraph,
Dataset,
Graph,
QuotedGraph,
_ConjunctiveGraphT,
)
from rdflib.namespace import RDF, RDFS
from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef
from rdflib.util import _coalesce, _iri2uri, find_roots, get_tree
from rdflib.util import (
_coalesce,
_has_non_default_graphs,
_iri2uri,
find_roots,
get_tree,
)

n3source = """\
@prefix : <http://www.w3.org/2000/10/swap/Primer#>.
Expand Down Expand Up @@ -672,3 +688,57 @@ def test_iri2uri(iri: str, expected_result: Union[Set[str], Type[Exception]]) ->
else:
assert isinstance(expected_result, set)
assert result in expected_result


def make_has_non_default_graphs_cases() -> Iterable[ParameterSet]:
    """
    Yield parametrized cases for `test_has_non_default_graphs`.

    Each case is ``(source path, container type, expected outcome)``. Sources
    listed as having named graphs expect `True`, the remaining sources expect
    `False`. Every source is paired with both `ConjunctiveGraph` and `Dataset`.
    """
    variants_dir = TEST_DATA_DIR / "variants"
    container_types = (ConjunctiveGraph, Dataset)

    with_named_graphs = [
        variants_dir / file_name
        for file_name in (
            "diverse_quads.trig",
            "diverse_quads.py",
            "simple_quad.nq",
            "simple_quad.py",
        )
    ]
    without_named_graphs = [
        variants_dir / file_name
        for file_name in (
            "diverse_triples.ttl",
            "diverse_triples.py",
            "simple_triple.nt",
            "simple_triple.trig",
            "simple_triple.py",
        )
    ]

    for graph_path in with_named_graphs:
        for graph_type in container_types:
            yield pytest.param(
                graph_path,
                graph_type,
                ValueOutcome(True),
                id=f"{graph_path.relative_to(TEST_DATA_DIR)}-{graph_type.__name__}-True",
            )

    # NOTE(review): an xfail mark for non-`.py` sources was sketched here in
    # commented-out form; at present no marks are applied to these cases.
    no_marks: Collection[pytest.MarkDecorator] = tuple()
    for graph_path in without_named_graphs:
        for graph_type in container_types:
            yield pytest.param(
                graph_path,
                graph_type,
                ValueOutcome(False),
                id=f"{graph_path.relative_to(TEST_DATA_DIR)}-{graph_type.__name__}-False",
                marks=no_marks,
            )


@pytest.mark.parametrize(
    ["source", "graph_type", "expected_outcome"], make_has_non_default_graphs_cases()
)
def test_has_non_default_graphs(
    source: Path,
    graph_type: Type[_ConjunctiveGraphT],
    expected_outcome: ExpectedOutcome[bool],
) -> None:
    """
    Load `source` into a container of type `graph_type` and check the result
    of `_has_non_default_graphs` against `expected_outcome`.
    """
    with expected_outcome.check_raises():
        loaded = load_sources(source, graph_type=graph_type)
        actual = _has_non_default_graphs(loaded)
        expected_outcome.check_value(actual)
34 changes: 34 additions & 0 deletions test/utils/variants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
Functionality for interacting with graph variants in `test/data/variants`.
"""

from functools import lru_cache
from importlib import import_module
from typing import Type

from rdflib.graph import Graph, _GraphT


def parse_pyvariant(variant_name: str, target: Graph) -> None:
    """
    Populate `target` from the Python graph variant called `variant_name`.

    The variant is the module ``test.data.variants.<variant_name>``; it is
    imported and its ``populate_graph`` function is called with `target`.

    :param variant_name: the name of the graph variant to parse
    :param target: the graph to parse the variant into
    """
    variant_module = import_module(f"test.data.variants.{variant_name}")
    populate = variant_module.populate_graph
    populate(target)


@lru_cache(maxsize=None)
def load_pyvariant(variant_name: str, graph_type: Type[_GraphT]) -> _GraphT:
    """
    Build a new `graph_type` instance and fill it from the named variant.

    Results are cached without bound per ``(variant_name, graph_type)`` pair,
    so repeated calls with the same arguments return the same graph object.

    :param variant_name: the name of the graph variant to load
    :param graph_type: the graph class to instantiate, called with no
        arguments
    :return: the loaded graph variant
    """
    loaded = graph_type()
    parse_pyvariant(variant_name, loaded)
    return loaded