Skip to content

Commit

Permalink
Add more tests for graph_diff
Browse files Browse the repository at this point in the history
This is to confirm that `graph_diff` does not work with quads.

Other changes:
- Added support to GraphHelper for collapsing BNodes to a specific IRI
  when converting to triple sets; this is in addition to the current
  functionality for ignoring BNodes. This makes it possible to more
  accurately compare Graphs that contain blank nodes.
- Added `assert_cgraph_isomorphic` to GraphHelper.
  This method adds some rudimentary support for checking if conjunctive
  graphs are isomorphic; it will ignore named graphs that have blank
  nodes as names.
  • Loading branch information
aucampia committed Jun 5, 2022
1 parent a3e6f67 commit 1394f60
Show file tree
Hide file tree
Showing 5 changed files with 488 additions and 85 deletions.
174 changes: 168 additions & 6 deletions test/test_graph/test_diff.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
from test.utils import GraphHelper
from typing import TYPE_CHECKING, Set
from dataclasses import dataclass, field
from test.utils import (
COLLAPSED_BNODE,
BNodeHandling,
GHQuad,
GHTriple,
GraphHelper,
MarksType,
MarkType,
)
from typing import TYPE_CHECKING, Collection, Set, Tuple, Type, Union, cast

import pytest
from _pytest.mark.structures import ParameterSet

import rdflib
from rdflib import Graph
from rdflib.compare import graph_diff
from rdflib.namespace import FOAF, RDF
from rdflib.graph import ConjunctiveGraph, Dataset
from rdflib.namespace import FOAF, RDF, Namespace
from rdflib.term import BNode, Literal

if TYPE_CHECKING:
Expand All @@ -15,7 +26,7 @@
"""Test for graph_diff - much more extensive testing
would certainly be possible"""

_TripleSetT = Set["_TripleType"]
_TripleSetType = Set["_TripleType"]


class TestDiff:
Expand Down Expand Up @@ -48,7 +59,7 @@ def test_subsets(self) -> None:
triples only in `g0`, and that there are triples that occur in both
`g0` and `g1`, and that there are triples only in `g1`.
"""
g0_ts: _TripleSetT = set()
g0_ts: _TripleSetType = set()
bnode = BNode()
g0_ts.update(
{
Expand All @@ -59,7 +70,7 @@ def test_subsets(self) -> None:
g0 = Graph()
g0 += g0_ts

g1_ts: _TripleSetT = set()
g1_ts: _TripleSetType = set()
bnode = BNode()
g1_ts.update(
{
Expand All @@ -76,3 +87,154 @@ def test_subsets(self) -> None:
assert in_first == set()
assert len(in_second) > 0
assert len(in_both) > 0


# Expected diff elements: a collection of triples or a collection of quads.
_ElementSetType = Union[Collection[GHTriple], Collection[GHQuad]]

# Either an element collection as above, or a string that
# `GraphDiffCase.as_element_set` parses into one.
_ElementSetTypeOrStr = Union[_ElementSetType, str]


@dataclass
class GraphDiffCase:
    """
    A single parametrized scenario for exercising ``graph_diff``.

    ``lhs`` and ``rhs`` are serialized graphs in ``format``.
    ``expected_result`` holds, in order, the elements expected in both
    graphs, only in ``lhs`` and only in ``rhs``. Each entry is either a
    ready-made element collection or a string in ``format`` that is parsed
    on demand by `as_element_set`.
    """

    graph_type: Type[Graph]
    format: str
    lhs: str
    rhs: str
    expected_result: Tuple[
        _ElementSetTypeOrStr, _ElementSetTypeOrStr, _ElementSetTypeOrStr
    ]
    # Marks handed to ``pytest.param`` by `as_params`.
    marks: MarkType = field(default_factory=lambda: cast(MarksType, []))

    def as_element_set(self, value: _ElementSetTypeOrStr) -> _ElementSetType:
        """
        Return ``value`` as an element collection.

        Anything that is not a string is returned unchanged. A string is
        parsed into a new ``graph_type`` instance using ``format`` and then
        converted to a quad set (for ``ConjunctiveGraph`` instances) or a
        triple set (otherwise), with ``BNodeHandling.COLLAPSE``.
        """
        if not isinstance(value, str):
            return value

        parsed = self.graph_type()
        parsed.parse(data=value, format=self.format)
        to_set = (
            GraphHelper.quad_set
            if isinstance(parsed, ConjunctiveGraph)
            else GraphHelper.triple_set
        )
        return to_set(parsed, BNodeHandling.COLLAPSE)

    def expected_in_both_set(self) -> _ElementSetType:
        """Return the first entry of ``expected_result`` as an element set."""
        in_both = self.expected_result[0]
        return self.as_element_set(in_both)

    def expected_in_lhs_set(self) -> _ElementSetType:
        """Return the second entry of ``expected_result`` as an element set."""
        only_in_lhs = self.expected_result[1]
        return self.as_element_set(only_in_lhs)

    def expected_in_rhs_set(self) -> _ElementSetType:
        """Return the third entry of ``expected_result`` as an element set."""
        only_in_rhs = self.expected_result[2]
        return self.as_element_set(only_in_rhs)

    def as_params(self) -> ParameterSet:
        """Wrap this case in ``pytest.param`` so that ``marks`` are applied."""
        return pytest.param(self, marks=self.marks)


# Namespace for the `example:` IRI scheme that the `eg:` prefix in the test data maps to.
EGSCHEME = Namespace("example:")


@pytest.mark.parametrize(
    "test_case",
    [
        # lhs carries one extra triple made up solely of blank nodes.
        GraphDiffCase(
            Graph,
            format="turtle",
            lhs="""
                @prefix eg: <example:> .
                _:a _:b _:c .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            rhs="""
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            expected_result=(
                """
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
                {(COLLAPSED_BNODE, COLLAPSED_BNODE, COLLAPSED_BNODE)},
                "",
            ),
        ),
        # Identical graphs: everything is shared, nothing is one-sided.
        GraphDiffCase(
            Graph,
            format="turtle",
            lhs="""
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            rhs="""
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            expected_result=(
                """
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
                "",
                "",
            ),
        ),
        # Same data loaded into a Dataset; marked as an expected failure
        # raising ValueError because quads are not supported.
        GraphDiffCase(
            Dataset,
            format="trig",
            lhs="""
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            rhs="""
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
            expected_result=(
                """
                @prefix eg: <example:> .
                eg:o0 eg:p0 eg:s0 .
                eg:o1 eg:p1 eg:s1 .
                """,
                "",
                "",
            ),
            marks=pytest.mark.xfail(
                reason="quads are not supported", raises=ValueError
            ),
        ).as_params(),
    ],
)
def test_assert_sets_equal(test_case: GraphDiffCase):
    """
    ``graph_diff`` yields the expected in-both, lhs-only and rhs-only
    elements for each case, and swapping its arguments swaps the two
    one-sided results while leaving the shared part unchanged.

    Blank nodes are collapsed before the element sets are compared.
    """

    def load(data: str) -> Graph:
        # Parse ``data`` into a fresh graph of the type under test.
        graph: Graph = test_case.graph_type()
        graph.parse(data=data, format=test_case.format)
        return graph

    def diff_sets(first: Graph, second: Graph):
        # Run graph_diff and convert each of the three resulting graphs to
        # an element set with blank nodes collapsed.
        return [
            GraphHelper.triple_or_quad_set(part, BNodeHandling.COLLAPSE)
            for part in graph_diff(first, second)
        ]

    lhs_graph = load(test_case.lhs)
    rhs_graph = load(test_case.rhs)

    in_both_set, in_lhs_set, in_rhs_set = diff_sets(lhs_graph, rhs_graph)

    assert test_case.expected_in_both_set() == in_both_set
    assert test_case.expected_in_lhs_set() == in_lhs_set
    assert test_case.expected_in_rhs_set() == in_rhs_set

    # Diff should be symmetric: with the arguments swapped, the "first"
    # result holds the rhs-only elements and the "second" the lhs-only ones.
    in_rboth_set, in_rlhs_set, in_rrhs_set = diff_sets(rhs_graph, lhs_graph)

    assert test_case.expected_in_both_set() == in_rboth_set
    assert test_case.expected_in_rhs_set() == in_rlhs_set
    assert test_case.expected_in_lhs_set() == in_rrhs_set
4 changes: 2 additions & 2 deletions test/test_graph/test_graph_cbd.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from test.data import TEST_DATA_DIR
from test.utils import GraphHelper
from test.utils import BNodeHandling, GraphHelper

import pytest

Expand Down Expand Up @@ -130,7 +130,7 @@ def test_cbd_example():

query = "http://example.com/aReallyGreatBook"
GraphHelper.assert_isomorphic(g.cbd(URIRef(query)), g_cbd)
GraphHelper.assert_sets_equals(g.cbd(URIRef(query)), g_cbd, exclude_blanks=True)
GraphHelper.assert_sets_equals(g.cbd(URIRef(query)), g_cbd, BNodeHandling.COLLAPSE)
assert len(g.cbd(URIRef(query))) == (
21
), "cbd() for aReallyGreatBook should return 21 triples"
10 changes: 7 additions & 3 deletions test/test_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os.path
from pathlib import Path
from test.data import TEST_DATA_DIR
from test.utils import GraphHelper
from test.utils import BNodeHandling, GraphHelper
from typing import Callable, Iterable, List, Optional, Set, Tuple, Type, Union
from xml.sax import SAXParseException

Expand Down Expand Up @@ -285,9 +285,13 @@ def roundtrip(
GraphHelper.assert_isomorphic(g1, g2)
if checks is not None:
if Check.SET_EQUALS in checks:
GraphHelper.assert_sets_equals(g1, g2, exclude_blanks=False)
GraphHelper.assert_sets_equals(
g1, g2, bnode_handling=BNodeHandling.COLLAPSE
)
if Check.SET_EQUALS_WITHOUT_BLANKS in checks:
GraphHelper.assert_sets_equals(g1, g2, exclude_blanks=True)
GraphHelper.assert_sets_equals(
g1, g2, bnode_handling=BNodeHandling.COLLAPSE
)

if logger.isEnabledFor(logging.DEBUG):
logger.debug("OK")
Expand Down

0 comments on commit 1394f60

Please sign in to comment.