Skip to content

Commit

Permalink
Fixed and refactored roundtrip, n3_suite and nt_suite tests
Browse files Browse the repository at this point in the history
Issues in previous roundtrip that are fixed:

1. Roundtrip tests were skipping tests that would fail instead of
   reporting them with xfail.
   While this is maybe a minor distinction it
   is more appropriate to report as xfail as that is a better semantic
   description of a known issue. Skipping should be reserved for tests
   that are expected to succeed when run but can only run in a suitable
   environment (e.g. if berkeleydb is installed). Furthermore, marking
   tests with xfail will still run the test, and will also report them
   as unexpected pass (xpass) if they do actually pass.

2. Removed ("json-ld", "n3-writer-test-16.n3") from xfail/skip as it does
   pass.

Refactoring and improvements:

1. Removed `test/test_n3_suite.py::test_n3_writing`.
   This test was doing something functionally equivalent to what
   `test/test_roundtrip.py::test_n3` would do, except it was doing it
   with different code (`test/testutils.py::check_serialize_parse`).
   Specifically, it would parse from n3, serialize to n3, and then
   parse from n3 again, which is also something that
   `test/test_roundtrip.py::test_n3` does. One difference is that
   `check_serialize_parse` was using `crapCompare` instead of
   `rdflib.compare.isomorphic`, and `crapCompare` does a significantly
   worse job but also hides an issue with `rdflib.compare.isomorphic`
   and quoted graphs which is now correctly reported as xfail.
2. Removed `_get_test_files_formats` and `all_{n3,nt}_files` from
   `test/test_{nt,n3}_suite.py` (and removed the files since there is
   nothing else in them of value). These are now replaced with code
   inside `test/test_roundtrip.py` which has less duplication and uses
   `guess_format` for determining formats instead of a custom if/elif
   block.
3. Changed skipping/xfail to work on file basename instead of path.
4. Changed file collector from a generator to a function returning a
   list, as the result of this function is iterated over multiple times.
5. Changed test IDs to be more meaningful, e.g. `roundtrip_n3-writer-test-28.n3_n3_trig`
   instead of `roundtrip-args303`.

Other changes:

1. Renamed `test/nt/test.ntriples` -> `test/nt/test.nt`: .ntriples is not
  a recognized extension and the test this is being used in does not
  test anything pertaining to extension handling.
2. Add guess_format tests for `.{rdf,nt,n3}` and negative tests for
  `.{docx,mkv}` and a file with no extension.
  • Loading branch information
aucampia committed Jan 2, 2022
1 parent d496504 commit e8fe7e1
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 220 deletions.
File renamed without changes.
38 changes: 0 additions & 38 deletions test/test_n3_suite.py

This file was deleted.

2 changes: 1 addition & 1 deletion test/test_nt_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_W3CNTriplesParser_parsestring(self):
self.assertTrue(res == None)

def test_w3_ntriple_variants(self):
uri = Path(nt_file("test.ntriples")).absolute().as_uri()
uri = Path(nt_file("test.nt")).absolute().as_uri()

parser = ntriples.W3CNTriplesParser()
u = urlopen(uri)
Expand Down
52 changes: 0 additions & 52 deletions test/test_nt_suite.py

This file was deleted.

6 changes: 6 additions & 0 deletions test/test_parse_file_guess_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def test_guess_format(self) -> None:
self.assertEqual(guess_format("local-file.json-ld"), "json-ld")
self.assertEqual(guess_format("/some/place/on/disk/example.json"), "json-ld")
self.assertEqual(guess_format("../../relative/place/on/disk/example.json"), "json-ld")
self.assertEqual(guess_format("example.rdf"), "xml")
self.assertEqual(guess_format("example.nt"), "nt")
self.assertEqual(guess_format("example.n3"), "n3")
self.assertIsNone(guess_format("example.docx", None))
self.assertIsNone(guess_format("example", None))
self.assertIsNone(guess_format("example.mkv", None))

def test_jsonld(self) -> None:
g = Graph()
Expand Down
2 changes: 1 addition & 1 deletion test/test_parser_hext.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_roundtrip():
"even_more_literals.nt": "JSON decoding error",
"literals-02.nt": "JSON decoding error",
"more_literals.nt": "JSON decoding error",
"test.ntriples": "JSON decoding error",
"test.nt": "JSON decoding error",
"literals-05.nt": "JSON decoding error",
"i18n-01.nt": "JSON decoding error",
"literals-04.nt": "JSON decoding error",
Expand Down
236 changes: 163 additions & 73 deletions test/test_roundtrip.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import logging
import os.path
from pathlib import Path
from test.testutils import GraphHelper
from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union
from xml.sax import SAXParseException

import pytest
from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet

import rdflib
import rdflib.compare

try:
from .test_nt_suite import all_nt_files
assert all_nt_files

from .test_n3_suite import all_n3_files
assert all_n3_files
except:
from test.test_nt_suite import all_nt_files
from test.test_n3_suite import all_n3_files
from rdflib.util import guess_format

"""
Test round-tripping by all serializers/parser that are registered.
Expand All @@ -35,27 +34,113 @@
"""


SKIP = [
("xml", "test/n3/n3-writer-test-29.n3"),
# has predicates that cannot be shortened to strict qnames
("xml", "test/nt/qname-02.nt"), # uses a property that cannot be qname'd
("trix", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec
("xml", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec
("json-ld", "test/nt/keywords-04.nt"), # known NT->JSONLD problem
("json-ld", "test/n3/example-misc.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-16.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-11.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-28.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-26.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-28.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-22.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-21.n3"), # known N3->JSONLD problem
]


def roundtrip(e, verbose=False):
infmt, testfmt, source = e
# Directory with the N-Triples input files, located next to this test module.
NT_DATA_DIR = Path(__file__).parent.joinpath("nt")

# Files in NT_DATA_DIR that are not valid N-Triples, grouped by the reason
# they are invalid.

# illegal literal as subject
_NT_LITERAL_AS_SUBJECT = {
    "literals-01.nt",
    "keywords-08.nt",
    "paths-04.nt",
    *(f"numeric-{number:02d}.nt" for number in range(1, 6)),
}

# illegal variables
_NT_ILLEGAL_VARIABLES = {
    f"formulae-{number:02d}.nt" for number in (1, 2, 3, 5, 6, 10)
}

# illegal bnode as predicate
_NT_BNODE_AS_PREDICATE = {
    "paths-06.nt",
    "anons-02.nt",
    "anons-03.nt",
    "qname-01.nt",
    "lists-06.nt",
}

INVALID_NT_FILES = (
    _NT_LITERAL_AS_SUBJECT | _NT_ILLEGAL_VARIABLES | _NT_BNODE_AS_PREDICATE
)


# Directory with the N3 input files, located next to this test module.
N3_DATA_DIR = Path(__file__).parent / "n3"

# Known roundtrip failures, one row per case:
# (format under test, input file basename, reason, expected exception).
_KNOWN_FAILURES = (
    (
        "xml",
        "n3-writer-test-29.n3",
        "has predicates that cannot be shortened to strict qnames",
        ValueError,
    ),
    ("xml", "qname-02.nt", "uses a property that cannot be qname'd", ValueError),
    (
        "trix",
        "strquot.n3",
        "contains characters forbidden by the xml spec",
        SAXParseException,
    ),
    (
        "xml",
        "strquot.n3",
        "contains characters forbidden by the xml spec",
        SAXParseException,
    ),
    ("json-ld", "keywords-04.nt", "known NT->JSONLD problem", AssertionError),
    ("json-ld", "example-misc.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "rdf-test-11.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "rdf-test-28.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "n3-writer-test-26.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "n3-writer-test-28.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "n3-writer-test-22.n3", "known N3->JSONLD problem", AssertionError),
    ("json-ld", "rdf-test-21.n3", "known N3->JSONLD problem", AssertionError),
    (
        "n3",
        "example-lots_of_graphs.n3",
        "rdflib.compare.isomorphic does not work for quoted graphs.",
        AssertionError,
    ),
)

# Maps (format under test, input file basename) to the xfail mark that should
# be attached to the corresponding test case.
XFAILS = {
    (testfmt, basename): pytest.mark.xfail(reason=reason, raises=raises)
    for testfmt, basename, reason, raises in _KNOWN_FAILURES
}

# This is for files which can only be represented properly in one format.
# Keys are input file basenames; each value is the set of formats the file may
# be tested with. Test formats outside that set are skipped for the file.
CONSTRAINED_FORMAT_MAP = {
"example-lots_of_graphs.n3": {"n3"} # only n3 can serialize QuotedGraph
}


def collect_files(
    directory: Path, exclude_names: Optional[Set[str]] = None
) -> List[Tuple[Path, str]]:
    """
    Recursively collect the files under ``directory`` together with their format.

    :param directory: Directory that is searched recursively for files.
    :param exclude_names: Basenames of files to leave out of the result, or
        ``None`` to exclude nothing.
    :return: ``(path, format)`` tuples sorted by path, where ``format`` is the
        value ``guess_format`` returns for the file's basename.
    :raises ValueError: If ``guess_format`` returns ``None`` for a file that
        was not excluded.
    """
    excluded = exclude_names if exclude_names is not None else frozenset()
    result: List[Tuple[Path, str]] = []
    # Sorted because glob order depends on the file system, while the result
    # feeds test parametrization, which should be the same on every run.
    for path in sorted(directory.glob("**/*")):
        if not path.is_file() or path.name in excluded:
            continue
        # Named ``fmt`` so the ``format`` builtin is not shadowed.
        fmt = guess_format(path.name)
        if fmt is None:
            raise ValueError(f"could not determine format for {path}")
        result.append((path, fmt))
    return result


def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) -> None:

g1 = rdflib.ConjunctiveGraph()

Expand Down Expand Up @@ -89,54 +174,59 @@ def roundtrip(e, verbose=False):
print("Ok!")


formats = None
_formats: Optional[Set[str]] = None


def get_cases():
global formats
if not formats:
def get_formats() -> Set[str]:
global _formats
if not _formats:
serializers = set(
x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
)
parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
formats = parsers.intersection(serializers)

for testfmt in formats:
if testfmt != "hext":
if "/" in testfmt:
continue # skip double testing
for f, infmt in all_nt_files():
if (testfmt, f) not in SKIP:
yield roundtrip, (infmt, testfmt, f)


@pytest.mark.parametrize("checker, args", get_cases())
def test_cases(checker, args):
checker(args)
_formats = {
format for format in parsers.intersection(serializers) if "/" not in format
}
return _formats


def get_n3_test():
global formats
if not formats:
serializers = set(
x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
)
parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
formats = parsers.intersection(serializers)

def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
formats = get_formats()
for testfmt in formats:
if testfmt != "hext":
if "/" in testfmt:
continue # skip double testing
for f, infmt in all_n3_files():
if (testfmt, f) not in SKIP:
yield roundtrip, (infmt, testfmt, f)


@pytest.mark.parametrize("checker, args", get_n3_test())
def test_n3(checker, args):
checker(args)


if __name__ == "__main__":
print("hi")
if testfmt == "hext":
continue
logging.debug("testfmt = %s", testfmt)
for f, infmt in files:
constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
if constrained_formats is not None and testfmt not in constrained_formats:
logging.debug(
f"skipping format {testfmt} as it is not in the list of constrained formats for {f} which is {constrained_formats}"
)
continue
marks: List[Union[MarkDecorator, Mark]] = []
xfail = XFAILS.get((testfmt, f.name))
if xfail:
marks.append(xfail)
id = f"roundtrip_{os.path.basename(f)}_{infmt}_{testfmt}"
values = (roundtrip, (infmt, testfmt, f))
logging.debug("values = %s", values)
yield pytest.param(*values, marks=marks, id=id)


def test_formats() -> None:
    """Sanity check that ``get_formats`` reports more than four formats."""
    available = get_formats()
    logging.debug("formats = %s", available)
    assert available is not None
    assert len(available) > 4


@pytest.mark.parametrize(
    "checker, args",
    make_cases(collect_files(NT_DATA_DIR, exclude_names=INVALID_NT_FILES)),
)
def test_nt(
    checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]
) -> None:
    """
    Run ``checker`` for each case generated from the files under
    ``NT_DATA_DIR``, leaving out the names in ``INVALID_NT_FILES``.
    """
    checker(*args)


@pytest.mark.parametrize(
    "checker, args",
    make_cases(collect_files(N3_DATA_DIR)),
)
def test_n3(
    checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]
) -> None:
    """Run ``checker`` for each case generated from the files under ``N3_DATA_DIR``."""
    checker(*args)

0 comments on commit e8fe7e1

Please sign in to comment.