Fixed and refactored roundtrip, n3_suite and nt_suite tests

Issues in previous roundtrip that are fixed: 1. Roundtrip tests were skipping tests that would fail instead of reporting them with xfail. While this is maybe a minor distinction, it is more appropriate to report them as xfail, as that is a better semantic description of a known issue. Skipping should be reserved for tests that are expected to succeed when run but can only run in a suitable environment (e.g. if berkeleydb is installed). Furthermore, marking tests with xfail will still run the test, and will also report them as unexpected pass (xpass) if they do actually pass. 2. Removed ("json-ld", "example-misc.n3") from xfail/skip as it does pass. Refactoring and improvements: 1. Removed `test/test_n3_suite.py::test_n3_writing`. This test was doing something functionally equivalent to what `test/test_roundtrip.py::test_n3` would do, except it was doing it with different code (`test/testutils.py::check_serialize_parse`). Specifically, it would parse from n3, serialize to n3, and parse from n3 again, and this is also something that `test/test_roundtrip.py::test_n3` does. One difference is that `check_serialize_parse` was using `crapCompare` instead of `rdflib.compare.isomorphic`, and `crapCompare` does a significantly worse job but also hides an issue with `rdflib.compare.isomorphic` and quoted graphs which is now correctly reported as xfail. 2. Removed `_get_test_files_formats` and `all_{n3,nt}_files` from `test/test_{nt,n3}_suite.py` (and removed the files since there is nothing else in them of value). These are now replaced with code inside `test/test_roundtrip.py` which has less duplication and uses `guess_format` for determining formats instead of a custom if/elif block. 3. Changed skipping/xfail to work on file basename instead of path. 4. Changed file collector from a generator to a function returning a list, as the result of this function is iterated over multiple times. 5. Changed test ids to be more meaningful, e.g. 
`roundtrip_n3-writer-test-28.n3_n3_trig` instead of `roundtrip-args303`. Other changes: 1. Renamed `test/nt/test.ntriples` -> `test/nt/test.nt`: .ntriples is not a recognized extension and the test this is being used in does not test anything pertaining to extension handling. 2. Added guess_format tests for `.{rdf,nt,n3}` and negative tests for `.{docx,mkv}` and a file with no extension.
RDFLib · Jan 2, 2022 · e8fe7e1 · e8fe7e1
1 parent d496504
commit e8fe7e1
Show file tree

Hide file tree

Showing 8 changed files with 171 additions and 220 deletions.
diff --git a/test/nt/test.ntriples → test/nt/test.nt b/test/nt/test.ntriples → test/nt/test.nt
diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py
diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py
@@ -126,7 +126,7 @@ def test_W3CNTriplesParser_parsestring(self):
         self.assertTrue(res == None)
 
     def test_w3_ntriple_variants(self):
-        uri = Path(nt_file("test.ntriples")).absolute().as_uri()
+        uri = Path(nt_file("test.nt")).absolute().as_uri()
 
         parser = ntriples.W3CNTriplesParser()
         u = urlopen(uri)

diff --git a/test/test_nt_suite.py b/test/test_nt_suite.py
diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py
@@ -26,6 +26,12 @@ def test_guess_format(self) -> None:
         self.assertEqual(guess_format("local-file.json-ld"), "json-ld")
         self.assertEqual(guess_format("/some/place/on/disk/example.json"), "json-ld")
         self.assertEqual(guess_format("../../relative/place/on/disk/example.json"), "json-ld")
+        self.assertEqual(guess_format("example.rdf"), "xml")
+        self.assertEqual(guess_format("example.nt"), "nt")
+        self.assertEqual(guess_format("example.n3"), "n3")
+        self.assertIsNone(guess_format("example.docx", None))
+        self.assertIsNone(guess_format("example", None))
+        self.assertIsNone(guess_format("example.mkv", None))
 
     def test_jsonld(self) -> None:
         g = Graph()

diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py
@@ -56,7 +56,7 @@ def test_roundtrip():
         "even_more_literals.nt": "JSON decoding error",
         "literals-02.nt": "JSON decoding error",
         "more_literals.nt": "JSON decoding error",
-        "test.ntriples": "JSON decoding error",
+        "test.nt": "JSON decoding error",
         "literals-05.nt": "JSON decoding error",
         "i18n-01.nt": "JSON decoding error",
         "literals-04.nt": "JSON decoding error",

diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
@@ -1,17 +1,16 @@
+import logging
+import os.path
+from pathlib import Path
+from test.testutils import GraphHelper
+from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union
+from xml.sax import SAXParseException
+
 import pytest
+from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
 
 import rdflib
 import rdflib.compare
-
-try:
-    from .test_nt_suite import all_nt_files
-    assert all_nt_files
-
-    from .test_n3_suite import all_n3_files
-    assert all_n3_files
-except:
-    from test.test_nt_suite import all_nt_files
-    from test.test_n3_suite import all_n3_files
+from rdflib.util import guess_format
 
 """
 Test round-tripping by all serializers/parser that are registered.
@@ -35,27 +34,113 @@
 
 """
 
-
-SKIP = [
-    ("xml", "test/n3/n3-writer-test-29.n3"),
-    # has predicates that cannot be shortened to strict qnames
-    ("xml", "test/nt/qname-02.nt"),  # uses a property that cannot be qname'd
-    ("trix", "test/n3/strquot.n3"),  # contains characters forbidden by the xml spec
-    ("xml", "test/n3/strquot.n3"),  # contains characters forbidden by the xml spec
-    ("json-ld", "test/nt/keywords-04.nt"),  # known NT->JSONLD problem
-    ("json-ld", "test/n3/example-misc.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-16.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-11.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-28.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-26.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-28.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-22.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-21.n3"),  # known N3->JSONLD problem
-]
-
-
-def roundtrip(e, verbose=False):
-    infmt, testfmt, source = e
+NT_DATA_DIR = Path(__file__).parent / "nt"
+INVALID_NT_FILES = {
+    # illegal literal as subject
+    "literals-01.nt",
+    "keywords-08.nt",
+    "paths-04.nt",
+    "numeric-01.nt",
+    "numeric-02.nt",
+    "numeric-03.nt",
+    "numeric-04.nt",
+    "numeric-05.nt",
+    # illegal variables
+    "formulae-01.nt",
+    "formulae-02.nt",
+    "formulae-03.nt",
+    "formulae-05.nt",
+    "formulae-06.nt",
+    "formulae-10.nt",
+    # illegal bnode as predicate
+    "paths-06.nt",
+    "anons-02.nt",
+    "anons-03.nt",
+    "qname-01.nt",
+    "lists-06.nt",
+}
+
+
+N3_DATA_DIR = Path(__file__).parent / "n3"
+
+XFAILS = {
+    ("xml", "n3-writer-test-29.n3",): pytest.mark.xfail(
+        reason="has predicates that cannot be shortened to strict qnames",
+        raises=ValueError,
+    ),
+    ("xml", "qname-02.nt"): pytest.mark.xfail(
+        reason="uses a property that cannot be qname'd",
+        raises=ValueError,
+    ),
+    ("trix", "strquot.n3"): pytest.mark.xfail(
+        reason="contains characters forbidden by the xml spec",
+        raises=SAXParseException,
+    ),
+    ("xml", "strquot.n3"): pytest.mark.xfail(
+        reason="contains characters forbidden by the xml spec",
+        raises=SAXParseException,
+    ),
+    ("json-ld", "keywords-04.nt"): pytest.mark.xfail(
+        reason="known NT->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "example-misc.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-11.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-28.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-26.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-28.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-22.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-21.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("n3", "example-lots_of_graphs.n3"): pytest.mark.xfail(
+        reason="rdflib.compare.isomorphic does not work for quoted graphs.",
+        raises=AssertionError,
+    ),
+}
+
+# This is for files which can only be represented properly in one format
+CONSTRAINED_FORMAT_MAP = {
+    "example-lots_of_graphs.n3": {"n3"}  # only n3 can serialize QuotedGraph
+}
+
+
+def collect_files(
+    directory: Path, exclude_names: Optional[Set[str]] = None
+) -> List[Tuple[Path, str]]:
+    result = []
+    for path in directory.glob("**/*"):
+        if not path.is_file():
+            continue
+        if exclude_names is not None and path.name in exclude_names:
+            continue
+        format = guess_format(path.name)
+        if format is None:
+            raise ValueError(f"could not determine format for {path}")
+        result.append((path, format))
+    return result
+
+
+def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) -> None:
 
     g1 = rdflib.ConjunctiveGraph()
 
@@ -89,54 +174,59 @@ def roundtrip(e, verbose=False):
         print("Ok!")
 
 
-formats = None
+_formats: Optional[Set[str]] = None
 
 
-def get_cases():
-    global formats
-    if not formats:
+def get_formats() -> Set[str]:
+    global _formats
+    if not _formats:
         serializers = set(
             x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
         )
         parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
-        formats = parsers.intersection(serializers)
-
-    for testfmt in formats:
-        if testfmt != "hext":
-            if "/" in testfmt:
-                continue  # skip double testing
-            for f, infmt in all_nt_files():
-                if (testfmt, f) not in SKIP:
-                    yield roundtrip, (infmt, testfmt, f)
-
-
-@pytest.mark.parametrize("checker, args", get_cases())
-def test_cases(checker, args):
-    checker(args)
+        _formats = {
+            format for format in parsers.intersection(serializers) if "/" not in format
+        }
+    return _formats
 
 
-def get_n3_test():
-    global formats
-    if not formats:
-        serializers = set(
-            x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
-        )
-        parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
-        formats = parsers.intersection(serializers)
-
+def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
+    formats = get_formats()
     for testfmt in formats:
-        if testfmt != "hext":
-            if "/" in testfmt:
-                continue  # skip double testing
-            for f, infmt in all_n3_files():
-                if (testfmt, f) not in SKIP:
-                    yield roundtrip, (infmt, testfmt, f)
-
-
-@pytest.mark.parametrize("checker, args", get_n3_test())
-def test_n3(checker, args):
-    checker(args)
-
-
-if __name__ == "__main__":
-    print("hi")
+        if testfmt == "hext":
+            continue
+        logging.debug("testfmt = %s", testfmt)
+        for f, infmt in files:
+            constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
+            if constrained_formats is not None and testfmt not in constrained_formats:
+                logging.debug(
+                    f"skipping format {testfmt} as it is not in the list of constrained formats for {f} which is {constrained_formats}"
+                )
+                continue
+            marks: List[Union[MarkDecorator, Mark]] = []
+            xfail = XFAILS.get((testfmt, f.name))
+            if xfail:
+                marks.append(xfail)
+            id = f"roundtrip_{os.path.basename(f)}_{infmt}_{testfmt}"
+            values = (roundtrip, (infmt, testfmt, f))
+            logging.debug("values = %s", values)
+            yield pytest.param(*values, marks=marks, id=id)
+
+
+def test_formats() -> None:
+    formats = get_formats()
+    logging.debug("formats = %s", formats)
+    assert formats is not None
+    assert len(formats) > 4
+
+
+@pytest.mark.parametrize(
+    "checker, args", make_cases(collect_files(NT_DATA_DIR, INVALID_NT_FILES))
+)
+def test_nt(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
+    checker(*args)
+
+
+@pytest.mark.parametrize("checker, args", make_cases(collect_files(N3_DATA_DIR)))
+def test_n3(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
+    checker(*args)