RDFLib · nicholascar · Jan 2, 2022 · Jan 2, 2022
diff --git a/test/nt/test.ntriples → test/nt/test.nt b/test/nt/test.ntriples → test/nt/test.nt
diff --git a/test/test_n3_suite.py b/test/test_n3_suite.py
diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py
@@ -126,7 +126,7 @@ def test_W3CNTriplesParser_parsestring(self):
         self.assertTrue(res == None)
 
     def test_w3_ntriple_variants(self):
-        uri = Path(nt_file("test.ntriples")).absolute().as_uri()
+        uri = Path(nt_file("test.nt")).absolute().as_uri()
 
         parser = ntriples.W3CNTriplesParser()
         u = urlopen(uri)

diff --git a/test/test_nt_suite.py b/test/test_nt_suite.py
diff --git a/test/test_parse_file_guess_format.py b/test/test_parse_file_guess_format.py
@@ -26,6 +26,12 @@ def test_guess_format(self) -> None:
         self.assertEqual(guess_format("local-file.json-ld"), "json-ld")
         self.assertEqual(guess_format("/some/place/on/disk/example.json"), "json-ld")
         self.assertEqual(guess_format("../../relative/place/on/disk/example.json"), "json-ld")
+        self.assertEqual(guess_format("example.rdf"), "xml")
+        self.assertEqual(guess_format("example.nt"), "nt")
+        self.assertEqual(guess_format("example.n3"), "n3")
+        self.assertIsNone(guess_format("example.docx", None))
+        self.assertIsNone(guess_format("example", None))
+        self.assertIsNone(guess_format("example.mkv", None))
 
     def test_jsonld(self) -> None:
         g = Graph()

diff --git a/test/test_parser_hext.py b/test/test_parser_hext.py
@@ -56,7 +56,7 @@ def test_roundtrip():
         "even_more_literals.nt": "JSON decoding error",
         "literals-02.nt": "JSON decoding error",
         "more_literals.nt": "JSON decoding error",
-        "test.ntriples": "JSON decoding error",
+        "test.nt": "JSON decoding error",
         "literals-05.nt": "JSON decoding error",
         "i18n-01.nt": "JSON decoding error",
         "literals-04.nt": "JSON decoding error",

diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py
@@ -1,17 +1,16 @@
+import logging
+import os.path
+from pathlib import Path
+from test.testutils import GraphHelper
+from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union
+from xml.sax import SAXParseException
+
 import pytest
+from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet
 
 import rdflib
 import rdflib.compare
-
-try:
-    from .test_nt_suite import all_nt_files
-    assert all_nt_files
-
-    from .test_n3_suite import all_n3_files
-    assert all_n3_files
-except:
-    from test.test_nt_suite import all_nt_files
-    from test.test_n3_suite import all_n3_files
+from rdflib.util import guess_format
 
 """
 Test round-tripping by all serializers/parser that are registered.
@@ -35,27 +34,113 @@
 
 """
 
-
-SKIP = [
-    ("xml", "test/n3/n3-writer-test-29.n3"),
-    # has predicates that cannot be shortened to strict qnames
-    ("xml", "test/nt/qname-02.nt"),  # uses a property that cannot be qname'd
-    ("trix", "test/n3/strquot.n3"),  # contains characters forbidden by the xml spec
-    ("xml", "test/n3/strquot.n3"),  # contains characters forbidden by the xml spec
-    ("json-ld", "test/nt/keywords-04.nt"),  # known NT->JSONLD problem
-    ("json-ld", "test/n3/example-misc.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-16.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-11.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-28.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-26.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-28.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/n3-writer-test-22.n3"),  # known N3->JSONLD problem
-    ("json-ld", "test/n3/rdf-test-21.n3"),  # known N3->JSONLD problem
-]
-
-
-def roundtrip(e, verbose=False):
-    infmt, testfmt, source = e
+NT_DATA_DIR = Path(__file__).parent / "nt"  # the "nt" directory next to this test module
+INVALID_NT_FILES = {  # file names handed to collect_files() as exclude_names for test_nt
+    # illegal literal as subject
+    "literals-01.nt",
+    "keywords-08.nt",
+    "paths-04.nt",
+    "numeric-01.nt",
+    "numeric-02.nt",
+    "numeric-03.nt",
+    "numeric-04.nt",
+    "numeric-05.nt",
+    # illegal variables
+    "formulae-01.nt",
+    "formulae-02.nt",
+    "formulae-03.nt",
+    "formulae-05.nt",
+    "formulae-06.nt",
+    "formulae-10.nt",
+    # illegal bnode as predicate
+    "paths-06.nt",
+    "anons-02.nt",
+    "anons-03.nt",
+    "qname-01.nt",
+    "lists-06.nt",
+}
+
+
+N3_DATA_DIR = Path(__file__).parent / "n3"  # the "n3" directory next to this test module
+
+XFAILS = {  # (serialization format, file name) -> xfail mark that make_cases attaches
+    ("xml", "n3-writer-test-29.n3",): pytest.mark.xfail(
+        reason="has predicates that cannot be shortened to strict qnames",
+        raises=ValueError,
+    ),
+    ("xml", "qname-02.nt"): pytest.mark.xfail(
+        reason="uses a property that cannot be qname'd",
+        raises=ValueError,
+    ),
+    ("trix", "strquot.n3"): pytest.mark.xfail(
+        reason="contains characters forbidden by the xml spec",
+        raises=SAXParseException,
+    ),
+    ("xml", "strquot.n3"): pytest.mark.xfail(
+        reason="contains characters forbidden by the xml spec",
+        raises=SAXParseException,
+    ),
+    ("json-ld", "keywords-04.nt"): pytest.mark.xfail(
+        reason="known NT->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "example-misc.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-11.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-28.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-26.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-28.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "n3-writer-test-22.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("json-ld", "rdf-test-21.n3"): pytest.mark.xfail(
+        reason="known N3->JSONLD problem",
+        raises=AssertionError,
+    ),
+    ("n3", "example-lots_of_graphs.n3"): pytest.mark.xfail(
+        reason="rdflib.compare.isomorphic does not work for quoted graphs.",
+        raises=AssertionError,
+    ),
+}
+
+# File name -> the only formats that file is round-tripped through; make_cases skips the rest.
+CONSTRAINED_FORMAT_MAP = {
+    "example-lots_of_graphs.n3": {"n3"}  # only n3 can serialize QuotedGraph
+}
+
+
+def collect_files(
+    directory: Path, exclude_names: Optional[Set[str]] = None
+) -> List[Tuple[Path, str]]:
+    """Return sorted ``(path, format)`` pairs for every file under *directory*."""
+    # Sorted because glob order is arbitrary; test ids must be stable across runs.
+    result: List[Tuple[Path, str]] = []
+    for path in sorted(directory.glob("**/*")):
+        if not path.is_file() or (exclude_names and path.name in exclude_names):
+            continue
+        fmt = guess_format(path.name)  # "fmt", not "format": keep the builtin visible
+        if fmt is None:
+            raise ValueError(f"could not determine format for {path}")
+        result.append((path, fmt))
+    return result
+
+
+def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) -> None:
 
     g1 = rdflib.ConjunctiveGraph()
 
@@ -89,54 +174,59 @@ def roundtrip(e, verbose=False):
         print("Ok!")
 
 
-formats = None
+_formats: Optional[Set[str]] = None
 
 
-def get_cases():
-    global formats
-    if not formats:
+def get_formats() -> Set[str]:
+    """Return the formats that have both a parser and a serializer registered."""
+    # Names containing "/" are dropped to skip double testing; computed once, cached.
+    global _formats
+    if _formats is None:
         serializers = set(
             x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
         )
         parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
-        formats = parsers.intersection(serializers)
-
-    for testfmt in formats:
-        if testfmt != "hext":
-            if "/" in testfmt:
-                continue  # skip double testing
-            for f, infmt in all_nt_files():
-                if (testfmt, f) not in SKIP:
-                    yield roundtrip, (infmt, testfmt, f)
-
-
-@pytest.mark.parametrize("checker, args", get_cases())
-def test_cases(checker, args):
-    checker(args)
+        _formats = {fmt for fmt in parsers.intersection(serializers) if "/" not in fmt}
+    return _formats
 
 
-def get_n3_test():
-    global formats
-    if not formats:
-        serializers = set(
-            x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
-        )
-        parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
-        formats = parsers.intersection(serializers)
-
+def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
+    """Yield one ``pytest.param`` per (file, serialization format) round-trip case."""
+    # Sorted: set order varies with hash randomization, but case order must not.
+    formats = sorted(get_formats())
     for testfmt in formats:
-        if testfmt != "hext":
-            if "/" in testfmt:
-                continue  # skip double testing
-            for f, infmt in all_n3_files():
-                if (testfmt, f) not in SKIP:
-                    yield roundtrip, (infmt, testfmt, f)
-
-
-@pytest.mark.parametrize("checker, args", get_n3_test())
-def test_n3(checker, args):
-    checker(args)
-
-
-if __name__ == "__main__":
-    print("hi")
+        if testfmt == "hext":
+            continue
+        logging.debug("testfmt = %s", testfmt)
+        for f, infmt in files:
+            constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
+            if constrained_formats is not None and testfmt not in constrained_formats:
+                logging.debug(
+                    f"skipping format {testfmt} as it is not in the list of constrained formats for {f} which is {constrained_formats}"
+                )
+                continue
+            xfail = XFAILS.get((testfmt, f.name))
+            marks: List[Union[MarkDecorator, Mark]] = [xfail] if xfail else []
+            case_id = f"roundtrip_{f.name}_{infmt}_{testfmt}"
+            values = (roundtrip, (infmt, testfmt, f))
+            logging.debug("values = %s", values)
+            yield pytest.param(*values, marks=marks, id=case_id)
+
+
+def test_formats() -> None:
+    """Sanity-check that plugin discovery finds more than four round-trip formats."""
+    available = get_formats()
+    logging.debug("formats = %s", available)
+    assert available is not None and len(available) > 4
+
+
+@pytest.mark.parametrize(  # one case per (file under NT_DATA_DIR, format) from make_cases
+    "checker, args", make_cases(collect_files(NT_DATA_DIR, INVALID_NT_FILES))
+)
+def test_nt(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
+    checker(*args)  # args is the (infmt, testfmt, path) tuple built by make_cases
+
+
+@pytest.mark.parametrize("checker, args", make_cases(collect_files(N3_DATA_DIR)))
+def test_n3(checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]):
+    checker(*args)  # args is the (infmt, testfmt, path) tuple built by make_cases