Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed and refactored roundtrip, n3_suite and nt_suite tests #1644

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
File renamed without changes.
38 changes: 0 additions & 38 deletions test/test_n3_suite.py

This file was deleted.

2 changes: 1 addition & 1 deletion test/test_nt_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_W3CNTriplesParser_parsestring(self):
self.assertTrue(res == None)

def test_w3_ntriple_variants(self):
uri = Path(nt_file("test.ntriples")).absolute().as_uri()
uri = Path(nt_file("test.nt")).absolute().as_uri()

parser = ntriples.W3CNTriplesParser()
u = urlopen(uri)
Expand Down
52 changes: 0 additions & 52 deletions test/test_nt_suite.py

This file was deleted.

6 changes: 6 additions & 0 deletions test/test_parse_file_guess_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ def test_guess_format(self) -> None:
self.assertEqual(guess_format("local-file.json-ld"), "json-ld")
self.assertEqual(guess_format("/some/place/on/disk/example.json"), "json-ld")
self.assertEqual(guess_format("../../relative/place/on/disk/example.json"), "json-ld")
self.assertEqual(guess_format("example.rdf"), "xml")
self.assertEqual(guess_format("example.nt"), "nt")
self.assertEqual(guess_format("example.n3"), "n3")
self.assertIsNone(guess_format("example.docx", None))
self.assertIsNone(guess_format("example", None))
self.assertIsNone(guess_format("example.mkv", None))

def test_jsonld(self) -> None:
g = Graph()
Expand Down
2 changes: 1 addition & 1 deletion test/test_parser_hext.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_roundtrip():
"even_more_literals.nt": "JSON decoding error",
"literals-02.nt": "JSON decoding error",
"more_literals.nt": "JSON decoding error",
"test.ntriples": "JSON decoding error",
"test.nt": "JSON decoding error",
"literals-05.nt": "JSON decoding error",
"i18n-01.nt": "JSON decoding error",
"literals-04.nt": "JSON decoding error",
Expand Down
236 changes: 163 additions & 73 deletions test/test_roundtrip.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import logging
import os.path
from pathlib import Path
from test.testutils import GraphHelper
from typing import Callable, Collection, Iterable, List, Optional, Set, Tuple, Union
from xml.sax import SAXParseException

import pytest
from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet

import rdflib
import rdflib.compare

try:
from .test_nt_suite import all_nt_files
assert all_nt_files

from .test_n3_suite import all_n3_files
assert all_n3_files
except:
from test.test_nt_suite import all_nt_files
from test.test_n3_suite import all_n3_files
from rdflib.util import guess_format

"""
Test round-tripping by all serializers/parser that are registered.
Expand All @@ -35,27 +34,113 @@

"""


SKIP = [
("xml", "test/n3/n3-writer-test-29.n3"),
# has predicates that cannot be shortened to strict qnames
("xml", "test/nt/qname-02.nt"), # uses a property that cannot be qname'd
("trix", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec
("xml", "test/n3/strquot.n3"), # contains characters forbidden by the xml spec
("json-ld", "test/nt/keywords-04.nt"), # known NT->JSONLD problem
("json-ld", "test/n3/example-misc.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-16.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-11.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-28.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-26.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-28.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/n3-writer-test-22.n3"), # known N3->JSONLD problem
("json-ld", "test/n3/rdf-test-21.n3"), # known N3->JSONLD problem
]


def roundtrip(e, verbose=False):
infmt, testfmt, source = e
# Directory with the N-Triples test files, located next to this module.
NT_DATA_DIR = Path(__file__).parent / "nt"
# Names of files below NT_DATA_DIR that are excluded from the NT round-trip
# cases: the set is passed to collect_files as exclude_names. Entries are
# grouped by the reason noted above each group.
INVALID_NT_FILES = {
# illegal literal as subject
"literals-01.nt",
"keywords-08.nt",
"paths-04.nt",
"numeric-01.nt",
"numeric-02.nt",
"numeric-03.nt",
"numeric-04.nt",
"numeric-05.nt",
# illegal variables
"formulae-01.nt",
"formulae-02.nt",
"formulae-03.nt",
"formulae-05.nt",
"formulae-06.nt",
"formulae-10.nt",
# illegal bnode as predicate
"paths-06.nt",
"anons-02.nt",
"anons-03.nt",
"qname-01.nt",
"lists-06.nt",
}


N3_DATA_DIR = Path(__file__).parent / "n3"

# Round trips that are expected to fail, keyed by ``(testfmt, file name)``.
# ``make_cases`` looks entries up with that key and attaches the xfail mark
# to the generated case. Each table row below is
# ``(testfmt, file name, reason, expected exception type)``.
XFAILS = {
    (testfmt, file_name): pytest.mark.xfail(reason=reason, raises=raises)
    for testfmt, file_name, reason, raises in (
        (
            "xml",
            "n3-writer-test-29.n3",
            "has predicates that cannot be shortened to strict qnames",
            ValueError,
        ),
        (
            "xml",
            "qname-02.nt",
            "uses a property that cannot be qname'd",
            ValueError,
        ),
        (
            "trix",
            "strquot.n3",
            "contains characters forbidden by the xml spec",
            SAXParseException,
        ),
        (
            "xml",
            "strquot.n3",
            "contains characters forbidden by the xml spec",
            SAXParseException,
        ),
        (
            "json-ld",
            "keywords-04.nt",
            "known NT->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "example-misc.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "rdf-test-11.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "rdf-test-28.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "n3-writer-test-26.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "n3-writer-test-28.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "n3-writer-test-22.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "json-ld",
            "rdf-test-21.n3",
            "known N3->JSONLD problem",
            AssertionError,
        ),
        (
            "n3",
            "example-lots_of_graphs.n3",
            "rdflib.compare.isomorphic does not work for quoted graphs.",
            AssertionError,
        ),
    )
}

# Files which can only be represented properly in a restricted set of
# formats, as ``file name -> set of allowed test formats``. make_cases skips
# every test format that is not in the set for such a file.
CONSTRAINED_FORMAT_MAP = {
"example-lots_of_graphs.n3": {"n3"} # only n3 can serialize QuotedGraph
}


def collect_files(
    directory: Path, exclude_names: Optional[Set[str]] = None
) -> List[Tuple[Path, str]]:
    """
    Collect every regular file below ``directory`` together with its format.

    :param directory: Directory that is searched recursively.
    :param exclude_names: File names (not paths) to leave out of the result;
        ``None`` excludes nothing.
    :return: ``(path, format)`` tuples ordered by path, where ``format`` is
        the value ``guess_format`` returns for the file name.
    :raises ValueError: If ``guess_format`` returns ``None`` for a file that
        was not excluded.
    """
    result: List[Tuple[Path, str]] = []
    # Sort the paths: the order in which ``glob`` yields entries depends on
    # the file system, and this result feeds ``pytest.mark.parametrize``,
    # where a stable case order keeps test collection reproducible.
    for path in sorted(directory.glob("**/*")):
        if not path.is_file():
            continue
        if exclude_names is not None and path.name in exclude_names:
            continue
        file_format = guess_format(path.name)
        if file_format is None:
            raise ValueError(f"could not determine format for {path}")
        result.append((path, file_format))
    return result


def roundtrip(infmt: str, testfmt: str, source: Path, verbose: bool = False) -> None:

g1 = rdflib.ConjunctiveGraph()

Expand Down Expand Up @@ -89,54 +174,59 @@ def roundtrip(e, verbose=False):
print("Ok!")


formats = None
_formats: Optional[Set[str]] = None


def get_cases():
global formats
if not formats:
def get_formats() -> Set[str]:
global _formats
if not _formats:
serializers = set(
x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
)
parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
formats = parsers.intersection(serializers)

for testfmt in formats:
if testfmt != "hext":
if "/" in testfmt:
continue # skip double testing
for f, infmt in all_nt_files():
if (testfmt, f) not in SKIP:
yield roundtrip, (infmt, testfmt, f)


@pytest.mark.parametrize("checker, args", get_cases())
def test_cases(checker, args):
checker(args)
_formats = {
format for format in parsers.intersection(serializers) if "/" not in format
}
return _formats


def get_n3_test():
global formats
if not formats:
serializers = set(
x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Serializer)
)
parsers = set(x.name for x in rdflib.plugin.plugins(None, rdflib.plugin.Parser))
formats = parsers.intersection(serializers)

def make_cases(files: Collection[Tuple[Path, str]]) -> Iterable[ParameterSet]:
formats = get_formats()
for testfmt in formats:
if testfmt != "hext":
if "/" in testfmt:
continue # skip double testing
for f, infmt in all_n3_files():
if (testfmt, f) not in SKIP:
yield roundtrip, (infmt, testfmt, f)


@pytest.mark.parametrize("checker, args", get_n3_test())
def test_n3(checker, args):
checker(args)


if __name__ == "__main__":
print("hi")
if testfmt == "hext":
continue
logging.debug("testfmt = %s", testfmt)
for f, infmt in files:
constrained_formats = CONSTRAINED_FORMAT_MAP.get(f.name, None)
if constrained_formats is not None and testfmt not in constrained_formats:
logging.debug(
f"skipping format {testfmt} as it is not in the list of constrained formats for {f} which is {constrained_formats}"
)
continue
marks: List[Union[MarkDecorator, Mark]] = []
xfail = XFAILS.get((testfmt, f.name))
if xfail:
marks.append(xfail)
id = f"roundtrip_{os.path.basename(f)}_{infmt}_{testfmt}"
values = (roundtrip, (infmt, testfmt, f))
logging.debug("values = %s", values)
yield pytest.param(*values, marks=marks, id=id)


def test_formats() -> None:
    """Check that ``get_formats`` reports more than four usable formats."""
    available = get_formats()
    logging.debug("formats = %s", available)
    assert available is not None
    assert len(available) > 4


@pytest.mark.parametrize(
    "checker, args",
    make_cases(collect_files(NT_DATA_DIR, INVALID_NT_FILES)),
)
def test_nt(
    checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]
) -> None:
    """Run one generated round-trip case for a file collected from ``NT_DATA_DIR``."""
    infmt, testfmt, source = args
    checker(infmt, testfmt, source)


@pytest.mark.parametrize(
    "checker, args",
    make_cases(collect_files(N3_DATA_DIR)),
)
def test_n3(
    checker: Callable[[str, str, Path], None], args: Tuple[str, str, Path]
) -> None:
    """Run one generated round-trip case for a file collected from ``N3_DATA_DIR``."""
    infmt, testfmt, source = args
    checker(infmt, testfmt, source)