Skip to content

Commit

Permalink
fix: always parse HexTuple files as utf-8 (#2070)
Browse files Browse the repository at this point in the history
Always parse HexTuple files as utf-8. This was already the intent, as
evidenced by the existing code that will raise a warning if the encoding
provided for a HexTuple file is something other than utf-8 or None.

https://github.com/RDFLib/rdflib/blob/cfa418074b27b12aac905ba266b002a237c5ff4c/rdflib/plugins/parsers/hext.py#L73-L79

No new tests are added, as this code is already tested and this patch
removes an XFAIL.
  • Loading branch information
aucampia committed Aug 7, 2022
1 parent cc80c9c commit 493241e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 12 deletions.
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,22 @@ and will be removed for release.
<!-- -->
<!-- -->


<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START PR #2070 -->
<!-- -->
<!-- -->

- Always parse HexTuple files as utf-8.
[PR #2070](https://github.com/RDFLib/rdflib/pull/2070).

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: END PR #2070 -->
<!-- -->
<!-- -->

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START -->
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/parsers/hext.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def parse(self, source, graph, **kwargs):

# handle different source types - only file and string (data) for now
if hasattr(source, "file"):
with open(source.file.name) as fp:
with open(source.file.name, encoding="utf-8") as fp:
for l in fp:
self._parse_hextuple(cg, self._load_json_line(l))
elif hasattr(source, "_InputSource__bytefile"):
Expand Down
11 changes: 0 additions & 11 deletions test/test_graph/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path, PurePath
from test.data import TEST_DATA_DIR
Expand Down Expand Up @@ -192,16 +191,6 @@ def for_directory(
),
}

if sys.platform == "win32":
EXPECTED_FAILURES["variants/diverse_triples"] = pytest.mark.xfail(
reason="""
Some encoding issue when parsing hext on windows:
> return codecs.charmap_decode(input,self.errors,decoding_table)[0]
E UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 356: character maps to <undefined>
""",
raises=UnicodeDecodeError,
)


def tests_found() -> None:
logging.debug("VARIANTS_DIR = %s", VARIANTS_DIR)
Expand Down

0 comments on commit 493241e

Please sign in to comment.