diff --git a/.coveragerc b/.coveragerc index d5be9e3..fa7d1bc 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,9 @@ [run] branch = True +# We seem to need timid mode to get correct results. +timid = True + source = src tests diff --git a/src/backports/os.py b/src/backports/os.py index 911897f..060fe9e 100644 --- a/src/backports/os.py +++ b/src/backports/os.py @@ -103,6 +103,9 @@ def _fscodec(): import codecs # Use codecs.lookup() for name normalisation. _HACK_AROUND_PY2_UTF8 = (sys.version_info < (3,) and codecs.lookup(encoding) == codecs.lookup('utf-8')) + # Do we need to hack around Python 2's ASCII codec error handler behaviour? + _HACK_AROUND_PY2_ASCII = (sys.version_info < (3,) and + codecs.lookup(encoding) == codecs.lookup('ascii')) # XXX backport: chr(octet) became bytes([octet]) _byte = chr if sys.version_info < (3,) else lambda i: bytes([i]) @@ -116,7 +119,7 @@ def fsencode(filename): if isinstance(filename, bytes): return filename elif isinstance(filename, _str): - if _HACK_AROUND_PY2_UTF8: + if _HACK_AROUND_PY2_UTF8 or _HACK_AROUND_PY2_ASCII: # XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not # consider surrogate codepoints invalid, so the surrogateescape # error handler never gets invoked to encode them back into high @@ -125,6 +128,16 @@ def fsencode(filename): # This code hacks around that by manually encoding the surrogate # codepoints to high bytes, without relying on surrogateescape. # + # As a *separate* issue to the above, Python 2's ASCII codec has + # a different problem: it correctly invokes the surrogateescape + # error handler, but then seems to do additional strict + # validation (?) on the interim surrogate-decoded Unicode buffer + # returned by surrogateescape, and then fails with a + # UnicodeEncodeError anyway. + # + # The fix for that happens to be the same (manual encoding), + # even though the two causes are quite different. 
+ # return b''.join( (_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else c.encode(encoding)) diff --git a/tests/test_extra.py b/tests/test_extra.py index f445342..5fe0f59 100644 --- a/tests/test_extra.py +++ b/tests/test_extra.py @@ -4,8 +4,10 @@ """ from __future__ import unicode_literals +import codecs import os as real_os import sys +from functools import partial from backports import os @@ -46,6 +48,14 @@ UTF8_ENCODED_SURROGATE = b'\xed\xb0\x80' +# Helper strategy: If the filesystem encoding is ASCII, +# limit the set of valid text to encode to ASCII too. +FILESYSTEM_IS_ASCII = codecs.lookup(sys.getfilesystemencoding()) == codecs.lookup('ascii') +ASCII = ''.join(chr(i) for i in range(128)) +encodable_text = (partial(text, alphabet=ASCII) if FILESYSTEM_IS_ASCII else + text) + + class ExtraFSEncodingTests(unittest.TestCase): def test_encode_surrogates(self): @@ -60,7 +70,7 @@ def test_decode_surrogates(self): """ self.assertEqual(os.fsdecode(HIGH_BYTES), HIGH_SURROGATES) - @given(text()) + @given(encodable_text()) @example(HIGH_SURROGATES) def test_text_roundtrip(self, s): self.assertEqual(os.fsdecode(os.fsencode(s)), s) @@ -92,7 +102,7 @@ class TestAgainstPython3(unittest.TestCase): On Python 3, the backported implementations should match the standard library. """ - @given(text()) + @given(encodable_text()) @example(HIGH_SURROGATES) def test_encode_text(self, s): self.assertEqual(os.fsencode(s), real_os.fsencode(s)) diff --git a/tests/test_os.py b/tests/test_os.py index 253c8d2..b76a4d9 100644 --- a/tests/test_os.py +++ b/tests/test_os.py @@ -20,4 +20,16 @@ def test_identity(self): bytesfn = os.fsencode(fn) except UnicodeEncodeError: continue + + # XXX backport: Ignore bug in future.utils.surrogateescape.replace_surrogate_encode() + # by treating the below NameError like the above UnicodeEncodeError. + # + # Bug: https://github.com/PythonCharmers/python-future/issues/256 + # (This workaround can be removed once that is fixed.) 
+ except NameError as e: # pragma: no cover + if e.message == "global name 'exc' is not defined": + continue + else: + raise + self.assertEqual(os.fsdecode(bytesfn), fn) diff --git a/tox.ini b/tox.ini index 0e0714c..f0bf4a3 100644 --- a/tox.ini +++ b/tox.ini @@ -24,6 +24,11 @@ deps = usedevelop = codecov: true +whitelist_externals = + env + +# Note: This runs the test suite with both the current locale's encoding, +# and with LANG empty, to test against ASCII. commands = # XXX: This will currently run the tests twice under codecov, but oh well. # TODO: Use a factor-based override or negation for this sometime? @@ -31,6 +36,8 @@ commands = # https://github.com/tox-dev/tox/issues/189 # https://github.com/tox-dev/tox/issues/292 python -m unittest discover tests + env LANG= python -m unittest discover tests codecov: coverage run -m unittest discover tests + codecov: env LANG= coverage run --append -m unittest discover tests codecov: codecov -e TOXENV