Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
[run]
branch = True

# We seem to need timid mode to get correct results.
timid = True

source =
src
tests
15 changes: 14 additions & 1 deletion src/backports/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def _fscodec():
import codecs # Use codecs.lookup() for name normalisation.
_HACK_AROUND_PY2_UTF8 = (sys.version_info < (3,) and
codecs.lookup(encoding) == codecs.lookup('utf-8'))
# Do we need to hack around Python 2's ASCII codec error handler behaviour?
_HACK_AROUND_PY2_ASCII = (sys.version_info < (3,) and
codecs.lookup(encoding) == codecs.lookup('ascii'))

# XXX backport: chr(octet) became bytes([octet])
_byte = chr if sys.version_info < (3,) else lambda i: bytes([i])
Expand All @@ -116,7 +119,7 @@ def fsencode(filename):
if isinstance(filename, bytes):
return filename
elif isinstance(filename, _str):
if _HACK_AROUND_PY2_UTF8:
if _HACK_AROUND_PY2_UTF8 or _HACK_AROUND_PY2_ASCII:
# XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not
# consider surrogate codepoints invalid, so the surrogateescape
# error handler never gets invoked to encode them back into high
Expand All @@ -125,6 +128,16 @@ def fsencode(filename):
# This code hacks around that by manually encoding the surrogate
# codepoints to high bytes, without relying on surrogateescape.
#
# As a *separate* issue to the above, Python 2's ASCII codec has
# a different problem: it correctly invokes the surrogateescape
# error handler, but then seems to do additional strict
# validation (?) on the interim surrogate-decoded Unicode buffer
# returned by surrogateescape, and then fails with a
# UnicodeEncodeError anyway.
#
# The fix for that happens to be the same (manual encoding),
# even though the two causes are quite different.
#
return b''.join(
(_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else
c.encode(encoding))
Expand Down
14 changes: 12 additions & 2 deletions tests/test_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
"""
from __future__ import unicode_literals

import codecs
import os as real_os
import sys
from functools import partial

from backports import os

Expand Down Expand Up @@ -46,6 +48,14 @@
UTF8_ENCODED_SURROGATE = b'\xed\xb0\x80'


# Helper strategy: when the filesystem encoding turns out to be ASCII,
# restrict the text generated for encoding to the ASCII alphabet as well;
# otherwise the unrestricted text strategy is used as-is.
ASCII = ''.join(map(chr, range(128)))
FILESYSTEM_IS_ASCII = (
    codecs.lookup(sys.getfilesystemencoding()) == codecs.lookup('ascii')
)
if FILESYSTEM_IS_ASCII:
    encodable_text = partial(text, alphabet=ASCII)
else:
    encodable_text = text


class ExtraFSEncodingTests(unittest.TestCase):

def test_encode_surrogates(self):
Expand All @@ -60,7 +70,7 @@ def test_decode_surrogates(self):
"""
self.assertEqual(os.fsdecode(HIGH_BYTES), HIGH_SURROGATES)

@given(text())
@given(encodable_text())
@example(HIGH_SURROGATES)
def test_text_roundtrip(self, s):
self.assertEqual(os.fsdecode(os.fsencode(s)), s)
Expand Down Expand Up @@ -92,7 +102,7 @@ class TestAgainstPython3(unittest.TestCase):
On Python 3, the backported implementations should match the standard library.
"""

@given(text())
@given(encodable_text())
@example(HIGH_SURROGATES)
def test_encode_text(self, s):
self.assertEqual(os.fsencode(s), real_os.fsencode(s))
Expand Down
12 changes: 12 additions & 0 deletions tests/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,16 @@ def test_identity(self):
bytesfn = os.fsencode(fn)
except UnicodeEncodeError:
continue

# XXX backport: Ignore bug in future.utils.surrogateescape.replace_surrogate_encode()
# by treating the below NameError like the above UnicodeEncodeError.
#
# Bug: https://github.com/PythonCharmers/python-future/issues/256
# (This workaround can be removed once that is fixed.)
except NameError as e: # pragma: no cover
if e.message == "global name 'exc' is not defined":
continue
else:
raise

self.assertEqual(os.fsdecode(bytesfn), fn)
7 changes: 7 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,20 @@ deps =
usedevelop =
codecov: true

whitelist_externals =
env

# Note: This runs the test suite twice: once with the current locale's
# encoding, and once with LANG empty, to test against ASCII.
commands =
# XXX: Under the codecov factor, the unconditional commands below run as well
# as the coverage ones, so the tests run both without and with coverage.
# Oh well.
# TODO: Use a factor-based override or negation for this sometime?
# See:
# https://github.com/tox-dev/tox/issues/189
# https://github.com/tox-dev/tox/issues/292
python -m unittest discover tests
env LANG= python -m unittest discover tests

codecov: coverage run -m unittest discover tests
codecov: env LANG= coverage run --append -m unittest discover tests
codecov: codecov -e TOXENV