PiDelport · PiDelport · Mar 9, 2016 · Mar 8, 2016 · Mar 8, 2016 · Mar 8, 2016
diff --git a/.coveragerc b/.coveragerc
@@ -0,0 +1,7 @@
+# http://coverage.readthedocs.org/en/latest/config.html
+[run]
+branch = True
+
+source =
+    src
+    tests
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,6 @@ dist/
 # Test coverage
 .coverage
 htmlcov
+
+# Hypothesis examples
+.hypothesis/
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,32 @@
+language: python
+cache: pip
+
+matrix:
+  include:
+    - { python: '2.7', env: TOXENV=py27 }
+    - { python: '3.4', env: TOXENV=py34 }
+    - { python: '3.5', env: TOXENV=py35 }
+    - { python: 'pypy', env: TOXENV=pypy }
+    - { python: 'pypy3', env: TOXENV=pypy3 }
+
+    # Report coverage for the latest Python 2 and 3 versions
+    - { python: '2.7', env: TOXENV=py27-codecov }
+    - { python: '3.5', env: TOXENV=py35-codecov }
+
+  allow_failures:
+    # PyPy3 on Travis seems to be broken, as of 2016-02.
+    #
+    # See: https://github.com/travis-ci/travis-ci/issues/4306
+    #
+    - python: 'pypy3'
+
+# Avoid overriding the default install step,
+# so that automatic pip caching works.
+#
+# See: https://github.com/travis-ci/travis-ci/issues/3239
+#
+before_script:
+  - pip install tox
+
+script:
+  - tox
diff --git a/HACKING.rst b/HACKING.rst
@@ -0,0 +1,29 @@
+=======================
+Working on backports.os
+=======================
+
+
+Running the tests
+=================
+
+Running ``tox``, ``detox``, or ``pytest`` should all work.
+
+With ``unittest``::
+
+    python -m unittest discover tests
+
+
+Coverage
+========
+
+With ``coverage``::
+
+    coverage run -m unittest discover tests
+    coverage report
+    coverage html
+
+With ``pytest`` and ``pytest-cov``::
+
+    py.test --cov
+    py.test --cov --cov-report=html
+
diff --git a/README.rst b/README.rst
@@ -0,0 +1,47 @@
+============
+backports.os
+============
+
+This package provides backports of new features in Python's os_ module
+under the backports_ namespace.
+
+.. _os: https://docs.python.org/3.5/library/os.html
+.. _backports: https://pypi.python.org/pypi/backports
+
+.. image:: https://img.shields.io/pypi/v/backports.os.svg
+    :target: https://pypi.python.org/pypi/backports.os
+
+.. image:: https://img.shields.io/badge/source-GitHub-lightgrey.svg
+    :target: https://github.com/pjdelport/backports.os
+
+.. image:: https://img.shields.io/github/issues/pjdelport/backports.os.svg
+    :target: https://github.com/pjdelport/backports.os/issues?q=is:open
+
+.. image:: https://travis-ci.org/pjdelport/backports.os.svg?branch=master
+    :target: https://travis-ci.org/pjdelport/backports.os
+
+.. image:: https://codecov.io/github/pjdelport/backports.os/coverage.svg?branch=master
+    :target: https://codecov.io/github/pjdelport/backports.os?branch=master
+
+
+Supported Python versions
+=========================
+
+* CPython: 2.7, 3.4, 3.5
+* PyPy
+
+
+Backported functionality
+========================
+
+* `os.fsencode`_ (new in Python 3.2)
+* `os.fsdecode`_ (new in Python 3.2)
+
+.. _`os.fsencode`: https://docs.python.org/3.5/library/os.html#os.fsencode
+.. _`os.fsdecode`: https://docs.python.org/3.5/library/os.html#os.fsdecode
+
+
+Contributing
+============
+
+See `<HACKING.rst>`__.
diff --git a/setup.py b/setup.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+import sys
+from setuptools import setup, find_packages
+
+
+# Backward-compatibility dependencies for Python 2
+_python2_requires = [
+    'future',  # For backport of surrogateescape
+] if sys.version_info < (3,) else []
+
+
+setup(
+    name='backports.os',
+    description="Backport of new features in Python's os module",
+    url='https://github.com/pjdelport/backports.os',
+
+    author=u'Piët Delport',
+    author_email='pjdelport@gmail.com',
+
+    package_dir={'': 'src'},
+    packages=find_packages('src'),
+
+    setup_requires=['setuptools_scm'],
+    use_scm_version=True,
+
+    install_requires=_python2_requires,
+
+    license='Python Software Foundation License',
+    classifiers=[
+        'Development Status :: 6 - Mature',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Python Software Foundation License',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+)
diff --git a/src/backports/__init__.py b/src/backports/__init__.py
@@ -0,0 +1,4 @@
+# See https://pypi.python.org/pypi/backports
+
+from pkgutil import extend_path
+__path__ = extend_path(__path__, __name__)
diff --git a/src/backports/os.py b/src/backports/os.py
@@ -0,0 +1,181 @@
+"""
+Partial backport of new functionality in Python 3.5's os module:
+
+    fsencode (new in Python 3.2)
+    fsdecode (new in Python 3.2)
+
+Backport modifications are marked with "XXX backport" and "TODO backport".
+"""
+from __future__ import unicode_literals
+
+import sys
+
+# XXX backport: unicode on Python 2
+_str = unicode if sys.version_info < (3,) else str
+
+# XXX backport: Use backported surrogateescape for Python 2
+# TODO backport: Find a way to do this without pulling in the entire future package?
+if sys.version_info < (3,):
+    from future.utils.surrogateescape import register_surrogateescape
+    register_surrogateescape()
+
+
+# XXX backport: This invalid_utf8_indexes() helper is shamelessly copied from
+# Bob Ippolito's pyutf8 package (pyutf8/ref.py), in order to help support the
+# Python 2 UTF-8 decoding hack in fsdecode() below.
+#
+# URL: https://github.com/etrepum/pyutf8/blob/master/pyutf8/ref.py
+#
+def _invalid_utf8_indexes(bytes):
+    skips = []
+    i = 0
+    len_bytes = len(bytes)
+    while i < len_bytes:
+        c1 = bytes[i]
+        if c1 < 0x80:
+            # U+0000 - U+007F - 7 bits
+            i += 1
+            continue
+        try:
+            c2 = bytes[i + 1]
+            if ((c1 & 0xE0 == 0xC0) and (c2 & 0xC0 == 0x80)):
+                # U+0080 - U+07FF - 11 bits
+                c = (((c1 & 0x1F) << 6) |
+                     (c2 & 0x3F))
+                if c < 0x80:
+                    # Overlong encoding
+                    skips.extend([i, i + 1])
+                i += 2
+                continue
+            c3 = bytes[i + 2]
+            if ((c1 & 0xF0 == 0xE0) and
+                (c2 & 0xC0 == 0x80) and
+                (c3 & 0xC0 == 0x80)):
+                # U+0800 - U+FFFF - 16 bits
+                c = (((((c1 & 0x0F) << 6) |
+                       (c2 & 0x3F)) << 6) |
+                     (c3 & 0x3f))
+                if ((c < 0x800) or (0xD800 <= c <= 0xDFFF)):
+                    # Overlong encoding or surrogate.
+                    skips.extend([i, i + 1, i + 2])
+                i += 3
+                continue
+            c4 = bytes[i + 3]
+            if ((c1 & 0xF8 == 0xF0) and
+                (c2 & 0xC0 == 0x80) and
+                (c3 & 0xC0 == 0x80) and
+                (c4 & 0xC0 == 0x80)):
+                # U+10000 - U+10FFFF - 21 bits
+                c = (((((((c1 & 0x0F) << 6) |
+                         (c2 & 0x3F)) << 6) |
+                       (c3 & 0x3F)) << 6) |
+                     (c4 & 0x3F))
+                if (c < 0x10000) or (c > 0x10FFFF):
+                    # Overlong encoding or invalid code point.
+                    skips.extend([i, i + 1, i + 2, i + 3])
+                i += 4
+                continue
+        except IndexError:
+            pass
+        skips.append(i)
+        i += 1
+    return skips
+
+
+# XXX backport: Another helper to support the Python 2 UTF-8 decoding hack.
+def _chunks(b, indexes):
+    i = 0
+    for j in indexes:
+        yield b[i:j]
+        yield b[j:j + 1]
+        i = j + 1
+    yield b[i:]
+
+
+def _fscodec():
+    encoding = sys.getfilesystemencoding()
+    if encoding == 'mbcs':
+        errors = 'strict'
+    else:
+        errors = 'surrogateescape'
+
+    # XXX backport: Do we need to hack around Python 2's UTF-8 codec?
+    import codecs  # Use codecs.lookup() for name normalisation.
+    _HACK_AROUND_PY2_UTF8 = (sys.version_info < (3,) and
+                             codecs.lookup(encoding) == codecs.lookup('utf-8'))
+
+    # XXX backport: chr(octet) became bytes([octet])
+    _byte = chr if sys.version_info < (3,) else lambda i: bytes([i])
+
+    def fsencode(filename):
+        """
+        Encode filename to the filesystem encoding with 'surrogateescape' error
+        handler, return bytes unchanged. On Windows, use 'strict' error handler if
+        the file system encoding is 'mbcs' (which is the default encoding).
+        """
+        if isinstance(filename, bytes):
+            return filename
+        elif isinstance(filename, _str):
+            if _HACK_AROUND_PY2_UTF8:
+                # XXX backport: Unlike Python 3, Python 2's UTF-8 codec does not
+                # consider surrogate codepoints invalid, so the surrogateescape
+                # error handler never gets invoked to encode them back into high
+                # bytes.
+                #
+                # This code hacks around that by manually encoding the surrogate
+                # codepoints to high bytes, without relying on surrogateescape.
+                #
+                return b''.join(
+                    (_byte(ord(c) - 0xDC00) if 0xDC00 <= ord(c) <= 0xDCFF else
+                     c.encode(encoding))
+                    for c in filename)
+            else:
+                return filename.encode(encoding, errors)
+        else:
+            # XXX backport: unicode instead of str for Python 2
+            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
+                                                                    _str=_str.__name__, ))
+
+    def fsdecode(filename):
+        """
+        Decode filename from the filesystem encoding with 'surrogateescape' error
+        handler, return str unchanged. On Windows, use 'strict' error handler if
+        the file system encoding is 'mbcs' (which is the default encoding).
+        """
+        if isinstance(filename, _str):
+            return filename
+        elif isinstance(filename, bytes):
+            if _HACK_AROUND_PY2_UTF8:
+                # XXX backport: See the remarks in fsencode() above.
+                #
+                # This case is slightly trickier: Python 2 will invoke the
+                # surrogateescape error handler for most bad high byte
+                # sequences, *except* for full UTF-8 sequences that happen to
+                # decode to surrogate codepoints.
+                #
+                # For decoding, it's not trivial to sidestep the UTF-8 codec
+                # only for surrogates like fsencode() does, but as a hack we can
+                # split the input into separate chunks around each invalid byte,
+                # decode the chunks separately, and join the results.
+                #
+                # This prevents Python 2's UTF-8 codec from seeing the encoded
+                # surrogate sequences as valid, which lets surrogateescape take
+                # over and escape the individual bytes.
+                #
+                # TODO: Improve this.
+                #
+                from array import array
+                indexes = _invalid_utf8_indexes(array(str('B'), filename))
+                return ''.join(chunk.decode(encoding, errors)
+                               for chunk in _chunks(filename, indexes))
+            else:
+                return filename.decode(encoding, errors)
+        else:
+            # XXX backport: unicode instead of str for Python 2
+            raise TypeError("expect bytes or {_str}, not {}".format(type(filename).__name__,
+                                                                    _str=_str.__name__, ))
+
+    return fsencode, fsdecode
+
+fsencode, fsdecode = _fscodec()
+del _fscodec