Skip to content
Browse files

- Backport fixes from master regarding URL decoding. URL segments are

  no longer "double-decoded" during traversal and when encountered in a route
  subpath (or other star-arg pattern).  As a result, a new API named
  ``pyramid.traversal.traversal_path_info`` was added to the system.  This
  function accepts an already-URL-decoded string and returns a tuple of
  Unicode objects.  This API is used internally by Pyramid in all places that
  ``pyramid.traversal.traversal_path`` used to be used.  The
  ``traversal_path`` function remains for backwards compatibility, however,
  and can still be used when a path is encoded.  See
  #349 for more information.

Closes #349.
  • Loading branch information...
1 parent 4963504 commit 7fe36a33f03844269e843ffbe2974e7cf9cbd58a @mcdonc mcdonc committed Nov 18, 2011
View
11 CHANGES.txt
@@ -7,6 +7,17 @@ Bug Fixes
- The ``pyramid.view.view_config`` decorator did not accept a ``match_params``
predicate argument. See https://github.com/Pylons/pyramid/pull/308
+- Backport fixes from master regarding URL decoding. URL segments are
+ no longer "double-decoded" during traversal and when encountered in a route
+ subpath (or other star-arg pattern). As a result, a new API named
+ ``pyramid.traversal.traversal_path_info`` was added to the system. This
+ function accepts an already-URL-decoded string and returns a tuple of
+ Unicode objects. This API is used internally by Pyramid in all places that
+ ``pyramid.traversal.traversal_path`` used to be used. The
+ ``traversal_path`` function remains for backwards compatibility, however,
+ and can still be used when a path is encoded. See
+ https://github.com/Pylons/pyramid/issues/349 for more information.
+
1.2.1 (2011-09-28)
==================
View
2 docs/api/traversal.rst
@@ -21,5 +21,7 @@
.. autofunction:: traverse
+ .. autofunction:: traversal_path_info(path)
+
.. autofunction:: traversal_path(path)
View
4 pyramid/config/testing.py
@@ -6,7 +6,7 @@
from pyramid.interfaces import IRendererFactory
from pyramid.renderers import RendererHelper
-from pyramid.traversal import traversal_path
+from pyramid.traversal import traversal_path_info
from pyramid.config.util import action_method
@@ -66,7 +66,7 @@ def __init__(self, context):
def __call__(self, request):
path = request.environ['PATH_INFO']
ob = resources[path]
- traversed = traversal_path(path)
+ traversed = traversal_path_info(path)
return {'context':ob, 'view_name':'','subpath':(),
'traversed':traversed, 'virtual_root':ob,
'virtual_root_path':(), 'root':ob}
View
4 pyramid/config/util.py
@@ -3,7 +3,7 @@
from pyramid.exceptions import ConfigurationError
from pyramid.traversal import find_interface
-from pyramid.traversal import traversal_path
+from pyramid.traversal import traversal_path_info
from hashlib import md5
@@ -237,7 +237,7 @@ def traverse_predicate(context, request):
return True
m = context['match']
tvalue = tgenerate(m)
- m['traverse'] = traversal_path(tvalue)
+ m['traverse'] = traversal_path_info(tvalue)
return True
# This isn't actually a predicate, it's just an infodict
# modifier that injects ``traverse`` into the matchdict. As a
View
4 pyramid/static.py
@@ -19,7 +19,7 @@
from pyramid.httpexceptions import HTTPMovedPermanently
from pyramid.path import caller_package
from pyramid.response import Response
-from pyramid.traversal import traversal_path
+from pyramid.traversal import traversal_path_info
def init_mimetypes(mimetypes):
# this is a function so it can be unittested
@@ -139,7 +139,7 @@ def __call__(self, context, request):
if self.use_subpath:
path_tuple = request.subpath
else:
- path_tuple = traversal_path(request.path_info)
+ path_tuple = traversal_path_info(request.environ['PATH_INFO'])
path = _secure_path(path_tuple)
View
48 pyramid/tests/test_traversal.py
@@ -2,10 +2,10 @@
from pyramid.testing import cleanUp
-class TraversalPathTests(unittest.TestCase):
+class TraversalPathInfoTests(unittest.TestCase):
def _callFUT(self, path):
- from pyramid.traversal import traversal_path
- return traversal_path(path)
+ from pyramid.traversal import traversal_path_info
+ return traversal_path_info(path)
def test_path_startswith_endswith(self):
self.assertEqual(self._callFUT('/foo/'), (u'foo',))
@@ -24,7 +24,7 @@ def test_twodots_at_start(self):
def test_element_urllquoted(self):
self.assertEqual(self._callFUT('/foo/space%20thing/bar'),
- (u'foo', u'space thing', u'bar'))
+ (u'foo', u'space%20thing', u'bar'))
def test_segments_are_unicode(self):
result = self._callFUT('/foo/bar')
@@ -38,6 +38,44 @@ def test_same_value_returned_if_cached(self):
self.assertEqual(result2, (u'foo', u'bar'))
def test_utf8(self):
+ la = 'La Pe\xc3\xb1a'
+ decoded = unicode(la, 'utf-8')
+ path = '/'.join([la, la])
+ self.assertEqual(self._callFUT(path), (decoded, decoded))
+
+ def test_utf16(self):
+ from pyramid.exceptions import URLDecodeError
+ la = unicode('La Pe\xc3\xb1a', 'utf-8').encode('utf-16')
+ path = '/'.join([la, la])
+ self.assertRaises(URLDecodeError, self._callFUT, path)
+
+ def test_unicode_highorder_chars(self):
+ path = '/\xe6\xb5\x81\xe8\xa1\x8c\xe8\xb6\x8b\xe5\x8a\xbf'
+ self.assertEqual(self._callFUT(path), (u'\u6d41\u884c\u8d8b\u52bf',))
+
+ def test_unicode_simple(self):
+ path = u'/abc'
+ self.assertEqual(self._callFUT(path), (u'abc',))
+
+ def test_unicode_undecodeable_to_ascii(self):
+ path = unicode('/La Pe\xc3\xb1a', 'utf-8')
+ self.assertRaises(UnicodeEncodeError, self._callFUT, path)
+
+class TraversalPathTests(unittest.TestCase):
+ def _callFUT(self, path):
+ from pyramid.traversal import traversal_path
+ return traversal_path(path)
+
+ def test_element_urllquoted(self):
+ self.assertEqual(self._callFUT('/foo/space%20thing/bar'),
+ (u'foo', u'space thing', u'bar'))
+
+ def test_segments_are_unicode(self):
+ result = self._callFUT('/foo/bar')
+ self.assertEqual(type(result[0]), unicode)
+ self.assertEqual(type(result[1]), unicode)
+
+ def test_utf8(self):
import urllib
la = 'La Pe\xc3\xb1a'
encoded = urllib.quote(la)
@@ -46,8 +84,8 @@ def test_utf8(self):
self.assertEqual(self._callFUT(path), (decoded, decoded))
def test_utf16(self):
- from pyramid.exceptions import URLDecodeError
import urllib
+ from pyramid.exceptions import URLDecodeError
la = unicode('La Pe\xc3\xb1a', 'utf-8').encode('utf-16')
encoded = urllib.quote(la)
path = '/'.join([encoded, encoded])
View
18 pyramid/tests/test_urldispatch.py
@@ -293,7 +293,7 @@ def test_no_beginning_slash(self):
def test_url_decode_error(self):
from pyramid.exceptions import URLDecodeError
matcher, generator = self._callFUT('/:foo')
- self.assertRaises(URLDecodeError, matcher, '/%FF%FE%8B%00')
+ self.assertRaises(URLDecodeError, matcher, '/\xff\xfe\x8b\x00')
def test_custom_regex(self):
matcher, generator = self._callFUT('foo/{baz}/biz/{buz:[^/\.]+}.{bar}')
@@ -363,10 +363,10 @@ def test_matcher_functional_newstyle(self):
self.matches('zzz/{x}*traverse', '/zzz/abc/def/g',
{'x':'abc', 'traverse':('def', 'g')})
self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')})
- self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', ' abc')})
- self.matches('{x}', '/La%20Pe%C3%B1a', {'x':u'La Pe\xf1a'})
- self.matches('*traverse', '/La%20Pe%C3%B1a/x',
- {'traverse':(u'La Pe\xf1a', 'x')})
+ self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', '%20abc')})
+ self.matches('{x}', '/La Pe\xc3\xb1a', {'x': u'La Pe\xf1a'})
+ self.matches('*traverse', '/La Pe\xc3\xb1a/x',
+ {'traverse': (u'La Pe\xf1a', u'x')})
self.matches('/foo/{id}.html', '/foo/bar.html', {'id':'bar'})
self.matches('/{num:[0-9]+}/*traverse', '/555/abc/def',
{'num':'555', 'traverse':('abc', 'def')})
@@ -386,10 +386,10 @@ def test_matcher_functional_oldstyle(self):
self.matches('zzz/:x*traverse', '/zzz/abc/def/g',
{'x':'abc', 'traverse':('def', 'g')})
self.matches('*traverse', '/zzz/abc', {'traverse':('zzz', 'abc')})
- self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', ' abc')})
- self.matches(':x', '/La%20Pe%C3%B1a', {'x':u'La Pe\xf1a'})
- self.matches('*traverse', '/La%20Pe%C3%B1a/x',
- {'traverse':(u'La Pe\xf1a', 'x')})
+ self.matches('*traverse', '/zzz/%20abc', {'traverse':('zzz', '%20abc')})
+ self.matches(':x', '/La Pe\xc3\xb1a', {'x': u'La Pe\xf1a'})
+ self.matches('*traverse', '/La Pe\xc3\xb1a/x',
+ {'traverse': (u'La Pe\xf1a', u'x')})
self.matches('/foo/:id.html', '/foo/bar.html', {'id':'bar'})
self.matches('/foo/:id_html', '/foo/bar_html', {'id_html':'bar_html'})
self.matches('zzz/:_', '/zzz/abc', {'_':'abc'})
View
66 pyramid/traversal.py
@@ -407,26 +407,39 @@ def virtual_root(resource, request):
urlgenerator = TraversalContextURL(resource, request)
return urlgenerator.virtual_root()
-@lru_cache(1000)
def traversal_path(path):
- """ Given a ``PATH_INFO`` string (slash-separated path segments),
- return a tuple representing that path which can be used to
- traverse a resource tree.
-
- The ``PATH_INFO`` is split on slashes, creating a list of
- segments. Each segment is URL-unquoted, and subsequently decoded
- into Unicode. Each segment is assumed to be encoded using the
- UTF-8 encoding (or a subset, such as ASCII); a
- :exc:`pyramid.exceptions.URLDecodeError` is raised if a segment
- cannot be decoded. If a segment name is empty or if it is ``.``,
- it is ignored. If a segment name is ``..``, the previous segment
- is deleted, and the ``..`` is ignored.
+ """ Variant of :func:`pyramid.traversal.traversal_path_info` suitable for
+ decoding paths that are URL-encoded (traversal_path_info does not decode
+ URL-encoded paths).
+
+ If this function is passed a Unicode object instead of a string, that
+ Unicode object *must* be directly encodable to ASCII. For example, u'/foo'
+ will work but u'/<unprintable unicode>' (a Unicode object with characters
+ that cannot be encoded to ascii) will not. A :exc:`UnicodeError` will be
+ raised if the Unicode cannot be encoded directly to ASCII.
+ """
+ if isinstance(path, unicode):
+ path = path.encode('ascii')
+ path = urllib.unquote(path)
+ return traversal_path_info(path)
- If this function is passed a Unicode object instead of a string,
- that Unicode object *must* directly encodeable to ASCII. For
- example, u'/foo' will work but u'/<unprintable unicode>' (a
- Unicode object with characters that cannot be encoded to ascii)
- will not.
+@lru_cache(1000)
+def traversal_path_info(path):
+ """ Given a ``PATH_INFO`` environ value (a string), return a tuple
+ representing that path which can be used to traverse a resource tree.
+
+ ``PATH_INFO`` is assumed to already be URL-decoded (as per the WSGI
+ specification). It is decoded to text using ``path.decode('utf-8')``. A
+ :exc:`pyramid.exceptions.URLDecodeError` is raised if the URL cannot be
+ decoded.
+
+ This API cannot be passed a Unicode ``path`` value. A
+ :exc:`UnicodeEncodeError` will result if it is passed such a value.
+
+ The ``PATH_INFO`` is split on slashes, creating a list of segments. If a
+ segment name is empty or if it is ``.``, it is ignored. If a segment
+ name is ``..``, the previous segment is deleted, and the ``..`` is
+ ignored.
Examples:
@@ -474,16 +487,13 @@ def traversal_path(path):
writing their own traversal machinery, as opposed to users writing
applications in :app:`Pyramid`.
"""
- if isinstance(path, unicode):
- path = path.encode('ascii')
+ try:
+ path = path.decode('utf-8')
+ except UnicodeDecodeError as e:
+ raise URLDecodeError(e.encoding, e.object, e.start, e.end, e.reason)
path = path.strip('/')
clean = []
for segment in path.split('/'):
- segment = urllib.unquote(segment)
- try:
- segment = segment.decode('utf-8')
- except UnicodeDecodeError, e:
- raise URLDecodeError(e.encoding, e.object, e.start, e.end, e.reason)
if not segment or segment == '.':
continue
elif segment == '..':
@@ -574,7 +584,7 @@ def __call__(self, request):
subpath = matchdict.get('subpath', ())
if not hasattr(subpath, '__iter__'):
# this is not a *subpath stararg (just a {subpath})
- subpath = traversal_path(subpath)
+ subpath = traversal_path_info(subpath)
else:
# this request did not match a route
@@ -586,7 +596,7 @@ def __call__(self, request):
if VH_ROOT_KEY in environ:
vroot_path = environ[VH_ROOT_KEY]
- vroot_tuple = traversal_path(vroot_path)
+ vroot_tuple = traversal_path_info(vroot_path)
vpath = vroot_path + path
vroot_idx = len(vroot_tuple) -1
else:
@@ -607,7 +617,7 @@ def __call__(self, request):
# and this hurts readability; apologies
i = 0
view_selector = self.VIEW_SELECTOR
- vpath_tuple = traversal_path(vpath)
+ vpath_tuple = traversal_path_info(vpath)
for segment in vpath_tuple:
if segment[:2] == view_selector:
return {'context':ob,
View
7 pyramid/urldispatch.py
@@ -7,7 +7,7 @@
from pyramid.encode import url_quote
from pyramid.exceptions import URLDecodeError
-from pyramid.traversal import traversal_path
+from pyramid.traversal import traversal_path_info
from pyramid.traversal import quote_path_segment
_marker = object()
@@ -135,11 +135,10 @@ def matcher(path):
d = {}
for k, v in m.groupdict().iteritems():
if k == star:
- d[k] = traversal_path(v)
+ d[k] = traversal_path_info(v)
else:
- encoded = unquote(v)
try:
- d[k] = encoded.decode('utf-8')
+ d[k] = v.decode('utf-8')
except UnicodeDecodeError, e:
raise URLDecodeError(
e.encoding, e.object, e.start, e.end, e.reason

0 comments on commit 7fe36a3

Please sign in to comment.
Something went wrong with that request. Please try again.