Skip to content

Commit

Permalink
fix(uri): create two new encode methods for escape
Browse files Browse the repository at this point in the history
Create two new encode functions, `encode_check_escaped` and `encode_value_check_escaped`,
that leave sequences which already look percent-escaped (e.g. '%26') untouched.

closes falconry#1872
  • Loading branch information
MinesJA committed Mar 15, 2021
1 parent 43f4c75 commit 1948028
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 22 deletions.
8 changes: 2 additions & 6 deletions falcon/response.py
Expand Up @@ -14,7 +14,6 @@

"""Response class."""

from functools import partial
import mimetypes

from falcon import DEFAULT_MEDIA_TYPE
Expand All @@ -31,13 +30,10 @@
)
from falcon.util import dt_to_http, http_cookies, structures, TimezoneGMT
from falcon.util.deprecation import deprecated
from falcon.util.uri import encode
from falcon.util.uri import encode_value
from falcon.util.uri import encode_check_escaped as uri_encode
from falcon.util.uri import encode_value_check_escaped as uri_encode_value


uri_encode = partial(encode, check_is_escaped=True)
uri_encode_value = partial(encode_value, check_is_escaped=True)

GMT_TIMEZONE = TimezoneGMT()

_STREAM_LEN_REMOVED_MSG = (
Expand Down
62 changes: 55 additions & 7 deletions falcon/util/uri.py
Expand Up @@ -68,13 +68,13 @@ def _create_char_encoder(allowed_chars):
return lookup.__getitem__


def _create_str_encoder(is_value):
def _create_str_encoder(is_value, check_is_escaped=False):

allowed_chars = _UNRESERVED if is_value else _ALL_ALLOWED
allowed_chars_plus_percent = allowed_chars + '%'
encode_char = _create_char_encoder(allowed_chars)

def encoder(uri, check_is_escaped=False):
def encoder(uri):
# PERF(kgriffs): Very fast way to check, learned from urlib.quote
if not uri.rstrip(allowed_chars):
return uri
Expand Down Expand Up @@ -137,16 +137,13 @@ def encoder(uri, check_is_escaped=False):
Args:
uri (str): URI or part of a URI to encode.
check_is_escaped (boolean): Indicates a uri may have already been escaped
and should be treated as such
Returns:
str: An escaped version of `uri`, where all disallowed characters
have been percent-encoded.
"""


encode_value = _create_str_encoder(True)
encode_value.name = 'encode_value'
encode_value.__doc__ = """Encodes a value string according to RFC 3986.
Expand All @@ -168,8 +165,59 @@ def encoder(uri, check_is_escaped=False):
uri (str): URI fragment to encode. It is assumed not to cross delimiter
boundaries, and so any reserved URI delimiter characters
included in it will be percent-encoded.
check_is_escaped (boolean): Indicates a uri may have already been escaped
and should be treated as such
Returns:
str: An escaped version of `uri`, where all disallowed characters
have been percent-encoded.
"""

# Value encoder (is_value=True) that additionally leaves sequences which
# already look percent-escaped untouched (check_is_escaped=True).
encode_value_check_escaped = _create_str_encoder(True, True)
# NOTE(review): the metadata below previously targeted ``encode_value``,
# which overwrote that function's docstring and left this one undocumented.
# SOURCE sets a plain ``name`` attribute; confirm whether ``__name__`` was
# intended.
encode_value_check_escaped.name = 'encode_value_check_escaped'
encode_value_check_escaped.__doc__ = """Encodes a value string according to RFC 3986.

RFC 3986 defines a set of "unreserved" characters as well as a
set of "reserved" characters used as delimiters. Disallowed characters
are percent-encoded in a way that models ``urllib.parse.quote(safe="~")``
unless they appear to have been previously encoded. For example, '%26'
will not be encoded again as it follows the format of an encoded value.

All reserved characters are lumped together into a single set of
"delimiters", and everything in that set is escaped.

Note:
    This utility is faster in the average case than the similar
    `quote` function found in ``urllib``. It also strives to be easier
    to use by assuming a sensible default of allowed characters.

Args:
    uri (str): URI fragment to encode. It is assumed not to cross delimiter
        boundaries, and so any reserved URI delimiter characters
        included in it will be percent-encoded.

Returns:
    str: An escaped version of `uri`, where all disallowed characters
    have been percent-encoded.
"""

# Full-URI encoder: is_value=False selects the _ALL_ALLOWED character set
# (rather than _UNRESERVED), matching ``encode``; check_is_escaped=True makes
# it skip sequences that already look percent-escaped. Passing True for
# is_value here would build a second *value* encoder instead.
encode_check_escaped = _create_str_encoder(False, True)
# NOTE(review): SOURCE sets a plain ``name`` attribute; confirm whether
# ``__name__`` was intended.
encode_check_escaped.name = 'encode_check_escaped'
encode_check_escaped.__doc__ = """Encodes a full or relative URI according to RFC 3986.
RFC 3986 defines a set of "unreserved" characters as well as a
set of "reserved" characters used as delimiters. This function escapes
all other "disallowed" characters by percent-encoding them unless they
appear to have been previously encoded. For example, '%26' will not be
encoded again as it follows the format of an encoded value.
Note:
This utility is faster in the average case than the similar
`quote` function found in ``urlib``. It also strives to be easier
to use by assuming a sensible default of allowed characters.
Args:
uri (str): URI or part of a URI to encode.
Returns:
str: An escaped version of `uri`, where all disallowed characters
Expand Down
15 changes: 6 additions & 9 deletions tests/test_utils.py
Expand Up @@ -232,35 +232,32 @@ def test_uri_encode_double(self):
# NOTE(minesja): check_is_escaped added to allow option to
# retain behavior of ignoring already escaped values (#68)

from functools import partial
uri_encode = partial(uri.encode, check_is_escaped=True)

url = 'http://example.com/v1/fiz bit/messages'
expected = 'http://example.com/v1/fiz%20bit/messages'
assert uri_encode(uri_encode(url)) == expected
assert uri.uri_encode_check_escaped(uri.uri_encode_check_escaped(url)) == expected

url = 'http://example.com/v1/fizbit/messages?limit=3&e\u00e7ho=true'
expected = ('http://example.com/v1/fizbit/messages'
'?limit=3&e%C3%A7ho=true')
assert uri_encode(uri_encode(url)) == expected
assert uri.uri_encode_check_escaped(uri.uri_encode_check_escaped(url)) == expected

url = 'http://example.com/v1/fiz%bit/mess%ages/%'
expected = 'http://example.com/v1/fiz%25bit/mess%25ages/%25'
assert uri_encode(uri_encode(url)) == expected
assert uri.uri_encode_check_escaped(uri.uri_encode_check_escaped(url)) == expected

url = 'http://example.com/%%'
expected = 'http://example.com/%25%25'
assert uri_encode(uri_encode(url)) == expected
assert uri.uri_encode_check_escaped(uri.uri_encode_check_escaped(url)) == expected

# NOTE(kgriffs): Specific example cited in GH issue
url = 'http://something?redirect_uri=http%3A%2F%2Fsite'
assert uri.encode(url, check_is_escaped=True) == url
assert uri.encode(url) == url

hex_digits = 'abcdefABCDEF0123456789'
for c1 in hex_digits:
for c2 in hex_digits:
url = 'http://example.com/%' + c1 + c2
encoded = uri_encode(uri_encode(url))
encoded = uri.uri_encode_check_escaped(uri.uri_encode_check_escaped(url))
assert encoded == url

def test_uri_encode_value(self):
Expand Down

0 comments on commit 1948028

Please sign in to comment.