Skip to content

Commit

Permalink
Optimize _encode_invalid_chars (urllib3#1787)
Browse files: browse the repository at this point in the history
Co-authored-by: Seth Michael Larson <sethmichaellarson@gmail.com>
  • Loading branch information
pquentin and sethmlarson committed Jan 21, 2020
1 parent 07b0b95 commit 2747197
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Expand Up @@ -6,6 +6,8 @@ dev

* Drop support for EOL Python 3.4 (Pull #1774)

* Optimize _encode_invalid_chars (Pull #1787)


1.25.7 (2019-11-11)
-------------------
Expand Down
15 changes: 6 additions & 9 deletions src/urllib3/util/url.py
Expand Up @@ -216,18 +216,15 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):

component = six.ensure_text(component)

# Normalize existing percent-encoded bytes.
# Try to see if the component we're encoding is already percent-encoded
# so we can skip all '%' characters but still encode all others.
percent_encodings = PERCENT_RE.findall(component)

# Normalize existing percent-encoded bytes.
for enc in percent_encodings:
if not enc.isupper():
component = component.replace(enc, enc.upper())
component, percent_encodings = PERCENT_RE.subn(
lambda match: match.group(0).upper(), component
)

uri_bytes = component.encode("utf-8", "surrogatepass")
is_percent_encoded = len(percent_encodings) == uri_bytes.count(b"%")

is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
encoded_component = bytearray()

for i in range(0, len(uri_bytes)):
Expand All @@ -237,7 +234,7 @@ def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
if (is_percent_encoded and byte == b"%") or (
byte_ord < 128 and byte.decode() in allowed_chars
):
encoded_component.extend(byte)
encoded_component += byte
continue
encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))

Expand Down

0 comments on commit 2747197

Please sign in to comment.