Skip to content

Commit 27d1c34

Browse files
CPython developers, youknowone
CPython developers
authored and committed
Update urllib from CPython 3.10.5
1 parent 7b643da commit 27d1c34

File tree

4 files changed

+83
-71
lines changed

4 files changed

+83
-71
lines changed

Lib/test/test_urllib.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,8 @@
99
import unittest
1010
from unittest.mock import patch
1111
from test import support
12-
from test.support import os_helper, warnings_helper
12+
from test.support import os_helper
13+
from test.support import warnings_helper
1314
import os
1415
try:
1516
import ssl

Lib/urllib/parse.py

+46-37
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929

3030
import re
3131
import sys
32+
import types
3233
import collections
3334
import warnings
3435

@@ -179,6 +180,8 @@ def port(self):
179180
raise ValueError("Port out of range 0-65535")
180181
return port
181182

183+
__class_getitem__ = classmethod(types.GenericAlias)
184+
182185

183186
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
184187
__slots__ = ()
@@ -369,9 +372,23 @@ def _fix_result_transcoding():
369372
def urlparse(url, scheme='', allow_fragments=True):
370373
"""Parse a URL into 6 components:
371374
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
372-
Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
373-
Note that we don't break the components up in smaller bits
374-
(e.g. netloc is a single string) and we don't expand % escapes."""
375+
376+
The result is a named 6-tuple with fields corresponding to the
377+
above. It is either a ParseResult or ParseResultBytes object,
378+
depending on the type of the url parameter.
379+
380+
The username, password, hostname, and port sub-components of netloc
381+
can also be accessed as attributes of the returned object.
382+
383+
The scheme argument provides the default value of the scheme
384+
component when no scheme is found in url.
385+
386+
If allow_fragments is False, no attempt is made to separate the
387+
fragment component from the previous component, which can be either
388+
path or query.
389+
390+
Note that % escapes are not expanded.
391+
"""
375392
url, scheme, _coerce_result = _coerce_args(url, scheme)
376393
splitresult = urlsplit(url, scheme, allow_fragments)
377394
scheme, netloc, url, query, fragment = splitresult
@@ -417,20 +434,33 @@ def _checknetloc(netloc):
417434
raise ValueError("netloc '" + netloc + "' contains invalid " +
418435
"characters under NFKC normalization")
419436

420-
def _remove_unsafe_bytes_from_url(url):
421-
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
422-
url = url.replace(b, "")
423-
return url
424-
425437
def urlsplit(url, scheme='', allow_fragments=True):
426438
"""Parse a URL into 5 components:
427439
<scheme>://<netloc>/<path>?<query>#<fragment>
428-
Return a 5-tuple: (scheme, netloc, path, query, fragment).
429-
Note that we don't break the components up in smaller bits
430-
(e.g. netloc is a single string) and we don't expand % escapes."""
440+
441+
The result is a named 5-tuple with fields corresponding to the
442+
above. It is either a SplitResult or SplitResultBytes object,
443+
depending on the type of the url parameter.
444+
445+
The username, password, hostname, and port sub-components of netloc
446+
can also be accessed as attributes of the returned object.
447+
448+
The scheme argument provides the default value of the scheme
449+
component when no scheme is found in url.
450+
451+
If allow_fragments is False, no attempt is made to separate the
452+
fragment component from the previous component, which can be either
453+
path or query.
454+
455+
Note that % escapes are not expanded.
456+
"""
457+
431458
url, scheme, _coerce_result = _coerce_args(url, scheme)
432-
url = _remove_unsafe_bytes_from_url(url)
433-
scheme = _remove_unsafe_bytes_from_url(scheme)
459+
460+
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
461+
url = url.replace(b, "")
462+
scheme = scheme.replace(b, "")
463+
434464
allow_fragments = bool(allow_fragments)
435465
key = url, scheme, allow_fragments, type(url), type(scheme)
436466
cached = _parse_cache.get(key, None)
@@ -441,31 +471,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
441471
netloc = query = fragment = ''
442472
i = url.find(':')
443473
if i > 0:
444-
if url[:i] == 'http': # optimize the common case
445-
url = url[i+1:]
446-
if url[:2] == '//':
447-
netloc, url = _splitnetloc(url, 2)
448-
if (('[' in netloc and ']' not in netloc) or
449-
(']' in netloc and '[' not in netloc)):
450-
raise ValueError("Invalid IPv6 URL")
451-
if allow_fragments and '#' in url:
452-
url, fragment = url.split('#', 1)
453-
if '?' in url:
454-
url, query = url.split('?', 1)
455-
_checknetloc(netloc)
456-
v = SplitResult('http', netloc, url, query, fragment)
457-
_parse_cache[key] = v
458-
return _coerce_result(v)
459474
for c in url[:i]:
460475
if c not in scheme_chars:
461476
break
462477
else:
463-
# make sure "url" is not actually a port number (in which case
464-
# "scheme" is really part of the path)
465-
rest = url[i+1:]
466-
if not rest or any(c not in '0123456789' for c in rest):
467-
# not a port number
468-
scheme, url = url[:i].lower(), rest
478+
scheme, url = url[:i].lower(), url[i+1:]
469479

470480
if url[:2] == '//':
471481
netloc, url = _splitnetloc(url, 2)
@@ -642,7 +652,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
642652
unquote('abc%20def') -> 'abc def'.
643653
"""
644654
if isinstance(string, bytes):
645-
raise TypeError('Expected str, got bytes')
655+
return unquote_to_bytes(string).decode(encoding, errors)
646656
if '%' not in string:
647657
string.split
648658
return string
@@ -744,9 +754,8 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
744754
if max_num_fields < num_fields:
745755
raise ValueError('Max number of fields exceeded')
746756

747-
pairs = [s1 for s1 in qs.split(separator)]
748757
r = []
749-
for name_value in pairs:
758+
for name_value in qs.split(separator):
750759
if not name_value and not strict_parsing:
751760
continue
752761
nv = name_value.split('=', 1)

Lib/urllib/request.py

+31-33
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@
6464
# install it
6565
urllib.request.install_opener(opener)
6666
67-
f = urllib.request.urlopen('http://www.python.org/')
67+
f = urllib.request.urlopen('https://www.python.org/')
6868
"""
6969

7070
# XXX issues:
@@ -163,18 +163,10 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
163163
164164
The *cadefault* parameter is ignored.
165165
166-
This function always returns an object which can work as a context
167-
manager and has methods such as
168166
169-
* geturl() - return the URL of the resource retrieved, commonly used to
170-
determine if a redirect was followed
171-
172-
* info() - return the meta-information of the page, such as headers, in the
173-
form of an email.message_from_string() instance (see Quick Reference to
174-
HTTP Headers)
175-
176-
* getcode() - return the HTTP status code of the response. Raises URLError
177-
on errors.
167+
This function always returns an object which can work as a
168+
context manager and has the properties url, headers, and status.
169+
See urllib.response.addinfourl for more detail on these properties.
178170
179171
For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
180172
object slightly modified. In addition to the three new methods above, the
@@ -210,6 +202,8 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
210202
context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
211203
cafile=cafile,
212204
capath=capath)
205+
# send ALPN extension to indicate HTTP/1.1 protocol
206+
context.set_alpn_protocols(['http/1.1'])
213207
https_handler = HTTPSHandler(context=context)
214208
opener = build_opener(https_handler)
215209
elif context:
@@ -895,10 +889,10 @@ def is_suburi(self, base, test):
895889
return True
896890
if base[0] != test[0]:
897891
return False
898-
common = posixpath.commonprefix((base[1], test[1]))
899-
if len(common) == len(base[1]):
900-
return True
901-
return False
892+
prefix = base[1]
893+
if prefix[-1:] != '/':
894+
prefix += '/'
895+
return test[1].startswith(prefix)
902896

903897

904898
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
@@ -1823,7 +1817,7 @@ def retrieve(self, url, filename=None, reporthook=None, data=None):
18231817
hdrs = fp.info()
18241818
fp.close()
18251819
return url2pathname(_splithost(url1)[1]), hdrs
1826-
except OSError as msg:
1820+
except OSError:
18271821
pass
18281822
fp = self.open(url, data)
18291823
try:
@@ -2680,22 +2674,26 @@ def getproxies_registry():
26802674
# Returned as Unicode but problems if not converted to ASCII
26812675
proxyServer = str(winreg.QueryValueEx(internetSettings,
26822676
'ProxyServer')[0])
2683-
if '=' in proxyServer:
2684-
# Per-protocol settings
2685-
for p in proxyServer.split(';'):
2686-
protocol, address = p.split('=', 1)
2687-
# See if address has a type:// prefix
2688-
if not re.match('(?:[^/:]+)://', address):
2689-
address = '%s://%s' % (protocol, address)
2690-
proxies[protocol] = address
2691-
else:
2692-
# Use one setting for all protocols
2693-
if proxyServer[:5] == 'http:':
2694-
proxies['http'] = proxyServer
2695-
else:
2696-
proxies['http'] = 'http://%s' % proxyServer
2697-
proxies['https'] = 'https://%s' % proxyServer
2698-
proxies['ftp'] = 'ftp://%s' % proxyServer
2677+
if '=' not in proxyServer and ';' not in proxyServer:
2678+
# Use one setting for all protocols.
2679+
proxyServer = 'http={0};https={0};ftp={0}'.format(proxyServer)
2680+
for p in proxyServer.split(';'):
2681+
protocol, address = p.split('=', 1)
2682+
# See if address has a type:// prefix
2683+
if not re.match('(?:[^/:]+)://', address):
2684+
# Add type:// prefix to address without specifying type
2685+
if protocol in ('http', 'https', 'ftp'):
2686+
# The default proxy type of Windows is HTTP
2687+
address = 'http://' + address
2688+
elif protocol == 'socks':
2689+
address = 'socks://' + address
2690+
proxies[protocol] = address
2691+
# Use SOCKS proxy for HTTP(S) protocols
2692+
if proxies.get('socks'):
2693+
# The default SOCKS proxy type of Windows is SOCKS4
2694+
address = re.sub(r'^socks://', 'socks4://', proxies['socks'])
2695+
proxies['http'] = proxies.get('http') or address
2696+
proxies['https'] = proxies.get('https') or address
26992697
internetSettings.Close()
27002698
except (OSError, ValueError, TypeError):
27012699
# Either registry key not found etc, or the value in an

Lib/urllib/response.py

+4
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,10 @@ def __init__(self, fp, headers, url, code=None):
7373
self.url = url
7474
self.code = code
7575

76+
@property
77+
def status(self):
78+
return self.code
79+
7680
def getcode(self):
7781
return self.code
7882

0 commit comments

Comments
 (0)