29
29
30
30
import re
31
31
import sys
32
+ import types
32
33
import collections
33
34
import warnings
34
35
@@ -179,6 +180,8 @@ def port(self):
179
180
raise ValueError ("Port out of range 0-65535" )
180
181
return port
181
182
183
+ __class_getitem__ = classmethod (types .GenericAlias )
184
+
182
185
183
186
class _NetlocResultMixinStr (_NetlocResultMixinBase , _ResultMixinStr ):
184
187
__slots__ = ()
@@ -369,9 +372,23 @@ def _fix_result_transcoding():
369
372
def urlparse (url , scheme = '' , allow_fragments = True ):
370
373
"""Parse a URL into 6 components:
371
374
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
372
- Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
373
- Note that we don't break the components up in smaller bits
374
- (e.g. netloc is a single string) and we don't expand % escapes."""
375
+
376
+ The result is a named 6-tuple with fields corresponding to the
377
+ above. It is either a ParseResult or ParseResultBytes object,
378
+ depending on the type of the url parameter.
379
+
380
+ The username, password, hostname, and port sub-components of netloc
381
+ can also be accessed as attributes of the returned object.
382
+
383
+ The scheme argument provides the default value of the scheme
384
+ component when no scheme is found in url.
385
+
386
+ If allow_fragments is False, no attempt is made to separate the
387
+ fragment component from the previous component, which can be either
388
+ path or query.
389
+
390
+ Note that % escapes are not expanded.
391
+ """
375
392
url , scheme , _coerce_result = _coerce_args (url , scheme )
376
393
splitresult = urlsplit (url , scheme , allow_fragments )
377
394
scheme , netloc , url , query , fragment = splitresult
@@ -417,20 +434,33 @@ def _checknetloc(netloc):
417
434
raise ValueError ("netloc '" + netloc + "' contains invalid " +
418
435
"characters under NFKC normalization" )
419
436
420
- def _remove_unsafe_bytes_from_url (url ):
421
- for b in _UNSAFE_URL_BYTES_TO_REMOVE :
422
- url = url .replace (b , "" )
423
- return url
424
-
425
437
def urlsplit (url , scheme = '' , allow_fragments = True ):
426
438
"""Parse a URL into 5 components:
427
439
<scheme>://<netloc>/<path>?<query>#<fragment>
428
- Return a 5-tuple: (scheme, netloc, path, query, fragment).
429
- Note that we don't break the components up in smaller bits
430
- (e.g. netloc is a single string) and we don't expand % escapes."""
440
+
441
+ The result is a named 5-tuple with fields corresponding to the
442
+ above. It is either a SplitResult or SplitResultBytes object,
443
+ depending on the type of the url parameter.
444
+
445
+ The username, password, hostname, and port sub-components of netloc
446
+ can also be accessed as attributes of the returned object.
447
+
448
+ The scheme argument provides the default value of the scheme
449
+ component when no scheme is found in url.
450
+
451
+ If allow_fragments is False, no attempt is made to separate the
452
+ fragment component from the previous component, which can be either
453
+ path or query.
454
+
455
+ Note that % escapes are not expanded.
456
+ """
457
+
431
458
url , scheme , _coerce_result = _coerce_args (url , scheme )
432
- url = _remove_unsafe_bytes_from_url (url )
433
- scheme = _remove_unsafe_bytes_from_url (scheme )
459
+
460
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE :
461
+ url = url .replace (b , "" )
462
+ scheme = scheme .replace (b , "" )
463
+
434
464
allow_fragments = bool (allow_fragments )
435
465
key = url , scheme , allow_fragments , type (url ), type (scheme )
436
466
cached = _parse_cache .get (key , None )
@@ -441,31 +471,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
441
471
netloc = query = fragment = ''
442
472
i = url .find (':' )
443
473
if i > 0 :
444
- if url [:i ] == 'http' : # optimize the common case
445
- url = url [i + 1 :]
446
- if url [:2 ] == '//' :
447
- netloc , url = _splitnetloc (url , 2 )
448
- if (('[' in netloc and ']' not in netloc ) or
449
- (']' in netloc and '[' not in netloc )):
450
- raise ValueError ("Invalid IPv6 URL" )
451
- if allow_fragments and '#' in url :
452
- url , fragment = url .split ('#' , 1 )
453
- if '?' in url :
454
- url , query = url .split ('?' , 1 )
455
- _checknetloc (netloc )
456
- v = SplitResult ('http' , netloc , url , query , fragment )
457
- _parse_cache [key ] = v
458
- return _coerce_result (v )
459
474
for c in url [:i ]:
460
475
if c not in scheme_chars :
461
476
break
462
477
else :
463
- # make sure "url" is not actually a port number (in which case
464
- # "scheme" is really part of the path)
465
- rest = url [i + 1 :]
466
- if not rest or any (c not in '0123456789' for c in rest ):
467
- # not a port number
468
- scheme , url = url [:i ].lower (), rest
478
+ scheme , url = url [:i ].lower (), url [i + 1 :]
469
479
470
480
if url [:2 ] == '//' :
471
481
netloc , url = _splitnetloc (url , 2 )
@@ -642,7 +652,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
642
652
unquote('abc%20def') -> 'abc def'.
643
653
"""
644
654
if isinstance (string , bytes ):
645
- raise TypeError ( 'Expected str, got bytes' )
655
+ return unquote_to_bytes ( string ). decode ( encoding , errors )
646
656
if '%' not in string :
647
657
string .split
648
658
return string
@@ -744,9 +754,8 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
744
754
if max_num_fields < num_fields :
745
755
raise ValueError ('Max number of fields exceeded' )
746
756
747
- pairs = [s1 for s1 in qs .split (separator )]
748
757
r = []
749
- for name_value in pairs :
758
+ for name_value in qs . split ( separator ) :
750
759
if not name_value and not strict_parsing :
751
760
continue
752
761
nv = name_value .split ('=' , 1 )
0 commit comments