In [1]:
from urllib.parse import urlparse, urlunparse

# Parsing URL Example

In [2]:
url = 'http://user:pwd@NetLoc:80/path;param?query=arg#frag'
parsed = urlparse(url)
print(parsed)

# The first six entries are the fields of the ParseResult itself; the last
# four are convenience attributes derived from netloc (note in the output
# that hostname is lower-cased and port is an int).
components = (
    ('scheme  :', parsed.scheme),
    ('netloc  :', parsed.netloc),
    ('path    :', parsed.path),
    ('params  :', parsed.params),
    ('query   :', parsed.query),
    ('fragment:', parsed.fragment),
    ('username:', parsed.username),
    ('password:', parsed.password),
    ('hostname:', parsed.hostname),
    ('port    :', parsed.port),
)
for label, value in components:
    print(label, value)

ParseResult(scheme='http', netloc='user:pwd@NetLoc:80', path='/path', params='param', query='query=arg', fragment='frag')
scheme  : http
netloc  : user:pwd@NetLoc:80
path    : /path
params  : param
query   : query=arg
fragment: frag
username: user
password: pwd
hostname: netloc
port    : 80


# Unparsing URL Example

In [3]:
original = 'http://netloc/path;param?query=arg#frag'
parsed = urlparse(original)

# Print the input next to the URL rebuilt by geturl() so the two lines
# can be compared directly.
for label, text in (('ORIG  :', original), ('PARSED:', parsed.geturl())):
    print(label, text)

ORIG  : http://netloc/path;param?query=arg#frag
PARSED: http://netloc/path;param?query=arg#frag


# Parsing URL as Tuple

In [4]:
# urlparse() returns a ParseResult, which is a named tuple (a tuple subclass).
# A tuple is an immutable sequence: unlike a list, its contents cannot be
# changed after creation.
#   tuple literal: x = (element_a, element_b, ...)
#   list literal : x = [element_a, element_b, ...]
# Both lists and tuples support slicing; [:] selects every element, and here
# it yields a plain tuple holding the six URL components.
t = parsed[:]
print(type(t), t, sep='\n')

<class 'tuple'>
('http', 'netloc', '/path', 'param', 'query=arg', 'frag')


# Unparsing URL from Tuple

In [5]:
# urlunparse() takes any six-item iterable, so the plain tuple is enough
# to rebuild the URL string.
rebuilt = urlunparse(t)
print(rebuilt)

http://netloc/path;param?query=arg#frag


# Join URL

In [6]:
from urllib.parse import urljoin

In [7]:
# Resolve two relative references against the same base document:
# a sibling file, and a file one directory up.
base = 'http://www.example.com/path/file.html'
for ref in ('anotherfile.html', '../anotherfile.html'):
    print(urljoin(base, ref))

http://www.example.com/path/anotherfile.html
http://www.example.com/anotherfile.html


In [8]:
# The trailing slash on the base decides what a relative reference replaces:
#   - '.../zh-tw/' is a directory, so the reference is resolved inside it;
#   - '.../zh-tw'  is treated as a file name, so the reference replaces it.
# A leading '../' then climbs one directory from that starting point; as the
# last output line shows, climbing past the root is simply ignored.
for base in ('https://support.apple.com/zh-tw/', 'https://support.apple.com/zh-tw'):
    for ref in ('ja-jp/ipad', '../ja-jp/ipad'):
        print(urljoin(base, ref))

https://support.apple.com/zh-tw/ja-jp/ipad
https://support.apple.com/ja-jp/ipad
https://support.apple.com/ja-jp/ipad
https://support.apple.com/ja-jp/ipad


# Encoding Query Arguments

In [9]:
from urllib.parse import urlencode

In [10]:
# urlencode() turns a mapping into a query string: pairs are joined with '&'
# and, as the output shows, the space in a value is encoded as '+'.
query_args = {'q': 'query string', 'foo': 'bar'}
encoded_args = urlencode(query_args)
print('Encoded:', encoded_args)

Encoded: q=query+string&foo=bar


In [11]:
query_args_1 = {'foo': ['foo1', 'foo2']}

# Without doseq the list is converted with str() and quoted as a single value;
# with doseq=True every element becomes its own 'foo=...' pair.
for label, doseq in (('Single  :', False), ('Sequence:', True)):
    print(label, urlencode(query_args_1, doseq=doseq))

Single  : foo=%5B%27foo1%27%2C+%27foo2%27%5D
Sequence: foo=foo1&foo=foo2


# Decoding Query Arguments

In [12]:
from urllib.parse import parse_qs, parse_qsl

In [13]:
encoded = 'foo=foo1&foo=foo2'

# parse_qs() groups repeated keys into a dict of lists, while parse_qsl()
# keeps every pair as a (name, value) tuple in its original order.
for label, parser in (('parse_qs :', parse_qs), ('parse_qsl:', parse_qsl)):
    print(label, parser(encoded))

parse_qs : {'foo': ['foo1', 'foo2']}
parse_qsl: [('foo', 'foo1'), ('foo', 'foo2')]


# Quote URL

In [14]:
from urllib.parse import quote, quote_plus, urlencode

In [15]:
url = 'http://localhost:8080/~juiyuhung/'

# Three ways to escape the same URL. As the output shows, quote() leaves '/'
# untouched, whereas quote_plus() and urlencode() also escape it.
quoted_forms = (
    ('urlencode() :', urlencode({'url': url})),
    ('quote()     :', quote(url)),
    ('quote_plus():', quote_plus(url)),
)
for label, quoted in quoted_forms:
    print(label, quoted)

urlencode() : url=http%3A%2F%2Flocalhost%3A8080%2F%7Ejuiyuhung%2F
quote()     : http%3A//localhost%3A8080/%7Ejuiyuhung/
quote_plus(): http%3A%2F%2Flocalhost%3A8080%2F%7Ejuiyuhung%2F


# Unquote URL

In [16]:
from urllib.parse import unquote, unquote_plus

In [17]:
# Each decoder is paired with the escaped form produced by its counterpart:
# unquote() reverses quote(), and unquote_plus() reverses quote_plus().
decoders = (
    (unquote, 'http%3A//localhost%3A8080/%7Ejuiyuhung/'),
    (unquote_plus, 'http%3A%2F%2Flocalhost%3A8080%2F%7Ejuiyuhung%2F'),
)
for decode, escaped in decoders:
    print(decode(escaped))

http://localhost:8080/~juiyuhung/
http://localhost:8080/~juiyuhung/
