# 2 在Python中利用yarl高效处理url

## 2.1 利用yarl解析url信息

In [1]:
from yarl import URL

# Parse a URL whose last path segment is percent-encoded (UTF-8 bytes of a
# non-ASCII directory name); later cells inspect its individual components.
url = URL('https://github.com/CNFeffery/DataScienceStudyNotes/tree/master/%E5%8E%86%E5%8F%B2%E6%96%87%E7%AB%A0%E9%99%84%E4%BB%B6%E5%88%97%E8%A1%A8')

In [2]:
# Decode the URL: human_repr() returns the whole URL as a string with the
# percent-encoded characters turned back into readable text.
url.human_repr()

'https://github.com/CNFeffery/DataScienceStudyNotes/tree/master/历史文章附件列表'

In [3]:
# Scheme (protocol) component of the URL.
url.scheme

'https'

In [4]:
# Host name component of the URL.
url.host

'github.com'

In [5]:
# No port is written in the URL string, yet `port` still yields a value:
# it reports the default port of the https scheme.
url.port

443

In [6]:
# Path component, returned already percent-decoded.
url.path

'/CNFeffery/DataScienceStudyNotes/tree/master/历史文章附件列表'

In [7]:
# Split the path into its individual levels: a tuple that starts with the
# root '/' followed by each (decoded) path segment.
url.parts

('/', 'CNFeffery', 'DataScienceStudyNotes', 'tree', 'master', '历史文章附件列表')

In [8]:
# The URL string carries no explicit port, so explicit_port is None.
# Compare with `is`: None is a singleton, and an identity check cannot be
# affected by a custom __eq__ implementation.
url.explicit_port is None

True

In [9]:
# When the port is written in the URL, explicit_port returns it as an int.
URL('http://10.10.0.26:8000').explicit_port

8000

In [10]:
# The fragment is the part after '#', returned without the '#' itself.
URL('http://10.10.0.26:8000#tag1').fragment

'tag1'

In [11]:
# A percent-encoded fragment is returned in decoded, readable form.
URL('https://github.com/CNFeffery/DataScienceStudyNotes#1-books-%E5%B7%B2%E6%9B%B4%E6%96%B0%E5%8D%9A%E5%AE%A2%E5%88%97%E8%A1%A8').fragment

'1-books-已更新博客列表'

In [12]:
# Rebind `url` to an address whose query string repeats the key 'c'.
url = URL('http://example.com/path?a=1&b=2&c=3&c=4&c=5')
# `query` is a multi-value mapping (MultiDictProxy): repeated keys are all
# kept, in their original order, and every value is a string.
query = url.query
query

<MultiDictProxy('a': '1', 'b': '2', 'c': '3', 'c': '4', 'c': '5')>

In [13]:
# Plain indexing returns a single value per key; for the repeated key 'c'
# that is the first occurrence.
query['a'], query['b'], query['c']

('1', '2', '3')

In [14]:
# getone() returns the first value stored under the key.
query.getone('c')

'3'

In [15]:
# getall() returns every value stored under the key, as a list.
query.getall('c')

['3', '4', '5']

## 2.2 利用yarl构造url

In [16]:
# Assemble a URL from its individual components.
URL.build(
    scheme="http", 
    host="10.10.0.26", 
    port=8000, 
    path='/demo/page1', 
    query={
        'a': 1,
        'b': 'xxx',
        # A list value is expanded into one repeated key per element.
        'c': [1, 2, 3]
    },
    fragment='tag1'
)

URL('http://10.10.0.26:8000/demo/page1?a=1&b=xxx&c=1&c=2&c=3#tag1')

In [17]:
# Base URL reused by the following cells; each with_*/update_* call below
# returns a new URL and leaves this one unchanged.
url = URL('http://10.10.0.26:8000')
url

URL('http://10.10.0.26:8000')

In [18]:
# Return a copy of the URL with only the host replaced.
url.with_host('10.10.0.27')

URL('http://10.10.0.27:8000')

In [19]:
# Return a copy of the URL with the path replaced.
url.with_path('/demo/api/v5/xxx')

URL('http://10.10.0.26:8000/demo/api/v5/xxx')

In [20]:
# Multi-step construction: each call returns a URL, so calls can be chained.
url.with_path('/demo/api/v5/xxx').with_query({'a': 1, 'b': 'xxx', 'c': [1, 2]})

URL('http://10.10.0.26:8000/demo/api/v5/xxx?a=1&b=xxx&c=1&c=2')

In [21]:
# Replace parameters: the second update_query() overwrites the value of the
# existing key 'a' and adds the new key 'b'.
url.update_query({'a': 1}).update_query({'a': '2', 'b': 'xxx'})

URL('http://10.10.0.26:8000/?a=2&b=xxx')

In [22]:
# update_query() leaves other pre-existing parameters untouched: 'c' is
# kept, the repeated 'a' values collapse into the new single value, and
# 'b' is appended.
URL('http://10.10.0.26:8000/?a=1&a=2&c=1').update_query({'a': 3, 'b': 'xxx'})

URL('http://10.10.0.26:8000/?a=3&c=1&b=xxx')

In [23]:
# with_query() discards all pre-existing parameters and keeps only the
# ones passed in.
URL('http://10.10.0.26:8000/?a=1&a=2&c=1').with_query({'a': 3, 'b': 'xxx'})

URL('http://10.10.0.26:8000/?a=3&b=xxx')

## 2.3 利用`/`、`%`运算符快捷合成url

In [24]:
# `/` appends path segments and `%` merges query parameters. Both operators
# have the same precedence and are applied left to right, so the path is
# fully built before the two dicts are merged into the query in turn.
(
    URL('http://10.10.0.26:8000') / 
    'api/v5' / 
    'xxx' % 
    {'a': 1, 'b': [1, 2, 3]} %
    {'c': 'xxx', 'd': 'yyy'}
)

URL('http://10.10.0.26:8000/api/v5/xxx?a=1&b=1&b=2&b=3&c=xxx&d=yyy')