# requests模块

## 1、基本使用

urllib模块中请求网页使用urlopen()方法，而在requests模块中相应方法为get()。
其他请求例如post, put等，直接改变方法即可。

In [None]:
import requests

# Fetch the Baidu home page with a plain GET request.
response = requests.get('https://www.baidu.com/')
body = response.text

# Show, in order: the response object's type, the HTTP status code,
# the type of the text body, the body itself, and the response cookies.
print(type(response))
print(response.status_code)
print(type(body))
print(body)
print(response.cookies)

## 2、GET请求

如果需要附加额外参数，除了直接拼接在url后，还可以通过params参数直接传入字典，比urllib中需要将字典转化为bytes类型方便得多。

In [None]:
import requests

# Query-string parameters are supplied as a plain dict via ``params``.
query = {
    'name': 'laowang',
    'age': 22
}
response = requests.get('http://httpbin.org/get', params=query)

# ``response.text`` is the body as a str.
body = response.text
print(body)
print(type(body))

# ``json()`` parses the body into a Python object; print it and its type.
# Note: it raises an error if the body is not valid JSON.
parsed = response.json()
print(parsed)
print(type(parsed))

### 2.1 抓取网页

In [None]:
import requests
# Regular-expression module.
import re

# Send a browser-like User-Agent header along with the request.
request_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
}
response = requests.get('https://www.zhihu.com/explore', headers=request_headers)

# For each 'explore-feed' occurrence, capture the text between the first '>'
# following 'question_link' and the next '</a>'. re.S lets '.' match newlines.
title_re = re.compile('explore-feed.*?question_link.*?>(.*?)</a>', re.S)
titles = title_re.findall(response.text)
print(titles)

### 2.2 抓取二进制数据

以下方法同样适用于音频、视频文件。

In [None]:
import requests

response = requests.get('https://github.com/favicon.ico')
raw_bytes = response.content

# Print the body decoded as text first, then the same body as raw bytes.
print(response.text)
print(raw_bytes)

# Save the bytes to disk: the first argument is the file name, the second
# opens the file for binary writing.
with open('favicon.ico', 'wb') as icon_file:
    icon_file.write(raw_bytes)

![image](https://github.com/DRNTT/SpiderImage/blob/master/ch3/result.PNG?raw=true)

## 3、高级用法 

### 3.1 文件上传

In [None]:
import requests

# Open the file inside a ``with`` block so the handle is always closed
# after the upload, even if the request raises.
with open('favicon.ico', 'rb') as upload:
    # The dict key is the form field name; the value is the open file object.
    file = {'file': upload}
    r = requests.post('http://httpbin.org/post', files=file)
print(r.text)

### 3.2 Cookies

In [None]:
import requests

response = requests.get('https://www.baidu.com')
jar = response.cookies
print(jar)

# Print every cookie on its own line as ``name=value``.
for name, val in jar.items():
    print('='.join((name, val)))

In [None]:
# Stay logged in by sending the cookies in the request headers.
import requests

# Raw value of the Cookie header, sent verbatim with the request.
cookie_header = '_zap=be42a0fe-c3b6-4865-ba6f-287c7706980d; d_c0="AAAmr-uMOA-PTmkd9aVdhl-DBvHne5bzFzY=|1554273075"; __gads=ID=b5d624b70a547e3b:T=1554273077:S=ALNI_MYC0jas_Y-tKGZrXqh_ZSopv6-vkw; l_n_c=1; q_c1=37ed72bcc6334143a4950ba2e2f117d2|1557103524000|1554273076000; _xsrf=9fc851771d2305aae2f03c03df54ecbd; r_cap_id="ODY5M2ZjNWFjOTVjNDNjNjg1MmYyMjliZmQxNGFiZDk=|1557103524|e0e3f46829d4a0dee0144b485e969a61a7640d45"; cap_id="YjczNjhkYWU1MDU1NGVhOWJjYTVhODE1YWJiNzQ4MmM=|1557103524|b7f551db75ea040cbf62110dc2cd56b595844422"; l_cap_id="NGZmNGZmNDZiMDk0NDk5MWFmNWQwYWI3NWRhMGI1Mjc=|1557103524|1449412a5b797a3d8974b60076cc34326b908bb3"; n_c=1; __utma=51854390.1956749130.1557103527.1557103527.1557103527.1; __utmc=51854390; __utmz=51854390.1557103527.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.000--|3=entry_date=20190403=1; tgw_l7_route=73af20938a97f63d9b695ad561c4c10c; capsion_ticket="2|1:0|10:1557105273|14:capsion_ticket|44:MThhYzVhMTc2NzYyNGVjOWFkMzM5NzNjYTZkMjFhMTA=|abd0b86dfb5bc1c486eb4ff8bdfac533edd687a2a9a6fc52ac46cf945318d7f9"; z_c0="2|1:0|10:1557105331|4:z_c0|92:Mi4xby1HbEF3QUFBQUFBQUNhdjY0dzREeVlBQUFCZ0FsVk5zOWk4WFFCQWFUVVVuVVNTUTJlSkJxNUk1QTNHSFE1MjJ3|9288086a815cfa2013765d36c57945ec12a8f60167f6590c44c98716f8866717"; unlock_ticket="ADDAkVw5ygomAAAAYAJVTbuRz1zw-IX71qI6IcawZAUoaRDhyWQuOg=="; tst=r; _xsrf=7twvXgoiE4DVP2pCR5OuNUdke7Z4ntD1'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'

# Header order is kept as Cookie, Host, User-Agent.
headers = {
    'Cookie': cookie_header,
    'Host': 'www.zhihu.com',
    'User-Agent': user_agent
}

response = requests.get('https://www.zhihu.com', headers=headers)
print(response.text)


In [None]:
# Second approach: build a RequestsCookieJar object.
import requests

cookies = '_zap=be42a0fe-c3b6-4865-ba6f-287c7706980d; d_c0="AAAmr-uMOA-PTmkd9aVdhl-DBvHne5bzFzY=|1554273075"; __gads=ID=b5d624b70a547e3b:T=1554273077:S=ALNI_MYC0jas_Y-tKGZrXqh_ZSopv6-vkw; l_n_c=1; q_c1=37ed72bcc6334143a4950ba2e2f117d2|1557103524000|1554273076000; _xsrf=9fc851771d2305aae2f03c03df54ecbd; r_cap_id="ODY5M2ZjNWFjOTVjNDNjNjg1MmYyMjliZmQxNGFiZDk=|1557103524|e0e3f46829d4a0dee0144b485e969a61a7640d45"; cap_id="YjczNjhkYWU1MDU1NGVhOWJjYTVhODE1YWJiNzQ4MmM=|1557103524|b7f551db75ea040cbf62110dc2cd56b595844422"; l_cap_id="NGZmNGZmNDZiMDk0NDk5MWFmNWQwYWI3NWRhMGI1Mjc=|1557103524|1449412a5b797a3d8974b60076cc34326b908bb3"; n_c=1; __utma=51854390.1956749130.1557103527.1557103527.1557103527.1; __utmc=51854390; __utmz=51854390.1557103527.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.000--|3=entry_date=20190403=1; tgw_l7_route=73af20938a97f63d9b695ad561c4c10c; capsion_ticket="2|1:0|10:1557105273|14:capsion_ticket|44:MThhYzVhMTc2NzYyNGVjOWFkMzM5NzNjYTZkMjFhMTA=|abd0b86dfb5bc1c486eb4ff8bdfac533edd687a2a9a6fc52ac46cf945318d7f9"; z_c0="2|1:0|10:1557105331|4:z_c0|92:Mi4xby1HbEF3QUFBQUFBQUNhdjY0dzREeVlBQUFCZ0FsVk5zOWk4WFFCQWFUVVVuVVNTUTJlSkJxNUk1QTNHSFE1MjJ3|9288086a815cfa2013765d36c57945ec12a8f60167f6590c44c98716f8866717"; unlock_ticket="ADDAkVw5ygomAAAAYAJVTbuRz1zw-IX71qI6IcawZAUoaRDhyWQuOg=="; tst=r; _xsrf=7twvXgoiE4DVP2pCR5OuNUdke7Z4ntD1'
jar = requests.cookies.RequestsCookieJar()
headers = {
    'Host': 'www.zhihu.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
}
for cookie in cookies.split(';'):
    # Pairs in the string are separated by '; ', so strip the surrounding
    # whitespace; otherwise every name after the first starts with a space.
    cookie = cookie.strip()
    if not cookie:
        # Skip empty segments, e.g. one produced by a trailing ';'.
        continue
    # maxsplit=1: split only on the first '=', because values may contain '='.
    key, value = cookie.split('=', 1)
    jar.set(key, value)
r = requests.get('https://www.zhihu.com', cookies=jar, headers=headers)
print(r.text)

### 3.3 会话维持

使用会话可以避免重复设置Cookies。

In [None]:
import requests

# Without a Session: the two calls are independent requests, so the cookie
# set by the first is not sent with the second.
requests.get('http://httpbin.org/cookies/set/number/1234')
r = requests.get('http://httpbin.org/cookies')
print(r.text)

# With a Session: cookies persist across requests made through the same
# session. Using it as a context manager closes the session (and its
# pooled connections) when the block exits, even on error.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/1234')
    r = s.get('http://httpbin.org/cookies')
print(r.text)

### 3.4 Prepared Request

可以将请求表示为Request对象。

In [None]:
from requests import Request, Session

url = 'http://httpbin.org/post'
data = {
    'name': 'laowang'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'
}

# Describe the request as a standalone Request object first.
req = Request(method='POST', url=url, data=data, headers=headers)

# Use the session as a context manager so it is closed once the request
# has been sent, even if sending raises.
with Session() as s:
    # Turn the Request into a PreparedRequest via the session, then send it.
    prepped = s.prepare_request(req)
    r = s.send(prepped)
print(r.text)