## Get
urllib是Python的内置模块，它的request模块可以非常方便地抓取URL内容：向指定的页面发送一个GET请求，然后返回HTTP的响应。

例如，对豆瓣的一个URL https://api.douban.com/v2/book/2129650 进行抓取，并返回响应：

In [7]:
from urllib import request

# Fetch one book record and print the status line, every response header,
# and the decoded body, separated by dashed rules.
with request.urlopen('https://api.douban.com/v2/book/2129650') as resp:
    data = resp.read()
    print('Status:', resp.status, resp.reason)
    print('-' * 50)
    for name, value in resp.getheaders():
        print(f'{name}: {value}')
    print('-' * 50)
    # bytes.decode() defaults to UTF-8, so data.decode() would be equivalent.
    print('Data:', data.decode('utf-8'))

Status: 200 OK
--------------------------------------------------
Date: Mon, 27 Aug 2018 06:44:07 GMT
Content-Type: application/json; charset=utf-8
Content-Length: 2138
Connection: close
Vary: Accept-Encoding
X-Ratelimit-Remaining2: 98
X-Ratelimit-Limit2: 100
Expires: Sun, 1 Jan 2006 01:00:00 GMT
Pragma: no-cache
Cache-Control: must-revalidate, no-cache, private
Set-Cookie: bid=VJ6kOklAxhU; Expires=Tue, 27-Aug-19 06:44:07 GMT; Domain=.douban.com; Path=/
X-DOUBAN-NEWBID: VJ6kOklAxhU
X-DAE-Node: anson42
X-DAE-App: book
Server: dae
X-Frame-Options: SAMEORIGIN
--------------------------------------------------
Data: {"rating":{"max":10,"numRaters":16,"average":"7.4","min":0},"subtitle":"","author":["廖雪峰"],"pubdate":"2007","tags":[{"count":21,"name":"spring","title":"spring"},{"count":13,"name":"Java","title":"Java"},{"count":6,"name":"javaee","title":"javaee"},{"count":5,"name":"j2ee","title":"j2ee"},{"count":4,"name":"计算机","title":"计算机"},{"count":4,"name":"编程","title":"编程"},{"count":3,"na

### 通过访问url返回data数据

resq = request.urlopen(url)  #post则url变为request.Request(url=**,data=**)

json.loads(resq.read().decode())   #返回data数据

In [26]:
from urllib import request
import json
# Download the book record and parse the JSON body into Python objects.
# The context manager closes the HTTP response as soon as the body has been
# read, rather than leaving the connection open.
with request.urlopen('https://api.douban.com/v2/book/2129650') as resq:
    book = json.loads(resq.read().decode())  # decode() defaults to UTF-8
# Last expression of the cell, so the notebook displays the parsed result.
book

{'rating': {'max': 10, 'numRaters': 16, 'average': '7.4', 'min': 0},
 'subtitle': '',
 'author': ['廖雪峰'],
 'pubdate': '2007',
 'tags': [{'count': 21, 'name': 'spring', 'title': 'spring'},
  {'count': 13, 'name': 'Java', 'title': 'Java'},
  {'count': 6, 'name': 'javaee', 'title': 'javaee'},
  {'count': 5, 'name': 'j2ee', 'title': 'j2ee'},
  {'count': 4, 'name': '计算机', 'title': '计算机'},
  {'count': 4, 'name': '编程', 'title': '编程'},
  {'count': 3, 'name': '藏书', 'title': '藏书'},
  {'count': 3, 'name': 'POJO', 'title': 'POJO'}],
 'origin_title': '',
 'image': 'https://img3.doubanio.com/view/subject/m/public/s2552283.jpg',
 'binding': '平装',
 'translator': [],
 'catalog': '',
 'pages': '509',
 'images': {'small': 'https://img3.doubanio.com/view/subject/s/public/s2552283.jpg',
  'large': 'https://img3.doubanio.com/view/subject/l/public/s2552283.jpg',
  'medium': 'https://img3.doubanio.com/view/subject/m/public/s2552283.jpg'},
 'alt': 'https://book.douban.com/subject/2129650/',
 'id': '2129650',
 

如果我们想要模拟浏览器发送GET请求，就需要使用Request对象。通过往Request对象添加HTTP头，我们就可以把请求伪装成浏览器。例如，模拟iPhone 6去请求豆瓣首页：

In [8]:
from urllib import request

# Request the Douban home page while presenting an iPhone Safari User-Agent,
# then print the status line, the response headers, and the decoded body.
req = request.Request(
    'http://www.douban.com/',
    headers={
        'User-Agent': 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25',
    },
)
with request.urlopen(req) as resp:
    print('Status:', resp.status, resp.reason)
    for name, value in resp.getheaders():
        print(f'{name}: {value}')
    print('Data:', resp.read().decode('utf-8'))

Status: 200 OK
Connection: close
Content-Type: text/html;
Content-Length: 1019
Data: <html><head><script>var ar=decodeURIComponent('http://www.douban.com%2f');t=setTimeout('location.replace(ar)',2000);var ct=0;function ck(){var x=document.createElement('script');x.src='http://bj-ac-001-3e5c:100/src/onlineinfo.js?t='+new Date().getTime();document.body.appendChild(x);if(typeof is_online=='number'&&is_online===1){return;}if(ct>5){location=ar;return;}setTimeout(ck,500);ct++;}function it(){if(typeof(ac_krb_redirect)==='function'){clearTimeout(t);ck();}else{location=ar;}}var eUa=['Edge','Maxthon','MetaSr'];var a=1;var ua=navigator.userAgent;for(var i=0;i<eUa.length;++i){if(ua.indexOf(eUa[i])>-1){a=0;break;}}if(a){lJs('http://bj-ac-001-3e5c:100/src/iwa/index.js?t=1535352283',it);}function lJs(src, f){var s=document.createElement('script');s.type='text/javascript';s.src=src;s.onload=s.onreadystatechange=function(){var r=s.readyState;if(r==='loaded'||r ==='complete'||!r){s.onreadystatechange=nu

## Post

如果要以POST发送一个请求，只需要把参数data以bytes形式传入。
我们模拟一个微博登录，先读取登录的邮箱和口令，然后按照weibo.cn的登录页的格式以username=xxx&password=xxx的编码传入：

In [None]:
import getpass
from urllib import request, parse

# Simulate the weibo.cn mobile login: read the credentials, form-encode them
# together with the fixed login fields, and send them with browser-like headers.
print('Login to weibo.cn...')
email = input('Email: ')
# getpass reads the password without echoing it to the screen, unlike input().
passwd = getpass.getpass('Password: ')
login_data = parse.urlencode([
    ('username', email),
    ('password', passwd),
    ('entry', 'mweibo'),
    ('client_id', ''),
    ('savestate', '1'),
    ('ec', ''),
    ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F')
])

req = request.Request('https://passport.weibo.cn/sso/login')
req.add_header('Origin', 'https://passport.weibo.cn')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F')

# Supplying a bytes body through `data` turns the request into a POST.
with request.urlopen(req, data=login_data.encode('utf-8')) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))

Login to weibo.cn...


In [24]:
# Serialize a small mapping to a JSON string and print it.
# The mapping is deliberately not named `dict`: that would rebind the builtin
# and break every later dict(...) call in the same session.
payload = {'id': 1232,
           "title": "gkjgkj"}
tmp = json.dumps(payload)
print(tmp)

{"id": 1232, "title": "gkjgkj"}


In [23]:

# Serialize a one-element list holding a five-key mapping and print the
# resulting JSON array text.
data = [
    {
        'a': 1,
        'b': 2,
        'c': 3,
        'd': 4,
        'e': 5,
    },
]
js1 = json.dumps(data)
print(js1)

[{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}]


In [1]:
from urllib import request, parse
import json
def fetch_data(url, *, timeout=10):
    """Fetch ``url`` and return its body parsed as JSON.

    The request is sent with an iPhone Safari ``User-Agent`` header.

    Args:
        url: Address to request; passed to ``urllib.request.Request``.
        timeout: Seconds to wait on blocking network operations.
            Keyword-only; defaults to 10 so that a stalled server cannot
            hang the caller indefinitely.

    Returns:
        The decoded JSON document as Python objects.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for HTTP
            error statuses).
        ValueError: If the body is not valid JSON (``json.JSONDecodeError``)
            or cannot be decoded as text (``UnicodeDecodeError``).
    """
    req = request.Request(url)
    req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
    with request.urlopen(req, timeout=timeout) as f:
        # The body must go through json.loads(): indexing the raw text with
        # string keys fails with "string indices must be integers".
        # Handing over the bytes lets json detect UTF-8/16/32 itself and
        # accept a leading UTF-8 BOM, which a pre-decoded str would not.
        return json.loads(f.read())

In [2]:
# Query the Yahoo YQL weather endpoint for WOEID 2151330, print the parsed
# reply, and check that the reported city is Beijing.
URL = (
    'https://query.yahooapis.com/v1/public/yql'
    '?q=select%20*%20from%20weather.forecast%20where%20woeid%20%3D%202151330'
    '&format=json'
)
data = fetch_data(URL)
print(data)
assert (
    data['query']['results']['channel']['location']['city'] == 'Beijing'
)
print('ok')

{'query': {'count': 1, 'created': '2018-08-27T07:36:59Z', 'lang': 'en-US', 'results': {'channel': {'units': {'distance': 'mi', 'pressure': 'in', 'speed': 'mph', 'temperature': 'F'}, 'title': 'Yahoo! Weather - Beijing, Beijing, CN', 'link': 'http://us.rd.yahoo.com/dailynews/rss/weather/Country__Country/*https://weather.yahoo.com/country/state/city-2151330/', 'description': 'Yahoo! Weather for Beijing, Beijing, CN', 'language': 'en-us', 'lastBuildDate': 'Mon, 27 Aug 2018 03:36 PM CST', 'ttl': '60', 'location': {'city': 'Beijing', 'country': 'China', 'region': ' Beijing'}, 'wind': {'chill': '90', 'direction': '155', 'speed': '14'}, 'atmosphere': {'humidity': '46', 'pressure': '1004.0', 'rising': '0', 'visibility': '16.1'}, 'astronomy': {'sunrise': '5:38 am', 'sunset': '6:53 pm'}, 'image': {'title': 'Yahoo! Weather', 'width': '142', 'height': '18', 'link': 'http://weather.yahoo.com', 'url': 'http://l.yimg.com/a/i/brand/purplelogo//uh/us/news-wea.gif'}, 'item': {'title': 'Conditions for Bei