In [None]:
# urllib提供了一系列用于操作URL的功能。

# Get
# urllib的request模块可以非常方便地抓取URL内容，也就是发送一个GET请求到指定的页面，然后返回HTTP的响应：
# 例如，可以对豆瓣的一个URL（https://api.douban.com/v2/book/2129650）进行抓取，并返回响应。
# 因为豆瓣的这个接口需要验证，所以下面换成了一个公开的测试接口 https://httpbin.org/get：

from urllib import request
import json

# Issue a plain GET against a public test endpoint and dump the response.
try:
    with request.urlopen('https://httpbin.org/get') as f:
        # The body is read before anything is printed, so a failed read is
        # reported by the handler below without partial output.
        data = f.read()
        print('Status:', f.status, f.reason)
        # getheaders() yields (name, value) pairs; print each as "name: value".
        for header in f.getheaders():
            print(*header, sep=': ')
        print('Data:', data.decode('utf-8'))
except Exception as err:
    # Best-effort demo: report the failure instead of aborting the notebook.
    print('Error:', err)

In [None]:
# 如果我们要想模拟浏览器发送GET请求，就需要使用Request对象，
# 通过往Request对象添加HTTP头，我们就可以把请求伪装成浏览器。
# 例如，模拟iPhone去请求豆瓣首页：

from urllib import request

# Build the request with the iPhone User-Agent supplied up front; headers
# passed to the constructor are registered the same way add_header() does.
req = request.Request(
    'http://www.douban.com/',
    headers={
        'User-Agent': 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) '
                      'AppleWebKit/536.26 (KHTML, like Gecko) '
                      'Version/8.0 Mobile/10A5376e Safari/8536.25',
    },
)
with request.urlopen(req) as f:
    print('Status:', f.status, f.reason)
    # getheaders() yields (name, value) pairs; print each as "name: value".
    for header in f.getheaders():
        print(*header, sep=': ')
    print('Data:', f.read().decode('utf-8'))

# 这样豆瓣会返回适合iPhone的移动版网页。

In [None]:
# Post
# 如果要以POST发送一个请求，只需要把参数data以bytes形式传入。
# 我们模拟一个微博登录，先读取登录的邮箱和口令，然后按照weibo.cn的登录页的格式以username=xxx&password=xxx的编码传入：
from urllib import request, parse

import getpass  # imported here so this cell stays self-contained

print('Login to weibo.cn...')
email = input('Email: ')
# Read the password with getpass so it is not echoed back and stored in the
# notebook output, which is what input() would do.
passwd = getpass.getpass('Password: ')

# Form fields for the login request, encoded as key=value pairs joined by '&'.
login_data = parse.urlencode([
    ('username', email),
    ('password', passwd),
    ('entry', 'mweibo'),
    ('client_id', ''),
    ('savestate', '1'),
    ('ec', ''),
    ('pagerefer', 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=http%3A%2F%2Fm.weibo.cn%2F')
])

# Headers that make the request look like it was sent by a mobile browser
# from the login page.
req = request.Request('https://passport.weibo.cn/sso/login')
req.add_header('Origin', 'https://passport.weibo.cn')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
req.add_header('Referer', 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F')

# Passing `data` (as bytes) turns the request into a POST.
with request.urlopen(req, data=login_data.encode('utf-8')) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))


In [None]:
# Handler
# 如果还需要更复杂的控制，比如通过一个Proxy去访问网站，我们需要利用ProxyHandler来处理，示例代码如下：
# Import the submodule explicitly: a bare `import urllib` does not load
# `urllib.request`, so the attribute lookups below can fail with
# AttributeError on a fresh interpreter.
import urllib.request

# Send plain-HTTP requests through the proxy at www.example.com:3128.
proxy_handler = urllib.request.ProxyHandler({'http': 'http://www.example.com:3128/'})
# Handler that answers proxy Basic-auth challenges. The realm, host, username
# and password below look like placeholders to be replaced with real values.
proxy_auth_handler = urllib.request.ProxyBasicAuthHandler()
proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
# An opener chains the handlers; requests made through it use both of them.
opener = urllib.request.build_opener(proxy_handler, proxy_auth_handler)
with opener.open('http://www.example.com/login.html') as f:
    pass


In [9]:
# urllib提供的功能就是利用程序去执行各种HTTP请求。
# 如果要模拟浏览器完成特定功能，需要把请求伪装成浏览器。
# 伪装的方法是先监控浏览器发出的请求，再根据浏览器的请求头来伪装，User-Agent头就是用来标识浏览器的。

# 利用urllib读取JSON，然后将JSON解析为Python对象：
from urllib import request
import json

def fetch_data(url, timeout=10):
    """Request ``url`` with a browser-like User-Agent and parse the body as JSON.

    Args:
        url: Address to request.
        timeout: Seconds to wait on blocking network operations before the
            request is abandoned.

    Returns:
        The Python object decoded from the JSON response body, or ``None``
        when the request cannot be built or sent, or when the body is not
        valid UTF-8 encoded JSON. The failure reason is printed in that case.
    """
    # Present the request as coming from a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        # Request() validates the URL and raises ValueError for a malformed
        # one, so it has to live inside the try block for the "return None
        # on failure" contract to hold.
        req = request.Request(url, headers=headers)
        with request.urlopen(req, timeout=timeout) as f:
            data = f.read().decode('utf-8')
            return json.loads(data)
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None.
        print(f"请求失败: {e}")
        return None

# Test: fetch the current weather for Beijing and check the parsed result.
import os
from urllib import parse

# The API key is read from the environment so the credential is never stored
# in the notebook itself.
API_KEY = os.environ.get('WEATHERAPI_KEY', '')
# urlencode escapes the parameter values when building the query string.
URL = 'https://api.weatherapi.com/v1/current.json?' + parse.urlencode(
    {'key': API_KEY, 'q': 'Beijing', 'aqi': 'no'}
)

if not API_KEY:
    print("未设置环境变量 WEATHERAPI_KEY")
# Skip the request entirely when no key is configured.
data = fetch_data(URL) if API_KEY else None

if data:
    print("获取到的数据:")
    print(json.dumps(data, indent=2, ensure_ascii=False))

    # Sanity-check that the response describes the requested city.
    assert data['location']['name'] == 'Beijing'
    print('ok')
else:
    print("无法获取数据")

获取到的数据:
{
  "location": {
    "name": "Beijing",
    "region": "Beijing",
    "country": "China",
    "lat": 39.9289,
    "lon": 116.3883,
    "tz_id": "Asia/Shanghai",
    "localtime_epoch": 1760515865,
    "localtime": "2025-10-15 16:11"
  },
  "current": {
    "last_updated_epoch": 1760515200,
    "last_updated": "2025-10-15 16:00",
    "temp_c": 21.4,
    "temp_f": 70.5,
    "is_day": 1,
    "condition": {
      "text": "Sunny",
      "icon": "//cdn.weatherapi.com/weather/64x64/day/113.png",
      "code": 1000
    },
    "wind_mph": 4.5,
    "wind_kph": 7.2,
    "wind_degree": 115,
    "wind_dir": "ESE",
    "pressure_mb": 1018.0,
    "pressure_in": 30.06,
    "precip_mm": 0.0,
    "precip_in": 0.0,
    "humidity": 64,
    "cloud": 0,
    "feelslike_c": 21.4,
    "feelslike_f": 70.5,
    "windchill_c": 21.1,
    "windchill_f": 69.9,
    "heatindex_c": 23.8,
    "heatindex_f": 74.9,
    "dewpoint_c": 6.8,
    "dewpoint_f": 44.3,
    "vis_km": 10.0,
    "vis_miles": 6.0,
    "uv": 0.