# Python Requests 库深度指南

## 学习目标
- 掌握 requests 库的核心功能和最佳实践
- 学会处理各种 HTTP 请求场景
- 了解会话管理、认证和错误处理
- 掌握生产环境下的请求优化技巧
- 学会并发请求处理（基于线程池）

## 前言
requests 库是 Python 中最流行的 HTTP 客户端库，被誉为"HTTP for Humans"。它简化了 HTTP 请求的复杂性，提供了优雅而强大的 API。

## 为什么选择 requests？
- **简洁的 API**：相比 urllib，更加人性化
- **功能完整**：支持所有 HTTP 方法和高级特性
- **稳定可靠**：经过大量项目验证
- **社区支持**：丰富的文档和社区资源


In [None]:
import requests
import json
from typing import Dict, Any, Optional, List
import logging
from datetime import datetime, timedelta
import time
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import concurrent.futures
import asyncio
import aiohttp

# Configure root logging once and create the logger shared by the demo cells
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Banner: installed requests version, a compatibility note and a separator line
print(
    f"Requests 版本: {requests.__version__}",
    "Python 版本兼容性: OK",
    "-" * 50,
    sep="\n",
)


## 1. 基础 HTTP 请求

### 主要的 HTTP 方法
- **GET**：获取资源
- **POST**：创建资源
- **PUT**：更新资源（完整替换）
- **PATCH**：部分更新资源
- **DELETE**：删除资源
- **HEAD**：获取响应头信息
- **OPTIONS**：获取服务器支持的方法


In [None]:
# 基础请求示例

def demonstrate_basic_requests():
    """Run three basic HTTP calls against httpbin.org and print the results.

    The steps are a plain GET, a GET with query parameters and a
    form-encoded POST. Each step is independent: a failure is logged through
    the module-level ``logger`` and the following steps still run.

    Returns:
        None. All output goes to stdout / the logger.
    """
    # requests applies no timeout by default, so a stalled server would hang
    # the demo forever; bound every call explicitly.
    timeout = 10
    # Transport/HTTP errors, a body that is not valid JSON (ValueError) and a
    # payload missing an expected key (KeyError) are all reported the same way.
    demo_errors = (requests.exceptions.RequestException, ValueError, KeyError)

    print("=== 基础请求演示 ===")

    # 1. Plain GET
    print("\n1. GET 请求示例:")
    try:
        response = requests.get('https://httpbin.org/get', timeout=timeout)
        print(f"状态码: {response.status_code}")
        print(f"响应头: {dict(response.headers)}")
        print(f"响应内容类型: {response.headers.get('content-type')}")

        # Turn 4xx/5xx into HTTPError before trying to parse the body as JSON
        response.raise_for_status()
        data = response.json()
        print(f"请求的 URL: {data['url']}")
        print(f"服务器接收到的 Headers: {data['headers']}")

    except demo_errors as e:
        logger.error(f"GET 请求失败: {e}")

    # 2. GET with query parameters
    print("\n2. 带参数的 GET 请求:")
    params = {
        'name': 'PyBackendPro',
        'course': 'Python Backend',
        'level': 'advanced'
    }

    try:
        response = requests.get('https://httpbin.org/get', params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        print(f"请求 URL: {data['url']}")
        print(f"参数: {data['args']}")

    except demo_errors as e:
        logger.error(f"带参数的 GET 请求失败: {e}")

    # 3. Form-encoded POST
    print("\n3. POST 请求示例:")
    post_data = {
        'username': 'john_doe',
        'email': 'john@example.com',
        'action': 'register'
    }

    try:
        response = requests.post('https://httpbin.org/post', data=post_data, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        print(f"POST 数据: {data['form']}")
        print(f"Content-Type: {data['headers']['Content-Type']}")

    except demo_errors as e:
        logger.error(f"POST 请求失败: {e}")

# Run the basic-request demo
demonstrate_basic_requests()


In [None]:
# 高级请求功能

class AdvancedRequestHandler:
    """Session-based HTTP helper with retries, default headers and logging.

    A single ``requests.Session`` is reused for all calls so connections are
    pooled. Failed requests are logged and reported as ``None`` instead of
    raising. The handler can be used as a context manager so the session is
    always closed.
    """

    # Methods that are retried automatically on the status codes below
    _RETRY_METHODS = ("HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE")
    _RETRY_STATUSES = (429, 500, 502, 503, 504)

    def __init__(self, base_url: str = "", timeout: int = 30):
        """Create the session, mount the retry adapter and set default headers.

        Args:
            base_url: Prefix joined with every endpoint; a trailing slash is
                stripped. When empty, endpoints are used as full URLs.
            timeout: Default per-request timeout in seconds.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()

        adapter = HTTPAdapter(max_retries=self._build_retry())
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # No session-wide Content-Type: requests picks the correct one per
        # request (application/json for json=, form encoding for data=dict).
        # A fixed JSON Content-Type here would mislabel form bodies.
        self.session.headers.update({
            'User-Agent': 'PyBackendPro-Client/1.0',
            'Accept': 'application/json',
        })

        self.logger = logging.getLogger(self.__class__.__name__)

    @classmethod
    def _build_retry(cls):
        """Build the retry policy in a way that works across urllib3 versions."""
        options = {
            'total': 3,  # maximum number of retries
            'backoff_factor': 1,  # backoff factor between attempts
            'status_forcelist': list(cls._RETRY_STATUSES),
        }
        methods = list(cls._RETRY_METHODS)
        try:
            # Keyword used by current urllib3; `method_whitelist` was removed in 2.0
            return Retry(allowed_methods=methods, **options)
        except TypeError:
            # Older urllib3 releases only understand the previous keyword
            return Retry(method_whitelist=methods, **options)

    def request(self, method: str, endpoint: str, **kwargs) -> Optional[requests.Response]:
        """Send one request through the shared session.

        Args:
            method: HTTP method name.
            endpoint: Path appended to ``base_url`` (or a full URL when
                ``base_url`` is empty).
            **kwargs: Passed through to ``Session.request``; ``timeout``
                defaults to the value given to the constructor.

        Returns:
            The response when the status is not an error status, otherwise
            ``None`` (the failure is logged).
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}" if self.base_url else endpoint

        # Apply the handler-wide timeout unless the caller supplied one
        kwargs.setdefault('timeout', self.timeout)

        try:
            # perf_counter is monotonic, so the duration cannot go negative
            started = time.perf_counter()
            response = self.session.request(method, url, **kwargs)
            elapsed = time.perf_counter() - started

            self.logger.info(
                f"{method} {url} - {response.status_code} - "
                f"{elapsed:.2f}s - {len(response.content)} bytes"
            )

            # Raise HTTPError for 4xx/5xx so callers only ever see good responses
            response.raise_for_status()
            return response

        except requests.exceptions.Timeout:
            self.logger.error(f"请求超时: {method} {url}")
        except requests.exceptions.ConnectionError:
            self.logger.error(f"连接错误: {method} {url}")
        except requests.exceptions.HTTPError as e:
            self.logger.error(f"HTTP 错误: {e}")
        except Exception as e:
            self.logger.error(f"请求失败: {e}")

        return None

    def _json_or_none(self, response: Optional[requests.Response]) -> Optional[Dict]:
        """Decode a response body as JSON; return ``None`` when that is impossible."""
        if response is None:
            return None
        try:
            return response.json()
        except ValueError:
            # e.g. an empty 204 body or an HTML page returned with status 200
            self.logger.error(f"响应不是合法的 JSON: {response.url}")
            return None

    def get(self, endpoint: str, params: Optional[Dict] = None) -> Optional[Dict]:
        """Send a GET request and return the decoded JSON body, or ``None`` on failure."""
        return self._json_or_none(self.request('GET', endpoint, params=params))

    def post(self, endpoint: str, data: Optional[Dict] = None,
             json_data: Optional[Dict] = None) -> Optional[Dict]:
        """Send a POST request and return the decoded JSON body, or ``None`` on failure.

        Args:
            endpoint: Path appended to ``base_url``.
            data: Form fields; used only when ``json_data`` is not given.
            json_data: Payload sent as a JSON body; takes precedence over ``data``.
        """
        kwargs = {}
        # Compare against None so an empty dict is still sent as a payload
        if json_data is not None:
            kwargs['json'] = json_data
        elif data is not None:
            kwargs['data'] = data

        return self._json_or_none(self.request('POST', endpoint, **kwargs))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the underlying session."""
        self.session.close()

# Usage example
print("=== 高级请求处理器演示 ===")
handler = AdvancedRequestHandler(base_url="https://httpbin.org")

# try/finally guarantees the session is closed even if a step below raises
try:
    # GET request
    get_result = handler.get("/get", params={"test": "advanced_handler"})
    if get_result:
        print(f"GET 请求成功: {get_result['args']}")

    # POST request with a JSON payload
    post_data = {"message": "Hello from advanced handler", "timestamp": datetime.now().isoformat()}
    post_result = handler.post("/post", json_data=post_data)
    if post_result:
        print(f"POST 请求成功: {post_result['json']}")
finally:
    # Release pooled connections
    handler.close()


## 2. 认证和会话管理

### 常见的认证方式
1. **Basic Authentication**：用户名密码认证
2. **Bearer Token**：令牌认证（如 JWT）
3. **API Key**：密钥认证
4. **OAuth**：第三方授权
5. **Session Cookies**：会话 Cookie

### 会话管理的重要性
- **连接复用**：提高性能
- **Cookie 持久化**：保持登录状态
- **请求头共享**：统一配置
- **连接池管理**：优化资源使用


In [None]:
# 认证示例

def demonstrate_authentication():
    """Demonstrate Basic, Bearer-token, API-key and session-level authentication.

    All calls go to httpbin.org. Each step is independent: a failure is
    logged through the module-level ``logger`` and the next step still runs.

    Returns:
        None. All output goes to stdout / the logger.
    """
    # requests applies no timeout by default; bound every call explicitly
    timeout = 10
    # Transport/HTTP errors, non-JSON bodies (ValueError) and unexpected
    # payload shapes (KeyError) are all reported the same way.
    demo_errors = (requests.exceptions.RequestException, ValueError, KeyError)

    print("=== 认证方式演示 ===")

    # 1. Basic authentication
    print("\n1. Basic 认证:")
    try:
        response = requests.get(
            'https://httpbin.org/basic-auth/user/pass',
            auth=('user', 'pass'),
            timeout=timeout,
        )
        # Only report success when the server actually accepted the credentials
        response.raise_for_status()
        print(f"Basic 认证成功: {response.status_code}")
        print(f"认证信息: {response.json()}")
    except demo_errors as e:
        logger.error(f"Basic 认证失败: {e}")

    # 2. Bearer token authentication
    print("\n2. Bearer Token 认证:")
    token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.example.token"
    headers = {'Authorization': f'Bearer {token}'}

    try:
        response = requests.get('https://httpbin.org/bearer', headers=headers, timeout=timeout)
        print(f"Bearer 认证状态: {response.status_code}")
        if response.status_code == 200:
            print(f"认证成功: {response.json()}")
    except demo_errors as e:
        logger.error(f"Bearer 认证测试失败: {e}")

    # 3. API key sent in a request header
    print("\n3. API Key 认证:")
    api_headers = {'X-API-Key': 'your-secret-api-key-here'}

    try:
        response = requests.get('https://httpbin.org/get', headers=api_headers, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        print(f"API Key 已发送: {data['headers'].get('X-Api-Key', '未找到')}")
    except demo_errors as e:
        logger.error(f"API Key 认证测试失败: {e}")

    # 4. Session management: the `with` block closes the session on every path
    print("\n4. 会话管理:")
    with requests.Session() as session:
        # Credentials and headers set here apply to every request of the session
        session.auth = ('session_user', 'session_pass')
        session.headers.update({'User-Agent': 'PyBackendPro-Session/1.0'})

        try:
            # First request
            response1 = session.get('https://httpbin.org/get', timeout=timeout)
            print(f"会话请求1: {response1.status_code}")

            # Second request, sent through the same session
            response2 = session.get(
                'https://httpbin.org/get', params={'session': 'test'}, timeout=timeout
            )
            print(f"会话请求2: {response2.status_code}")

            # A cookie stored on the session is sent with later requests
            session.cookies.set('session_id', 'abc123')
            response3 = session.get('https://httpbin.org/cookies', timeout=timeout)
            response3.raise_for_status()
            cookies_data = response3.json()
            print(f"会话 Cookies: {cookies_data['cookies']}")

        except demo_errors as e:
            logger.error(f"会话管理测试失败: {e}")

# Run the authentication demo
demonstrate_authentication()


In [None]:
# 并发请求处理

def demonstrate_concurrent_requests():
    """Compare sequential and thread-pool fetching of a fixed list of URLs.

    The same URLs are fetched twice, first one after another and then through
    a ``ThreadPoolExecutor``. The elapsed time and per-URL results of both
    runs are printed, followed by the speed-up factor.

    Returns:
        None. All output goes to stdout.
    """
    print("=== 并发请求演示 ===")

    urls = [
        'https://httpbin.org/delay/1',
        'https://httpbin.org/delay/2',
        'https://httpbin.org/delay/1',
        'https://httpbin.org/get',
        'https://httpbin.org/json'
    ]

    def fetch_url(url):
        """Fetch one URL; return its status code, or an error string on failure."""
        try:
            response = requests.get(url, timeout=10)
            return response.status_code
        except requests.exceptions.RequestException as e:
            return f"Error: {e}"

    # 1. Sequential requests (slow): total time is the sum of all delays
    print("\n1. 顺序请求:")
    # perf_counter is monotonic and meant for measuring durations
    started = time.perf_counter()
    results_sequential = [fetch_url(url) for url in urls]
    sequential_time = time.perf_counter() - started
    print(f"顺序请求完成: {sequential_time:.2f}s")
    print(f"结果: {results_sequential}")

    # 2. Concurrent requests (fast): the waits overlap across worker threads
    print("\n2. 并发请求:")
    started = time.perf_counter()

    # One worker per URL so every request is in flight at the same time;
    # executor.map keeps the results in the same order as `urls`
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(urls)) as executor:
        results_concurrent = list(executor.map(fetch_url, urls))

    concurrent_time = time.perf_counter() - started
    print(f"并发请求完成: {concurrent_time:.2f}s")
    print(f"结果: {results_concurrent}")
    # Guard the ratio against a zero-length measurement
    if concurrent_time > 0:
        print(f"性能提升: {sequential_time/concurrent_time:.1f}x")

# Run the concurrent-request demo
demonstrate_concurrent_requests()


## 3. 生产环境最佳实践

### 性能优化技巧
1. **使用连接池**：Session 对象复用连接
2. **设置合理超时**：避免请求无限等待
3. **启用压缩**：减少网络传输
4. **批量处理**：合并多个小请求
5. **缓存策略**：缓存常用响应

### 错误处理策略
1. **分类处理**：网络错误 vs HTTP 错误
2. **重试机制**：指数退避算法
3. **熔断器**：防止雪崩效应  
4. **监控告警**：及时发现问题

### 安全考虑
1. **验证 SSL 证书**：防止中间人攻击
2. **请求头安全**：避免泄露敏感信息
3. **输入验证**：验证响应数据
4. **速率限制**：避免被限流

### 调试技巧
1. **启用详细日志**：记录请求详情
2. **使用代理**：抓包分析
3. **Mock 测试**：模拟各种场景
4. **性能分析**：监控请求耗时


In [None]:
# 生产级 HTTP 客户端封装

class ProductionHTTPClient:
    """HTTP client wrapper with retries, logging and request statistics.

    A single ``requests.Session`` is reused for all calls. Failed requests
    are logged, counted in ``stats`` and reported as ``None`` instead of
    raising. The client is a context manager; leaving the ``with`` block
    closes the session.
    """

    # Methods that are retried automatically on the status codes below
    _RETRY_METHODS = ("HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE")
    _RETRY_STATUSES = (429, 500, 502, 503, 504)

    def __init__(self,
                 base_url: str = "",
                 timeout: int = 30,
                 max_retries: int = 3,
                 retry_backoff_factor: float = 0.3):
        """Create the session, mount the retry adapter and reset the statistics.

        Args:
            base_url: Prefix joined with every endpoint; a trailing slash is
                stripped. When empty, endpoints are used as full URLs.
            timeout: Default per-request timeout in seconds.
            max_retries: Retry budget used for total, read and connect retries.
            retry_backoff_factor: Backoff factor passed to the retry policy.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()

        adapter = HTTPAdapter(
            max_retries=self._build_retry(max_retries, retry_backoff_factor)
        )
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        # Default headers sent with every request
        self.session.headers.update({
            'User-Agent': 'ProductionClient/1.0',
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive'
        })

        self.logger = logging.getLogger(self.__class__.__name__)

        # Running counters; total_time is the summed duration of all requests
        self.stats = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'total_time': 0.0
        }

    @classmethod
    def _build_retry(cls, max_retries: int, backoff_factor: float):
        """Build the retry policy in a way that works across urllib3 versions."""
        options = {
            'total': max_retries,
            'read': max_retries,
            'connect': max_retries,
            'backoff_factor': backoff_factor,
            'status_forcelist': list(cls._RETRY_STATUSES),
        }
        methods = list(cls._RETRY_METHODS)
        try:
            # Keyword used by current urllib3; `method_whitelist` was removed in 2.0
            return Retry(allowed_methods=methods, **options)
        except TypeError:
            # Older urllib3 releases only understand the previous keyword
            return Retry(method_whitelist=methods, **options)

    def _record_failure(self, label: str, method: str, url: str, error: Exception) -> None:
        """Count one failed request and log it with the given label."""
        self.stats['failed_requests'] += 1
        self.logger.error(f"{label}: {method} {url} - {error}")

    def _make_request(self, method: str, endpoint: str, **kwargs) -> Optional[requests.Response]:
        """Send one request, update the statistics and log the outcome.

        Args:
            method: HTTP method name.
            endpoint: Path appended to ``base_url`` (or a full URL when
                ``base_url`` is empty).
            **kwargs: Passed through to ``Session.request``; ``timeout``
                defaults to the value given to the constructor.

        Returns:
            The response when the status is not an error status, otherwise
            ``None`` (the failure is logged and counted).
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}" if self.base_url else endpoint

        # Apply the client-wide timeout unless the caller supplied one
        kwargs.setdefault('timeout', self.timeout)

        # perf_counter is monotonic and meant for measuring durations
        started = time.perf_counter()
        self.stats['total_requests'] += 1

        try:
            self.logger.info(f"发起请求: {method} {url}")

            response = self.session.request(method, url, **kwargs)
            elapsed = time.perf_counter() - started

            # Raise HTTPError for 4xx/5xx so only good responses are returned
            response.raise_for_status()

            self.stats['successful_requests'] += 1
            self.logger.info(
                f"请求成功: {method} {url} - {response.status_code} - "
                f"{elapsed:.3f}s - {len(response.content)} bytes"
            )

            return response

        except requests.exceptions.Timeout as e:
            self._record_failure("请求超时", method, url, e)

        except requests.exceptions.ConnectionError as e:
            self._record_failure("连接错误", method, url, e)

        except requests.exceptions.HTTPError as e:
            self._record_failure("HTTP 错误", method, url, e)

        except Exception as e:
            self._record_failure("未知错误", method, url, e)

        finally:
            # Account for the time of every request, including timeouts and
            # connection errors, so the average matches total_requests.
            self.stats['total_time'] += time.perf_counter() - started

        return None

    def get_stats(self) -> Dict[str, Any]:
        """Return the counters plus formatted success rate and average duration."""
        total = self.stats['total_requests']
        return {
            'total_requests': total,
            'successful_requests': self.stats['successful_requests'],
            'failed_requests': self.stats['failed_requests'],
            'success_rate': f"{(self.stats['successful_requests']/total*100):.1f}%" if total > 0 else "0%",
            'average_response_time': f"{(self.stats['total_time']/total):.3f}s" if total > 0 else "0s",
            'total_time': f"{self.stats['total_time']:.3f}s"
        }

    @staticmethod
    def _decode_body(response: Optional[requests.Response]) -> Optional[Dict]:
        """Return the JSON body, the raw text wrapped in a dict, or ``None``."""
        if response is None:
            return None
        try:
            return response.json()
        except ValueError:
            # Body is not JSON; hand the text back instead of failing
            return {'raw_content': response.text}

    def get(self, endpoint: str, **kwargs) -> Optional[Dict]:
        """Send a GET request and return the decoded body, or ``None`` on failure."""
        return self._decode_body(self._make_request('GET', endpoint, **kwargs))

    def post(self, endpoint: str, **kwargs) -> Optional[Dict]:
        """Send a POST request and return the decoded body, or ``None`` on failure."""
        return self._decode_body(self._make_request('POST', endpoint, **kwargs))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the underlying session and log that the client was closed."""
        self.session.close()
        self.logger.info("HTTP 客户端已关闭")

# Usage example: the `with` block closes the client when it is left
print("=== 生产级 HTTP 客户端演示 ===")

with ProductionHTTPClient(base_url="https://httpbin.org") as client:
    # Two calls expected to succeed and one expected to fail
    result1 = client.get("/get", params={"test": "production"})
    result2 = client.post("/post", json={"message": "Hello Production!"})
    result3 = client.get("/status/404")  # this one fails with HTTP 404

    # Print the statistics collected by the client, one entry per line
    stats = client.get_stats()
    print("\n客户端统计信息:")
    for key, value in stats.items():
        print("  " + f"{key}: {value}")

print("\n✅ requests 库教程完成！")
