# Module 5: 回测评估分析

本notebook展示金属期货配对交易策略的完整回测评估过程。

## 主要内容
1. 加载Module 4生成的真实交易信号
2. 执行回测并计算收益
3. 绩效指标分析
4. 风险度量分析  
5. 配对贡献分析
6. 可视化分析
7. 算法验证
8. 总结报告

In [None]:
# Import the required libraries
import sys
import os
# Put the parent of the current working directory on the Python path.
# This has to run before the `lib` / `configs` imports below — presumably those
# packages live in the project root, one level above the notebook directory.
project_root = os.path.dirname(os.getcwd())
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Import the project-local modules
from lib.backtest import BacktestEngine, calculate_spread_pnl, MarginManager
from lib.performance import PerformanceCalculator
from lib.risk import RiskAnalyzer
from lib.pairs_analysis import PairAnalyzer
from configs.contract_specs import CONTRACT_SPECS, get_multiplier

# Configure plot styling
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)
# The charts in this notebook use Chinese titles and axis labels. DejaVu Sans
# has no CJK glyphs, so on its own those labels render as empty boxes. List
# common CJK-capable fonts first (matplotlib uses the first one installed) and
# keep DejaVu Sans as the final fallback.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Heiti SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    'DejaVu Sans',
]
# Draw the minus sign as an ASCII hyphen so it is available in every font above.
plt.rcParams['axes.unicode_minus'] = False

print("✓ 环境初始化完成")
print(f"✓ 已加载 {len(CONTRACT_SPECS)} 个品种的合约规格")

## 1. 加载交易信号和价格数据

In [2]:
# Read the trading signals produced by Module 4, then report their count and
# the date span they cover. The last expression displays the first rows.
signals_path = '../data/signals/kalman_signals_final.parquet'
signals_df = pd.read_parquet(signals_path)
print(f"加载信号数据: {len(signals_df)} 条")
first_signal_date = signals_df['date'].min()
last_signal_date = signals_df['date'].max()
print(f"信号日期范围: {first_signal_date} 到 {last_signal_date}")
print(f"\n信号数据预览:")
signals_df.head()

加载信号数据: 3930 条
信号日期范围: 2024-01-02 00:00:00 到 2025-08-15 00:00:00

信号数据预览:


Unnamed: 0,date,pair,action,side,z_score,beta,position_ratio
0,2024-01-02,RB0-SF0,wait,none,,1.074975,
1,2024-01-03,RB0-SF0,wait,none,,1.06415,
2,2024-01-04,RB0-SF0,wait,none,,1.062258,
3,2024-01-05,RB0-SF0,wait,none,,1.06359,
4,2024-01-08,RB0-SF0,wait,none,,1.063971,


In [3]:
# Load one price table per futures symbol, indexed by date.
symbols = [
    'AG0', 'AU0', 'AL0', 'CU0', 'HC0', 'I0', 'NI0',
    'PB0', 'RB0', 'SF0', 'SM0', 'SN0', 'SS0', 'ZN0',
]

price_data = {}
for symbol in symbols:
    # Best effort: a symbol that fails to load is reported and left out of
    # price_data instead of aborting the whole cell.
    try:
        frame = (
            pd.read_parquet(f'../data/futures/{symbol}.parquet')
            .assign(date=lambda raw: pd.to_datetime(raw['date']))
            .set_index('date')
        )
        price_data[symbol] = frame
        print(f"✓ {symbol}: {len(frame)} 条价格记录")
    except Exception as e:
        print(f"× {symbol}: {e}")

print(f"\n成功加载 {len(price_data)} 个品种的价格数据")

✓ AG0: 3232 条价格记录
✓ AU0: 4291 条价格记录
✓ AL0: 5018 条价格记录
✓ CU0: 5018 条价格记录
✓ HC0: 2780 条价格记录
✓ I0: 2879 条价格记录
✓ NI0: 2530 条价格记录
✓ PB0: 3501 条价格记录
✓ RB0: 3982 条价格记录
✓ SF0: 2649 条价格记录
✓ SM0: 2632 条价格记录
✓ SN0: 2530 条价格记录
✓ SS0: 1429 条价格记录
✓ ZN0: 4481 条价格记录

成功加载 14 个品种的价格数据


## 2. 执行回测

In [4]:
# Backtest parameters and engine construction
INITIAL_CAPITAL = 5_000_000  # starting capital: 5 million yuan
MARGIN_RATE = 0.12           # 12% margin rate
TRANSACTION_COST = 2e-4      # transaction cost: 2 per 10,000 of notional
RISK_FREE_RATE = 0.03        # 3% annualised risk-free rate
MAX_STOP_LOSS = 0.10         # 10% maximum stop loss
SLIPPAGE_TICKS = 3           # slippage of 3 ticks

# Only the starting capital is passed to the engine here.
engine = BacktestEngine(initial_capital=INITIAL_CAPITAL)
print(f"回测引擎初始化: 初始资金 {INITIAL_CAPITAL:,.0f} 元")

NameError: name 'BacktestEngine' is not defined

In [None]:
# Split the signals into entries ('open') and exits ('close').
signal_action = signals_df['action']
open_signals = signals_df[signal_action.eq('open')]
close_signals = signals_df[signal_action.eq('close')]

print(f"开仓信号: {len(open_signals)} 条")
print(f"平仓信号: {len(close_signals)} 条")

# Count signals per pair and action: one row per pair, one column per action,
# with 0 where a pair never produced that action.
print("\n配对信号分布:")
signal_stats = (
    signals_df
    .groupby(['pair', 'action'])
    .size()
    .unstack(fill_value=0)
)
signal_stats

In [None]:
# Run the paired signals and build per-trade records (using real contract multipliers)
def _signal_field(signal, key, default):
    """Return ``signal[key]``, or ``default`` when it is absent, None or NaN."""
    value = signal.get(key, default)
    # Records built with DataFrame.to_dict('records') always contain the key, so
    # dict.get()'s default never fires for an empty cell; check for NaN as well.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return default
    return value


def execute_backtest(signals_df, price_data):
    """Pair signals into round-trip trades and compute the P&L of each one.

    The signals are sorted by ``date`` and passed to the module-level
    ``engine`` (``engine.execute_signals``). For every completed trade it
    returns, open/close prices are read from ``price_data`` and gross P&L,
    transaction cost and net P&L are computed. Uses the module-level
    ``TRANSACTION_COST`` rate as well as ``calculate_spread_pnl`` and
    ``get_multiplier``.

    Args:
        signals_df: DataFrame of signals with at least a ``date`` column.
        price_data: Mapping of symbol -> DataFrame indexed by date that has
            a ``close`` column.

    Returns:
        A list of dicts, one per trade that could be priced, with the keys
        ``pair``, ``open_date``, ``close_date``, ``holding_days``,
        ``position_ratio``, ``beta``, ``gross_pnl``, ``transaction_cost``,
        ``net_pnl``, ``y_open``, ``x_open``, ``y_close``, ``x_close``,
        ``y_multiplier``, ``x_multiplier``, ``y_notional`` and ``x_notional``.
        Trades that cannot be parsed or priced are reported on stdout and
        left out of the result.
    """
    signals = signals_df.sort_values('date').to_dict('records')

    # Let the engine match open and close signals into completed trades.
    completed_trades = engine.execute_signals(signals)
    print(f"配对完成: {len(completed_trades)} 笔完整交易")

    trades_with_pnl = []

    for trade in completed_trades:
        open_signal = trade['open_signal']
        close_signal = trade['close_signal']
        pair = open_signal['pair']

        # A pair must be exactly "Y-X". Anything else (non-string, no dash,
        # several dashes) is skipped here rather than raising on unpacking.
        legs = pair.split('-') if isinstance(pair, str) else []
        if len(legs) != 2:
            print(f"跳过 {pair}: 配对名称无法解析为 'Y-X'")
            continue
        y_symbol, x_symbol = legs

        missing_symbols = [leg for leg in legs if leg not in price_data]
        if missing_symbols:
            print(f"跳过 {pair}: 缺少价格数据 {missing_symbols}")
            continue

        # Best effort per trade: a failure (e.g. a date missing from the price
        # index or a malformed ratio) is reported and the loop moves on.
        try:
            # Look up the closing prices on the open and close dates.
            open_date = pd.to_datetime(open_signal['date'])
            close_date = pd.to_datetime(close_signal['date'])

            y_open = price_data[y_symbol].loc[open_date, 'close']
            x_open = price_data[x_symbol].loc[open_date, 'close']
            y_close = price_data[y_symbol].loc[close_date, 'close']
            x_close = price_data[x_symbol].loc[close_date, 'close']

            # Fall back to 1:1 / beta 1.0 when the signal leaves them empty.
            position_ratio = _signal_field(open_signal, 'position_ratio', '1:1')
            beta = _signal_field(open_signal, 'beta', 1.0)

            trade_data = {
                'position_ratio': position_ratio,
                'open_prices': {'Y': y_open, 'X': x_open},
                'close_prices': {'Y': y_close, 'X': x_close},
                'beta': beta,
                'y_symbol': y_symbol,  # pass the symbol codes through
                'x_symbol': x_symbol
            }

            pnl = calculate_spread_pnl(trade_data)

            # Transaction cost: notional of both legs at the open price, times
            # the cost rate, doubled (presumably to cover entry and exit).
            y_ratio, x_ratio = map(int, position_ratio.split(':'))
            y_multiplier = get_multiplier(y_symbol)
            x_multiplier = get_multiplier(x_symbol)
            y_notional = y_open * y_ratio * y_multiplier
            x_notional = x_open * x_ratio * x_multiplier
            transaction_cost = (y_notional + x_notional) * TRANSACTION_COST * 2

            # Record the trade.
            trades_with_pnl.append({
                'pair': pair,
                'open_date': open_date,
                'close_date': close_date,
                'holding_days': (close_date - open_date).days,
                'position_ratio': position_ratio,
                'beta': beta,
                'gross_pnl': pnl,
                'transaction_cost': transaction_cost,
                'net_pnl': pnl - transaction_cost,
                'y_open': y_open,
                'x_open': x_open,
                'y_close': y_close,
                'x_close': x_close,
                'y_multiplier': y_multiplier,
                'x_multiplier': x_multiplier,
                'y_notional': y_notional,
                'x_notional': x_notional
            })

        except Exception as e:
            print(f"处理 {pair} 失败: {e}")
            continue

    return trades_with_pnl

# Run the backtest over all signals.
trades = execute_backtest(signals_df, price_data)
print(f"\n成功计算 {len(trades)} 笔交易收益")

# Preview the first three trades (skipped when no trade was produced).
if trades:
    preview_columns = [
        'pair', 'position_ratio', 'y_multiplier', 'x_multiplier',
        'gross_pnl', 'net_pnl',
    ]
    sample_df = pd.DataFrame(trades[:3])
    print("\n前3笔交易详情:")
    print(sample_df[preview_columns])

## 3. 绩效指标分析

In [None]:
# Basic trade statistics
trades_df = pd.DataFrame(trades)
net_pnl = trades_df['net_pnl']

total_trades = len(trades)
total_pnl = net_pnl.sum()
# A trade with exactly zero net P&L is counted on the losing side.
winning_trades = int((net_pnl > 0).sum())
losing_trades = int((net_pnl <= 0).sum())

print("=== 交易统计 ===")
print(f"总交易次数: {total_trades}")
print(f"总净收益: {total_pnl:,.2f} 元")
print(f"平均每笔收益: {total_pnl/total_trades:,.2f} 元")
print(f"盈利交易: {winning_trades} 笔 ({winning_trades/total_trades:.1%})")
print(f"亏损交易: {losing_trades} 笔 ({losing_trades/total_trades:.1%})")
print(f"平均持仓天数: {trades_df['holding_days'].mean():.1f} 天")

In [None]:
# Compute the performance metrics from the list of trade records.
calculator = PerformanceCalculator(
    initial_capital=INITIAL_CAPITAL,
    risk_free_rate=RISK_FREE_RATE,
)
metrics = calculator.calculate_comprehensive_metrics(trades)

# Print the metrics read from the returned mapping.
print("=== 绩效指标 ===")
print(f"最终资金: {metrics['final_capital']:,.2f} 元")
print(f"总收益率: {metrics['total_return']:.2%}")
print(f"年化收益率: {metrics['annualized_return']:.2%}")
print(f"年化波动率: {metrics['annualized_volatility']:.2%}")
print(f"Sharpe比率: {metrics['sharpe_ratio']:.4f}")
print(f"最大回撤: {metrics['max_drawdown']:.2%}")
print(f"回撤持续期: {metrics['drawdown_duration']} 个交易周期")
print(f"盈亏比: {metrics['profit_loss_ratio']:.2f}")

## 4. 风险度量分析

In [None]:
# Per-trade return series: each trade's net P&L as a fraction of the
# starting capital.
returns = trades_df['net_pnl'].to_numpy() / INITIAL_CAPITAL

# Risk analysis at the 95% and 99% confidence levels.
analyzer = RiskAnalyzer(confidence_levels=[0.95, 0.99])
risk_metrics = analyzer.analyze_comprehensive_risk(returns, trades)

print("=== 风险指标 ===")
print(f"收益率标准差: {risk_metrics['std']:.4f}")
print(f"偏度: {risk_metrics['skewness']:.4f}")
print(f"峰度: {risk_metrics['kurtosis']:.4f}")
print(f"VaR (95%): {risk_metrics['var_95']:.4f}")
print(f"CVaR (95%): {risk_metrics['cvar_95']:.4f}")
print(f"VaR (99%): {risk_metrics['var_99']:.4f}")
print(f"CVaR (99%): {risk_metrics['cvar_99']:.4f}")
print(f"Sortino比率: {risk_metrics['sortino_ratio']:.4f}")
print(f"最大连续亏损: {risk_metrics['max_losing_streak']} 次")
print(f"最大连续亏损金额: {risk_metrics['max_losing_amount']:,.2f} 元")

## 5. 配对贡献分析

In [None]:
# Analyse the trades pair by pair.
pair_analyzer = PairAnalyzer()
pair_analysis = pair_analyzer.analyze_all_pairs(trades)

# Print the text report stored under the 'report' key of the result.
pair_report = pair_analysis['report']
print(pair_report)

In [None]:
# Per-pair statistics as a table (one row per pair), best total P&L first.
report_columns = ['total_pnl', 'win_rate', 'trade_count', 'pnl_contribution']
pair_df = (
    pd.DataFrame
    .from_dict(pair_analysis['pair_stats'], orient='index')
    .sort_values('total_pnl', ascending=False)
)

print("=== Top 5 最佳配对 ===")
print(pair_df[report_columns].head())

print("\n=== Bottom 5 最差配对 ===")
print(pair_df[report_columns].tail())

## 6. 可视化分析

In [None]:
# Cumulative P&L curve: order the trades by close date and accumulate net P&L.
trades_df = trades_df.sort_values('close_date')
trades_df['cumulative_pnl'] = trades_df['net_pnl'].cumsum()

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(trades_df['close_date'], trades_df['cumulative_pnl'], linewidth=2)
# Dashed reference line at break-even.
ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
ax.set_title('累计收益曲线', fontsize=14)
ax.set_xlabel('日期')
ax.set_ylabel('累计收益 (元)')
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Two side-by-side histograms: net P&L per trade and holding period in days.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
pnl_ax, holding_ax = axes

# Net P&L distribution, with a dashed marker at break-even.
pnl_ax.hist(trades_df['net_pnl'], bins=30, edgecolor='black', alpha=0.7)
pnl_ax.axvline(x=0, color='r', linestyle='--', linewidth=2)
pnl_ax.set_title('交易收益分布', fontsize=12)
pnl_ax.set_xlabel('净收益 (元)')
pnl_ax.set_ylabel('频次')

# Holding-period distribution.
holding_ax.hist(
    trades_df['holding_days'],
    bins=20,
    edgecolor='black',
    alpha=0.7,
    color='green',
)
holding_ax.set_title('持仓天数分布', fontsize=12)
holding_ax.set_xlabel('持仓天数')
holding_ax.set_ylabel('频次')

plt.tight_layout()
plt.show()

In [None]:
# Net P&L contributed by each pair, sorted ascending.
pair_pnl = (
    trades_df
    .groupby('pair')['net_pnl']
    .sum()
    .sort_values()
)

plt.figure(figsize=(10, 8))
# Red bars for losing pairs, green for the rest.
colors = np.where(pair_pnl.values < 0, 'red', 'green').tolist()
pair_pnl.plot(kind='barh', color=colors)
plt.title('各配对收益贡献', fontsize=14)
plt.xlabel('净收益 (元)')
plt.ylabel('配对')
plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
plt.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

In [None]:
# Monthly P&L heatmap: bucket trades by the month in which they closed.
trades_df['year_month'] = trades_df['close_date'].dt.to_period('M')
monthly_pnl = trades_df.groupby('year_month')['net_pnl'].sum()

# Reshape into a month (rows) x year (columns) grid for the heatmap.
monthly_df = monthly_pnl.to_frame().assign(
    year=monthly_pnl.index.year,
    month=monthly_pnl.index.month,
)
pivot_table = monthly_df.pivot(index='month', columns='year', values='net_pnl')

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    pivot_table,
    annot=True,
    fmt='.0f',
    cmap='RdYlGn',
    center=0,
    cbar_kws={'label': '净收益 (元)'},
    ax=ax,
)
ax.set_title('月度收益热力图', fontsize=14)
ax.set_xlabel('年份')
ax.set_ylabel('月份')
plt.tight_layout()
plt.show()

In [None]:
# Drawdown analysis
# Relies on trades_df being sorted by close_date and carrying the
# 'cumulative_pnl' column built in the cumulative P&L cell.
# Account value after each closed trade, normalised so that the starting
# capital equals 1.0.
cumulative_returns = (INITIAL_CAPITAL + trades_df['cumulative_pnl']) / INITIAL_CAPITAL
# The high-water mark has to include the starting equity (1.0). Without this
# floor, a strategy that loses on its first trades would have its drawdown
# measured from an already-depressed "peak" and therefore understated.
running_max = cumulative_returns.expanding().max().clip(lower=1.0)
drawdown = (cumulative_returns - running_max) / running_max

fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Top panel: normalised account value against its running peak.
axes[0].plot(trades_df['close_date'], cumulative_returns, linewidth=2, label='累计收益率')
axes[0].plot(trades_df['close_date'], running_max, '--', alpha=0.5, label='历史最高')
axes[0].set_ylabel('累计收益率')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
axes[0].set_title('累计收益率与回撤分析', fontsize=14)

# Bottom panel: drawdown as a fraction of the running peak (zero or negative).
axes[1].fill_between(trades_df['close_date'], drawdown, 0, color='red', alpha=0.3)
axes[1].plot(trades_df['close_date'], drawdown, color='red', linewidth=1)
axes[1].set_ylabel('回撤率')
axes[1].set_xlabel('日期')
axes[1].grid(True, alpha=0.3)

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print(f"最大回撤: {drawdown.min():.2%}")
# idxmin() returns an index label, so look the row up with .loc.
print(f"最大回撤发生日期: {trades_df.loc[drawdown.idxmin(), 'close_date']}")

## 7. 算法验证

In [None]:
# Cross-check the key calculations. Each validator returns two values (printed
# below as method 1 / method 2); the check passes when they agree.
from lib.backtest import validate_spread_pnl_calculation, validate_transaction_cost_calculation
from lib.performance import validate_sharpe_calculation, validate_max_drawdown_calculation
from lib.risk import validate_var_calculation, validate_cvar_calculation

print("=== 算法双重验证 ===")

# Largest absolute difference that still counts as agreement. It matches the
# six decimals printed for each difference.
# NOTE(review): loosen this if a validator is meant to compare two different
# estimators rather than two implementations of the same formula.
VALIDATION_TOLERANCE = 1e-6
validation_diffs = {}

# 1. Spread P&L
sample_trade = {
    'position_ratio': '3:4',
    'open_prices': {'Y': 5000, 'X': 4000},
    'close_prices': {'Y': 5100, 'X': 4050},
    'beta': 1.0
}
pnl1, pnl2 = validate_spread_pnl_calculation(sample_trade)
validation_diffs['价差收益'] = abs(pnl1 - pnl2)
print(f"价差收益验证: 方法1={pnl1:.2f}, 方法2={pnl2:.2f}, 差异={abs(pnl1-pnl2):.6f}")

# 2. Transaction cost
cost_data = {
    'notional_value_y': 50000,
    'notional_value_x': 40000,
    'cost_rate': 0.0002
}
cost1, cost2 = validate_transaction_cost_calculation(cost_data)
validation_diffs['交易成本'] = abs(cost1 - cost2)
print(f"交易成本验证: 方法1={cost1:.2f}, 方法2={cost2:.2f}, 差异={abs(cost1-cost2):.6f}")

# 3. Sharpe ratio, on a seeded synthetic sample so every run checks the same data
sample_returns = np.random.default_rng(42).normal(0.001, 0.02, 100)
sharpe1, sharpe2 = validate_sharpe_calculation(sample_returns, 0.03)
validation_diffs['Sharpe比率'] = abs(sharpe1 - sharpe2)
print(f"Sharpe比率验证: 方法1={sharpe1:.4f}, 方法2={sharpe2:.4f}, 差异={abs(sharpe1-sharpe2):.6f}")

# 4. VaR, on the per-trade returns computed in the risk analysis cell
var1, var2 = validate_var_calculation(returns, 0.95)
validation_diffs['VaR(95%)'] = abs(var1 - var2)
print(f"VaR(95%)验证: 方法1={var1:.4f}, 方法2={var2:.4f}, 差异={abs(var1-var2):.6f}")

# 5. CVaR
cvar1, cvar2 = validate_cvar_calculation(returns, 0.95)
validation_diffs['CVaR(95%)'] = abs(cvar1 - cvar2)
print(f"CVaR(95%)验证: 方法1={cvar1:.4f}, 方法2={cvar2:.4f}, 差异={abs(cvar1-cvar2):.6f}")

# Only claim success when every pair of results actually agrees. The
# "not (diff <= tol)" form also flags a NaN difference as a failure.
failed_checks = [
    check_name
    for check_name, diff in validation_diffs.items()
    if not (diff <= VALIDATION_TOLERANCE)
]
if failed_checks:
    print(f"\n× 算法验证未通过: {', '.join(failed_checks)}")
else:
    print("\n✓ 所有算法验证通过")

## 8. 总结报告

In [None]:
# Print the final summary report from the results computed in earlier cells.
banner = "="*60
print(banner)
print("金属期货配对交易策略回测报告".center(60))
print(banner)

# Backtest window: earliest open date to latest close date among the trades.
period_start = trades_df['open_date'].min().date()
period_end = trades_df['close_date'].max().date()
print(f"\n回测期间: {period_start} 至 {period_end}")
print(f"初始资金: {INITIAL_CAPITAL:,.0f} 元")
print(f"最终资金: {metrics['final_capital']:,.2f} 元")

# Performance
print(f"\n【绩效指标】")
print(f"  总收益率: {metrics['total_return']:.2%}")
print(f"  年化收益率: {metrics['annualized_return']:.2%}")
print(f"  Sharpe比率: {metrics['sharpe_ratio']:.4f}")
print(f"  最大回撤: {metrics['max_drawdown']:.2%}")

# Risk
print(f"\n【风险指标】")
print(f"  VaR (95%): {risk_metrics['var_95']:.4f}")
print(f"  CVaR (95%): {risk_metrics['cvar_95']:.4f}")
print(f"  Sortino比率: {risk_metrics['sortino_ratio']:.4f}")

# Trade statistics
print(f"\n【交易统计】")
print(f"  总交易次数: {total_trades}")
print(f"  胜率: {winning_trades/total_trades:.2%}")
print(f"  盈亏比: {metrics['profit_loss_ratio']:.2f}")
print(f"  平均持仓天数: {trades_df['holding_days'].mean():.1f}")

# First three entries of 'top_pairs', each unpacked as (pair, pnl).
print(f"\n【最佳配对Top 3】")
for rank, (name, pnl_value) in enumerate(pair_analysis['top_pairs'][:3], start=1):
    print(f"  {rank}. {name}: {pnl_value:,.2f} 元")

# Last three entries of 'rankings', listed in their stored order.
print(f"\n【最差配对Bottom 3】")
bottom_pairs = pair_analysis['rankings'][-3:]
for rank, (name, pnl_value) in enumerate(bottom_pairs, start=1):
    print(f"  {rank}. {name}: {pnl_value:,.2f} 元")

print("\n" + banner)
print("回测完成".center(60))
print(banner)

In [None]:
# Write the backtest results to CSV files, creating the output directory if needed.
output_dir = '../data/backtest_results'
os.makedirs(output_dir, exist_ok=True)

# Trade records
trades_path = f'{output_dir}/trades_detailed.csv'
trades_df.to_csv(trades_path, index=False)
print(f"✓ 交易记录已保存至: {trades_path}")

# Performance metrics, as a single-row table
metrics_path = f'{output_dir}/performance_metrics.csv'
metrics_df = pd.DataFrame([metrics])
metrics_df.to_csv(metrics_path, index=False)
print(f"✓ 绩效指标已保存至: {metrics_path}")

# Risk metrics, as a single-row table
risk_path = f'{output_dir}/risk_metrics.csv'
risk_df = pd.DataFrame([risk_metrics])
risk_df.to_csv(risk_path, index=False)
print(f"✓ 风险指标已保存至: {risk_path}")

# Pair analysis; the index is written too because it holds the pair names
pair_path = f'{output_dir}/pair_analysis.csv'
pair_df.to_csv(pair_path)
print(f"✓ 配对分析已保存至: {pair_path}")

print(f"\n所有回测结果已保存至 {output_dir} 目录")