# 新β算法配对交易回测

## 目标
- 使用改进的β算法（名义价值×波动率对齐）重新计算配对交易信号
- 执行完整回测并与原有方法对比
- 验证新算法的实际交易效果

In [None]:
# Import required libraries.
import sys
# Put the parent directory on the import path; presumably this is what lets the
# `lib` and `configs` packages imported below resolve when the notebook runs
# from a subdirectory — confirm against the repository layout.
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
# Silence every warning category for the whole session (keeps cell output clean,
# but also hides pandas/numpy deprecation and runtime warnings).
warnings.filterwarnings('ignore')

# Project-local helpers: beta calculation, Kalman-based signal generation, and
# the per-contract specification table (used later for contract multipliers).
from lib.beta_calculator import calculate_volatility_adjusted_beta, batch_calculate_improved_beta
from lib.strategy import KalmanFilter, generate_signals
from configs.contract_specs import CONTRACT_SPECS

print('✅ 环境配置完成')
print('🎯 准备使用新β算法进行回测')

## 1. 重新计算所有配对的β系数

In [None]:
# 1. Load the previously generated Kalman signals. They supply the list of
#    pairs plus the original beta / position ratio used as the baseline.
print('🔄 加载原始数据...')
original_signals = pd.read_parquet('../data/signals/kalman_signals_final.parquet')
pairs = original_signals['pair'].unique()

print(f'原始配对数: {len(pairs)} 个')
print(f'配对列表: {list(pairs)}')

# 2. Recompute beta for every pair with the volatility-adjusted method.
print('\n🧮 使用新β算法重新计算配对系数...')

# Number of most recent *common* trading days fed into the beta calculation.
beta_window = 90

# Fields copied verbatim from the calculator's result dict into each output row.
copied_fields = (
    'y_notional', 'x_notional',
    'y_volatility', 'x_volatility',
    'risk_weighted_y', 'risk_weighted_x',
    'explanation',
)

new_beta_results = []

for pair in pairs:
    try:
        y_symbol, x_symbol = pair.split('-')

        # Load both legs' price history.
        y_data = pd.read_parquet(f'../data/futures/{y_symbol}.parquet')
        x_data = pd.read_parquet(f'../data/futures/{x_symbol}.parquet')

        # Align the two legs on trading date BEFORE taking the window.
        # Taking tail(90) of each file independently and trimming to a common
        # length only matches array sizes; it pairs up different dates whenever
        # the two contracts do not share exactly the same recent calendar.
        y_close = y_data.assign(date=pd.to_datetime(y_data['date']))[['date', 'close']]
        x_close = x_data.assign(date=pd.to_datetime(x_data['date']))[['date', 'close']]
        common = (
            y_close.merge(x_close, on='date', suffixes=('_y', '_x'))
            .dropna(subset=['close_y', 'close_x'])
            .sort_values('date')
            .tail(beta_window)
        )
        if common.empty:
            # Surfaces through the except below as a per-pair failure message.
            raise ValueError('no overlapping trading days between the two legs')

        y_prices = common['close_y'].values
        x_prices = common['close_x'].values

        # Compute the new beta.
        beta_result = calculate_volatility_adjusted_beta(y_prices, x_prices, y_symbol, x_symbol)

        # Baseline for comparison: the first stored signal row of this pair.
        baseline = original_signals.loc[original_signals['pair'] == pair].iloc[0]
        original_beta = baseline['beta']
        original_ratio = baseline['position_ratio']

        result_row = {
            'pair': pair,
            'y_symbol': y_symbol,
            'x_symbol': x_symbol,
            'original_beta': original_beta,
            'original_ratio': original_ratio,
            'new_beta': beta_result['beta'],
            'new_ratio': beta_result['position_ratio'],
        }
        result_row.update({field: beta_result[field] for field in copied_fields})
        new_beta_results.append(result_row)

        print(f'✅ {pair}: {original_beta:.3f} → {beta_result["beta"]:.3f} ({original_ratio} → {beta_result["position_ratio"]})')

    except Exception as e:
        # Best effort: report the failing pair and carry on with the rest.
        print(f'❌ {pair}: 计算失败 - {e}')

# Collect the per-pair results into a DataFrame for the following cells.
new_beta_df = pd.DataFrame(new_beta_results)
print(f'\n✅ 新β计算完成: {len(new_beta_df)} 个配对')

## 2. β系数变化分析

In [None]:
# Report how far each pair's beta moved under the new algorithm.
print('📊 β系数变化详细分析')
print('=' * 80)

if len(new_beta_df) == 0:
    print('❌ 没有成功计算出新β结果')
else:
    # Absolute and percentage shift relative to the original beta.
    beta_delta = new_beta_df['new_beta'] - new_beta_df['original_beta']
    new_beta_df['beta_change'] = beta_delta
    new_beta_df['beta_change_pct'] = (
        new_beta_df['beta_change'] / new_beta_df['original_beta'] * 100
    ).round(1)

    # Risk balance: smaller risk-weighted leg divided by the larger one.
    risk_y = new_beta_df['risk_weighted_y']
    risk_x = new_beta_df['risk_weighted_x']
    new_beta_df['risk_balance'] = np.minimum(risk_y, risk_x) / np.maximum(risk_y, risk_x)

    # Largest absolute percentage change first.
    new_beta_df_sorted = new_beta_df.sort_values(
        'beta_change_pct',
        key=lambda column: column.abs(),
        ascending=False,
    )

    print('配对        | 原β      | 新β      | 变化    | 原比例 | 新比例 | 风险平衡度')
    print('-' * 80)

    for _, rec in new_beta_df_sorted.iterrows():
        delta = rec['beta_change']
        if delta > 0:
            change_symbol = '📈'
        elif delta < 0:
            change_symbol = '📉'
        else:
            change_symbol = '➡️'

        cells = [
            f'{rec["pair"]:10s}',
            f'{rec["original_beta"]:7.3f}',
            f'{rec["new_beta"]:7.3f}',
            f'{change_symbol}{rec["beta_change_pct"]:+5.1f}%',
            f'{rec["original_ratio"]:5s}',
            f'{rec["new_ratio"]:5s}',
            f'{rec["risk_balance"]:8.3f}',
        ]
        print(' | '.join(cells))

    # Summary statistics over all pairs.
    abs_change_pct = new_beta_df['beta_change_pct'].abs()
    pair_count = len(new_beta_df)
    avg_change = abs_change_pct.mean()
    significant_changes = (abs_change_pct > 20).sum()
    avg_risk_balance = new_beta_df['risk_balance'].mean()

    print('\n📊 变化统计:')
    print(f'  平均变化幅度: {avg_change:.1f}%')
    print(f'  显著变化配对: {significant_changes}/{pair_count} ({significant_changes/pair_count*100:.0f}%)')
    print(f'  平均风险平衡度: {avg_risk_balance:.3f}')

    # Persist the enriched table.
    new_beta_df.to_csv('../data/signals/new_beta_results.csv', index=False)
    print('\n💾 新β结果已保存: data/signals/new_beta_results.csv')

print('\n' + '=' * 80)

## 3. 生成基于新β的交易信号

In [None]:
# Generate trading signals based on the new beta algorithm.
print('🔄 生成基于新β算法的交易信号...')
print('=' * 80)

# Load the close-price history of every symbol in the universe.
all_symbols = ['AG0', 'AU0', 'AL0', 'CU0', 'NI0', 'PB0', 'SN0', 'ZN0', 
               'HC0', 'I0', 'RB0', 'SF0', 'SM0', 'SS0']

price_data = {}
for symbol in all_symbols:
    try:
        df = pd.read_parquet(f'../data/futures/{symbol}.parquet')
        df['date'] = pd.to_datetime(df['date'])
        # Keep only the close series, indexed by date.
        df = df.set_index('date')['close']
        price_data[symbol] = df
        print(f'✅ {symbol}: {len(df)} 天数据')
    except Exception as e:
        # A symbol that fails to load is reported and left out of price_data.
        print(f'❌ {symbol}: 加载失败 - {e}')

# Build the aligned price matrix: one column per symbol, and only the dates on
# which every loaded symbol has a value (rows containing any NaN are dropped).
aligned_data = pd.DataFrame(price_data)
aligned_data = aligned_data.dropna()

print(f'\n📊 对齐后数据: {len(aligned_data)} 天，{len(aligned_data.columns)} 个品种')
print(f'数据期间: {aligned_data.index.min().date()} 至 {aligned_data.index.max().date()}')

# Generate new signals for each pair in new_beta_df.
new_signals_all = []

for _, beta_row in new_beta_df.iterrows():
    pair = beta_row['pair']
    y_symbol = beta_row['y_symbol']
    x_symbol = beta_row['x_symbol']
    new_beta = beta_row['new_beta']
    new_ratio = beta_row['new_ratio']
    
    print(f'\n🔄 生成{pair}的新信号 (β={new_beta:.3f}, 比例={new_ratio})')
    
    try:
        # Price series of both legs on the common date index.
        y_prices = aligned_data[y_symbol]
        x_prices = aligned_data[x_symbol]
        
        # Track a dynamic hedge ratio with the project's Kalman filter.
        # NOTE(review): KalmanFilter is project-local; its state layout and
        # update() semantics are not visible here — confirm against lib.strategy.
        kf = KalmanFilter()
        
        # Seed the filter state with the new beta as the initial value.
        kf.x = np.array([[new_beta], [0]])  # [beta, beta_dot]
        
        hedge_ratios = []
        spreads = []
        
        for i in range(len(y_prices)):
            if i == 0:
                # First bar: no update yet, so record the seed beta and a zero spread.
                hedge_ratios.append(new_beta)
                spreads.append(0)
                continue
            
            # Update the filter; the observation passed in is the y/x price ratio.
            observation = np.array([[y_prices.iloc[i] / x_prices.iloc[i]]])
            kf.update(observation)
            
            current_beta = float(kf.x[0, 0])
            hedge_ratios.append(current_beta)
            
            # Spread using the filter's current beta estimate.
            spread = y_prices.iloc[i] - current_beta * x_prices.iloc[i]
            spreads.append(spread)
        
        # Per-bar table of prices, hedge ratio and spread.
        # NOTE(review): signals_df (and the spreads list feeding it) is not read
        # again anywhere in this cell — confirm whether a later cell relies on it.
        signals_df = pd.DataFrame({
            'date': y_prices.index,
            'y_price': y_prices.values,
            'x_price': x_prices.values,
            'beta': hedge_ratios,
            'spread': spreads
        })
        
        # Delegate the actual signal generation to the existing project logic.
        pair_signals = generate_signals(
            y_prices.values,
            x_prices.values, 
            y_symbol,
            x_symbol,
            hedge_ratios,
            lookback=60,
            entry_threshold=1.5,
            exit_threshold=0.5
        )
        
        # Stamp every signal with the static new beta and ratio. This overwrites
        # any 'beta' value the signal already carried, so the stored beta is the
        # static new_beta rather than the per-bar Kalman estimate.
        for signal in pair_signals:
            signal['beta'] = new_beta
            signal['position_ratio'] = new_ratio
            signal['beta_method'] = 'volatility_adjusted'
        
        new_signals_all.extend(pair_signals)
        
        print(f'  ✅ 生成{len(pair_signals)}个信号')
        
    except Exception as e:
        # Best effort: report the failing pair and continue with the next one.
        print(f'  ❌ {pair}信号生成失败: {e}')

print(f'\n✅ 新信号生成完成: {len(new_signals_all)} 个信号')

# Convert to a DataFrame and persist.
new_signals_df = pd.DataFrame(new_signals_all)
if len(new_signals_df) > 0:
    new_signals_df.to_parquet('../data/signals/new_beta_signals.parquet', index=False)
    print(f'💾 新信号已保存: data/signals/new_beta_signals.parquet')
    
    # Count open / close signals.
    open_count = len(new_signals_df[new_signals_df['action'] == 'open'])
    close_count = len(new_signals_df[new_signals_df['action'] == 'close'])
    print(f'📈 新信号统计: {open_count}个开仓, {close_count}个平仓')
else:
    print('❌ 没有生成有效的新信号')

## 4. 执行新β算法回测

In [None]:
# Run a simplified backtest driven by the new-beta signals.
print('🚀 执行新β算法回测...')
print('=' * 80)

if len(new_signals_df) > 0:
    # Backtest parameters.
    initial_capital = 5000000  # 5,000,000 starting capital
    margin_rate = 0.12
    # NOTE(review): stop_loss_rate is defined but never applied in this
    # simplified loop — positions are only closed by 'close' signals.
    stop_loss_rate = 0.10
    commission_rate = 0.0002

    # Simplified execution state.
    trades_new_beta = []
    current_positions = {}
    total_pnl = 0
    skipped_signals = 0  # signals dropped because a price or ratio was missing

    # Look the position ratio of each pair up once instead of filtering
    # new_beta_df on every open signal (first row wins, as before).
    new_ratio_by_pair = (
        new_beta_df.drop_duplicates('pair').set_index('pair')['new_ratio'].to_dict()
    )

    # Process open/close signals in date order. A stable sort keeps same-day
    # signals in their generation order, so repeated runs give the same result.
    trading_signals = new_signals_df[new_signals_df['action'].isin(['open', 'close'])]
    trading_signals = trading_signals.sort_values('date', kind='stable')

    print(f'处理交易信号: {len(trading_signals)} 个')

    for _, signal in trading_signals.iterrows():
        date = signal['date']
        pair = signal['pair']
        action = signal['action']

        if action == 'open' and pair not in current_positions:
            # Open a position.
            y_symbol, x_symbol = pair.split('-')

            # Only a missing date/symbol/pair is treated as "skip this signal";
            # anything else is a real error and is allowed to surface.
            try:
                ratio_text = new_ratio_by_pair[pair]
                y_price = aligned_data.loc[date, y_symbol]
                x_price = aligned_data.loc[date, x_symbol]
            except KeyError:
                skipped_signals += 1
                continue

            # Parse the "y:x" lot ratio.
            ratio_parts = ratio_text.split(':')
            y_ratio = int(ratio_parts[0])
            x_ratio = int(ratio_parts[1])

            y_multiplier = CONTRACT_SPECS[y_symbol]['multiplier']
            x_multiplier = CONTRACT_SPECS[x_symbol]['multiplier']

            # Notional value of both legs and the margin it would tie up.
            y_notional = y_price * y_ratio * y_multiplier
            x_notional = x_price * x_ratio * x_multiplier
            total_notional = y_notional + x_notional
            total_margin = total_notional * margin_rate

            current_positions[pair] = {
                'y_symbol': y_symbol,
                'x_symbol': x_symbol,
                'y_open': y_price,
                'x_open': x_price,
                'y_ratio': y_ratio,
                'x_ratio': x_ratio,
                'open_date': date,
                'total_margin': total_margin,
                'total_notional': total_notional,
                'y_multiplier': y_multiplier,
                'x_multiplier': x_multiplier,
            }

        elif action == 'close' and pair in current_positions:
            # Close the open position of this pair.
            position = current_positions[pair]

            try:
                y_close = aligned_data.loc[date, position['y_symbol']]
                x_close = aligned_data.loc[date, position['x_symbol']]
            except KeyError:
                # No price on the close date: the position stays open.
                skipped_signals += 1
                continue

            # NOTE(review): PnL always treats the position as long Y / short X;
            # the signal's own direction (if it carries one) is not consulted —
            # confirm against the output of generate_signals.
            y_pnl = (y_close - position['y_open']) * position['y_ratio'] * position['y_multiplier']
            x_pnl = -(x_close - position['x_open']) * position['x_ratio'] * position['x_multiplier']
            gross_pnl = y_pnl + x_pnl
            # Commission is charged on the opening notional for both the entry
            # and the exit (hence the factor of 2).
            commission = position['total_notional'] * commission_rate * 2
            net_pnl = gross_pnl - commission

            trade_record = {
                'pair': pair,
                'open_date': position['open_date'],
                'close_date': date,
                'holding_days': (pd.to_datetime(date) - pd.to_datetime(position['open_date'])).days,
                'y_symbol': position['y_symbol'],
                'x_symbol': position['x_symbol'],
                'y_open': position['y_open'],
                'x_open': position['x_open'],
                'y_close': y_close,
                'x_close': x_close,
                'y_ratio': position['y_ratio'],
                'x_ratio': position['x_ratio'],
                'y_pnl': y_pnl,
                'x_pnl': x_pnl,
                'gross_pnl': gross_pnl,
                'commission': commission,
                'net_pnl': net_pnl,
                'beta_method': 'volatility_adjusted',
            }

            trades_new_beta.append(trade_record)
            total_pnl += net_pnl

            # The pair is flat again.
            del current_positions[pair]

    print(f'\n✅ 新β算法回测完成:')
    print(f'  执行交易: {len(trades_new_beta)} 笔')
    print(f'  总净PnL: ¥{total_pnl:,.2f}')
    print(f'  投资回报率: {total_pnl/initial_capital*100:.2f}%')
    if skipped_signals:
        # Make dropped signals visible instead of discarding them silently.
        print(f'  跳过信号(缺少价格或比例): {skipped_signals} 个')

    if len(trades_new_beta) > 0:
        # Persist the trade list.
        trades_new_df = pd.DataFrame(trades_new_beta)
        trades_new_df.to_parquet('../data/backtest/new_beta_backtest_results.parquet', index=False)
        print(f'💾 新回测结果已保存: data/backtest/new_beta_backtest_results.parquet')

        # Basic statistics.
        win_rate_new = (trades_new_df['net_pnl'] > 0).mean() * 100
        avg_holding_new = trades_new_df['holding_days'].mean()
        max_profit_new = trades_new_df['net_pnl'].max()
        max_loss_new = trades_new_df['net_pnl'].min()

        print(f'\n📊 新β回测基本统计:')
        print(f'  胜率: {win_rate_new:.1f}%')
        print(f'  平均持仓: {avg_holding_new:.1f} 天')
        print(f'  最大盈利: ¥{max_profit_new:,.2f}')
        print(f'  最大亏损: ¥{max_loss_new:,.2f}')

else:
    print('❌ 无法执行回测，缺少新信号数据')

print('\n' + '=' * 80)

## 5. 新旧β方法回测结果对比

In [None]:
# Compare backtest results of the original beta, the VnPy check and the new beta.
#
# The helpers are defined BEFORE the comparison runs: previously
# calculate_sharpe_ratio was defined after the try-block that calls it, so the
# first execution of this cell failed with a NameError.

def calculate_sharpe_ratio(pnl_series, risk_free_rate=0.03, initial_capital=5000000):
    """Return an annualised Sharpe ratio for a series of PnL amounts.

    Each PnL entry is divided by ``initial_capital`` to obtain a return, and the
    mean / standard deviation are annualised with 252 periods.

    NOTE(review): the 252-period annualisation treats every entry as one trading
    day; when the series holds per-trade PnL the result is only a rough
    comparison figure — confirm the intended interpretation.

    Args:
        pnl_series: pandas Series of PnL amounts.
        risk_free_rate: annual risk-free rate subtracted from the annualised return.
        initial_capital: capital base used to convert PnL into returns.

    Returns:
        The Sharpe ratio, or 0 when the series is empty or its volatility is
        not a positive number (e.g. a single entry or constant PnL).
    """
    if len(pnl_series) == 0:
        return 0

    returns = pnl_series / initial_capital  # PnL -> return on capital
    excess_return = returns.mean() * 252 - risk_free_rate  # annualised excess return
    volatility = returns.std() * np.sqrt(252)  # annualised volatility

    # NaN volatility (single observation) fails the comparison and yields 0.
    return excess_return / volatility if volatility > 0 else 0


def calculate_pnl_improvement(original_pnl, new_pnl):
    """Return the PnL change of new vs. original as a percentage of |original|.

    Dividing by the absolute baseline keeps the sign meaningful when the
    baseline is negative (a smaller loss is a positive improvement). A zero
    baseline yields 0.0 for no change and +/-inf otherwise, instead of raising
    or producing NaN.
    """
    delta = new_pnl - original_pnl
    if original_pnl == 0:
        if delta == 0:
            return 0.0
        return float('inf') if delta > 0 else float('-inf')
    return delta / abs(original_pnl) * 100


def summarize_trades(trades, initial_capital=5000000):
    """Format the comparison-table column for one trade list.

    Args:
        trades: DataFrame with ``net_pnl`` and ``holding_days`` columns.
        initial_capital: capital base for the ROI figure.

    Returns:
        A list: trade count, total net PnL, win rate (%), average holding days,
        best trade, worst trade and ROI (%), in that order. All but the count
        are pre-formatted strings.
    """
    net_pnl = trades['net_pnl']
    total = net_pnl.sum()
    return [
        len(trades),
        f'¥{total:,.0f}',
        f'{(net_pnl > 0).mean() * 100:.1f}',
        f'{trades["holding_days"].mean():.1f}',
        f'¥{net_pnl.max():,.0f}',
        f'¥{net_pnl.min():,.0f}',
        f'{total/initial_capital*100:.2f}',
    ]


print('📊 新旧β方法回测结果详细对比')
print('=' * 100)

try:
    # Load the earlier backtest results.
    original_trades = pd.read_parquet('../data/backtest/enhanced_trades_margin_stop.parquet')
    vnpy_trades = pd.read_csv('../data/backtest/vnpy_exact_execution.csv')

    # The new-beta trades come from the in-memory list built by the backtest cell.
    if len(trades_new_beta) > 0:
        new_beta_trades = pd.DataFrame(trades_new_beta)

        # Side-by-side metric table.
        comparison_metrics = {
            '指标': ['交易笔数', '总净PnL', '胜率(%)', '平均持仓(天)', '最大盈利', '最大亏损', 'ROI(%)'],
            '原回测(传统β)': summarize_trades(original_trades),
            'VnPy验证': summarize_trades(vnpy_trades),
            '新β算法': summarize_trades(new_beta_trades),
        }

        comparison_df = pd.DataFrame(comparison_metrics)
        print(comparison_df.to_string(index=False))

        # Improvement of the new method over the original backtest.
        original_pnl = original_trades['net_pnl'].sum()
        new_pnl = new_beta_trades['net_pnl'].sum()
        pnl_improvement = calculate_pnl_improvement(original_pnl, new_pnl)

        print(f'\n🎯 关键改进指标:')
        print(f'  PnL改进: ¥{new_pnl - original_pnl:+,.0f} ({pnl_improvement:+.1f}%)')

        original_win_rate = (original_trades['net_pnl'] > 0).mean() * 100
        new_win_rate = (new_beta_trades['net_pnl'] > 0).mean() * 100
        win_rate_improvement = new_win_rate - original_win_rate
        print(f'  胜率改进: {win_rate_improvement:+.1f}%')

        # Risk-adjusted comparison.
        original_sharpe = calculate_sharpe_ratio(original_trades['net_pnl'])
        new_sharpe = calculate_sharpe_ratio(new_beta_trades['net_pnl'])
        print(f'  夏普比率: {original_sharpe:.3f} → {new_sharpe:.3f} ({new_sharpe-original_sharpe:+.3f})')

        print(f'\n🏆 新β算法评估:')
        if pnl_improvement > 5:
            print('  ✅ 显著改进 - 建议立即采用新算法')
        elif pnl_improvement > 0:
            print('  ✅ 略有改进 - 可以考虑采用新算法')
        else:
            print('  ⚠️ 需要进一步优化 - 分析具体原因')

    else:
        print('❌ 无法进行对比，缺少新β回测结果')

except Exception as e:
    # Top-level boundary of the cell: report the failure with a full traceback
    # (missing result files, missing columns, or the backtest cell not run).
    print(f'❌ 对比分析失败: {e}')
    import traceback
    traceback.print_exc()

print('\n' + '=' * 100)
print('🎉 新β算法回测对比分析完成')
print('=' * 100)