In [1]:
import sys
sys.path.append('/public/src')
from factor_evaluation_server import FactorEvaluation,DataService # type: ignore
import numpy as np
import pandas as pd
import math
from scipy.special import gamma

In [2]:
ds=DataService()
df=ds['ETHUSDT_15m_2020_2025']['2021-10-01':]

In [3]:
evaluator=FactorEvaluation(df=df,future_return_periods=10)

# 定义因子！

In [4]:
def factor(df):
    """
    计算TSRJV_BNS因子 (基于BNS检验的加权跳跃波动不对称)
    公式：TSRJV = [Σ(T_{BNS,t}/Φ_{1-α}^{-1} * SRJV_t)] / [Σ(T_{BNS,t}/Φ_{1-α}^{-1})]
    其中 Φ_{1-α}^{-1} = 2.33 (99%置信水平)
    """
    # 计算对数收益率
    close_prices = df['close'].values
    log_returns = np.log(close_prices[1:]) - np.log(close_prices[:-1])
    
    # 常数
    mu1 = math.sqrt(2/math.pi)
    mu1_inv_sq = 1 / (mu1 ** 2)
    mu_4_3 = (2**(2/3)) * gamma(7/6) / gamma(0.5)
    critical_value = norm.ppf(0.99)  # Φ_{1-α}^{-1} = 2.33
    
    # 初始化结果数组
    tsrjv_bns_values = np.full(len(df), np.nan)
    
    # 观察期长度 (30天 = 2880根K线)
    observation_period = 2880
    
    for i in range(observation_period, len(df)):
        start_idx = i - observation_period + 1
        end_idx = i
        window_returns = log_returns[start_idx:end_idx]
        
        numerator = 0
        denominator = 0
        
        # 存储每天的SRJV和T_BNS
        daily_srjv = []
        daily_t_bns = []
        
        # 计算每天的SRJV和T_BNS
        for day_start in range(0, len(window_returns), 96):
            day_end = min(day_start + 96, len(window_returns))
            if day_end - day_start < 10:
                continue
                
            daily_returns = window_returns[day_start:day_end]
            n = len(daily_returns)
            
            # 计算BV (积分波动率估计)
            abs_returns = np.abs(daily_returns)
            bv = mu1_inv_sq * (n/(n-1)) * np.sum(abs_returns[:-1] * abs_returns[1:])
            
            # 计算正向和负向波动
            positive_returns = daily_returns[daily_returns > 0]
            negative_returns = daily_returns[daily_returns < 0]
            rv_plus = np.sum(positive_returns ** 2)
            rv_minus = np.sum(negative_returns ** 2)
            
            # 计算RJVP和RJVN
            rjvp = max(rv_plus - bv/2, 0)
            rjvn = max(rv_minus - bv/2, 0)
            
            # 计算SRJV
            srjv = rjvp - rjvn
            daily_srjv.append(srjv)
            
            # 计算T_BNS统计量
            rv = np.sum(daily_returns ** 2)
            tp = 0
            for j in range(2, n):
                term = np.abs(daily_returns[j-2])**(4/3) * np.abs(daily_returns[j-1])**(4/3) * np.abs(daily_returns[j])**(4/3)
                tp += term
            tp = (mu_4_3**(-3)) * (n**2/(n-2)) * tp
            
            denom = ((math.pi/2)**2 + math.pi - 5) * (1/n) * max(1, tp/(bv**2))
            t_bns = (1 - bv/rv) / math.sqrt(denom) if denom > 0 else 0
            daily_t_bns.append(t_bns)
        
        # 计算加权平均值
        for t_bns, srjv in zip(daily_t_bns, daily_srjv):
            weight = t_bns / critical_value
            numerator += weight * srjv
            denominator += weight
        
        if denominator > 0:
            tsrjv_bns_values[i] = numerator / denominator
    
    return pd.Series(tsrjv_bns_values, index=df.index)

# 测试因子表现

In [5]:
evaluator.set_factor(
    factor_func=lambda df: factor(df),
    factor_name='factor'
)

result=evaluator.run_full_evaluation(run_stationarity_test=True)

ValueError: 因子函数执行失败: name 'norm' is not defined

In [None]:
from tabulate import tabulate

def display_factor_evaluation(result):
    # 1. 基础统计指标
    basic_metrics = [
        ["信息系数(IC)", result['correlation_analysis']['IC']],
        ["秩相关系数(Rank IC)", result['correlation_analysis']['Rank_IC']],
        ["信息比率(IR)", result['information_ratio']['IR']],
        ["分组数量", result['information_ratio']['n_groups']],
        ["数据点数", f"{result['factor_return_scatter']['data_points']:,}"],
        ["异常值(>3σ)", f"{result['factor_distribution']['outlier_analysis']['>3std']*100:.2f}%"],
        ["异常值(>5σ)", f"{result['factor_distribution']['outlier_analysis']['>5std']*100:.2f}%"]
    ]
    
    # 2. 分组分析
    group_stats = result['group_analysis']['group_stats']
    group_table = []
    for idx, row in group_stats.iterrows():
        group_table.append([
            idx,
            f"{row['val_min']:.4f}",
            f"{row['val_max']:.4f}",
            f"{row['val_mean']:.4f}",
            f"{row['return_mean']:.6f}",
            f"{row['return_std']:.4f}",
            row['count']
        ])
    
    # 3. 因子分布描述
    dist_stats = result['factor_distribution']['descriptive_stats']
    distribution_metrics = [
        ["均值", f"{dist_stats['mean']:.4f}"],
        ["标准差", f"{dist_stats['std']:.4f}"],
        ["最小值", f"{dist_stats['min']:.4f}"],
        ["1%分位数", f"{dist_stats['1%']:.4f}"],
        ["5%分位数", f"{dist_stats['5%']:.4f}"],
        ["25%分位数", f"{dist_stats['25%']:.4f}"],
        ["中位数", f"{dist_stats['50%']:.4f}"],
        ["75%分位数", f"{dist_stats['75%']:.4f}"],
        ["95%分位数", f"{dist_stats['95%']:.4f}"],
        ["99%分位数", f"{dist_stats['99%']:.4f}"],
        ["最大值", f"{dist_stats['max']:.4f}"]
    ]
    
    # 4. 分组相关性
    group_correlations = []
    for i, corr in enumerate(result['information_ratio']['group_correlations']):
        group_correlations.append([f"组 {i+1}", f"{corr:.6f}"])
    
    # 5. 滚动IC分析 - 完整展示
    rolling_ic = result['rolling_ic_analysis']['rolling_ic']
    rolling_rank_ic = result['rolling_ic_analysis']['rolling_rank_ic']
    window_centers = result['rolling_ic_analysis']['window_centers']
    cumsum_ic = result['rolling_ic_analysis']['cumsum_ic']
    cumsum_rank_ic = result['rolling_ic_analysis']['cumsum_rank_ic']
    
    # 创建滚动IC数据表格
    rolling_table = []
    for i in range(len(rolling_ic)):
        rolling_table.append([
            window_centers[i].strftime('%Y-%m-%d %H:%M:%S'),
            f"{rolling_ic[i]:.6f}",
            f"{rolling_rank_ic[i]:.6f}",
            f"{cumsum_ic[i]:.6f}",
            f"{cumsum_rank_ic[i]:.6f}"
        ])
    
    # 滚动IC汇总统计
    rolling_metrics = [
        ["滚动IC均值", f"{pd.Series(rolling_ic).mean():.6f}"],
        ["滚动IC标准差", f"{pd.Series(rolling_ic).std():.6f}"],
        ["滚动Rank IC均值", f"{pd.Series(rolling_rank_ic).mean():.6f}"],
        ["滚动Rank IC标准差", f"{pd.Series(rolling_rank_ic).std():.6f}"],
        ["累积IC终值", f"{cumsum_ic[-1]:.6f}"],
        ["累积Rank IC终值", f"{cumsum_rank_ic[-1]:.6f}"],
        ["初始窗口大小", result['rolling_ic_analysis']['initial_window']],
        ["滚动步长", result['rolling_ic_analysis']['rolling_step']]
    ]
    
    # 6. 时间窗口统计
    window_stats = []
    window_data = result['factor_distribution']['window_stats'][10]
    
    # 正确处理时间窗口统计数据
    if isinstance(window_data['stats'], pd.DataFrame):
        # 如果是DataFrame结构
        for idx, row in window_data['stats'].iterrows():
            window_stats.append([
                idx,
                f"{row['mean']:.4f}",
                f"{row['std']:.4f}",
                f"{row['skew']:.4f}",
                f"{row['lambda']:.4f}",
                row['count']
            ])
    else:
        # 如果是字典结构
        for time_range, stats in window_data['stats'].items():
            window_stats.append([
                time_range,
                f"{stats['mean']:.4f}",
                f"{stats['std']:.4f}",
                f"{stats['skew']:.4f}",
                f"{stats['lambda']:.4f}",
                stats['count']
            ])
    
    # # 7. 平稳性测试结果
    # stationarity = result['stationarity_test']
    # stationarity_metrics = [
    #     ["ADF统计量", stationarity['adf_statistic']],
    #     ["P值", stationarity['p_value']],
    #     ["1%临界值", stationarity['critical_values']['1%']],
    #     ["5%临界值", stationarity['critical_values']['5%']],
    #     ["10%临界值", stationarity['critical_values']['10%']],
    #     ["是否平稳", stationarity['is_stationary']],
    #     #["显著性水平", stationarity['alpha']]
    # ]
    
    # 打印所有结果
    print("="*80)
    print("因子评估核心指标".center(80))
    print("="*80)
    print(tabulate(basic_metrics, headers=["指标", "值"], tablefmt="grid", floatfmt=".6f"))
    
    # print("\n" + "="*80)
    # print("平稳性测试结果".center(80))
    # print("="*80)
    # print(tabulate(stationarity_metrics, headers=["指标", "值"], tablefmt="grid", floatfmt=".6f"))
    
    print("\n" + "="*80)
    print("分组分析结果".center(80))
    print("="*80)
    print(tabulate(group_table, headers=["分组", "最小值", "最大值", "平均值", "收益均值", "收益标准差", "样本数"], tablefmt="grid"))
    
    print("\n" + "="*80)
    print("因子分布统计".center(80))
    print("="*80)
    print(tabulate(distribution_metrics, headers=["统计量", "值"], tablefmt="grid", floatfmt=".4f"))
    
    print("\n" + "="*80)
    print("分组相关性".center(80))
    print("="*80)
    print(tabulate(group_correlations, headers=["分组", "相关性"], tablefmt="grid", floatfmt=".6f"))
    
    print("\n" + "="*80)
    print("滚动IC分析汇总".center(80))
    print("="*80)
    print(tabulate(rolling_metrics, headers=["指标", "值"], tablefmt="grid", floatfmt=".6f"))
    
    print("\n" + "="*80)
    print("滚动IC详细数据".center(80))
    print("="*80)
    print(tabulate(rolling_table, headers=["窗口中心", "滚动IC", "滚动Rank IC", "累积IC", "累积Rank IC"], tablefmt="grid"))
    
    print("\n" + "="*80)
    print("时间窗口统计".center(80))
    print("="*80)
    print(tabulate(window_stats, headers=["时间窗口", "均值", "标准差", "偏度", "Lambda", "样本数"], tablefmt="grid"))
    print(f"时间范围: {window_data['time_ranges']}")

# 调用展示函数
display_factor_evaluation(result)

                                    因子评估核心指标                                    
+---------------------+-------------------------+
| 指标                | 值                      |
| 信息系数(IC)        | 0.00889821625585493     |
+---------------------+-------------------------+
| 秩相关系数(Rank IC) | -0.00042223381382338026 |
+---------------------+-------------------------+
| 信息比率(IR)        | nan                     |
+---------------------+-------------------------+
| 分组数量            | 8                       |
+---------------------+-------------------------+
| 数据点数            | 129,326                 |
+---------------------+-------------------------+
| 异常值(>3σ)         | 8.50%                   |
+---------------------+-------------------------+
| 异常值(>5σ)         | 0.31%                   |
+---------------------+-------------------------+

                                     分组分析结果                                     
+--------+----------+----------+----------+------------+-----------