In [1]:
import sys
sys.path.append('/public/src')
from factor_evaluation_server import FactorEvaluation,DataService # type: ignore
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [2]:
ds=DataService()
df=ds['ETHUSDT_15m_2020_2025']['2021-10-01':]

In [3]:
evaluator=FactorEvaluation(df=df,future_return_periods=10)

# 定义因子！

In [4]:
def factor(df, regime_threshold=0.3):
    df = df.copy()
    
    # 1. 识别市场状态
    df['log_ret'] = np.log(df['close']).diff()
    df['volatility'] = df['log_ret'].rolling(20*96).std()
    df['vol_regime'] = (df['volatility'] > df['volatility'].quantile(0.7)).astype(int)
    
    # 计算价格趋势
    df['ma_fast'] = df['close'].rolling(50).mean()
    df['ma_slow'] = df['close'].rolling(200).mean()
    df['trend_regime'] = (df['ma_fast'] > df['ma_slow']).astype(int)
    
    # 2. 基础指标计算
    df['range'] = (df['high'] - df['low']) / df['open']
    df['fractal'] = df['range'].rolling(5).std() / df['range'].rolling(20).std().replace(0, 1e-5)
    df['is_up'] = (df['close'] > df['open']).astype(int)
    df['is_down'] = (df['close'] < df['open']).astype(int)
    
    # 3. 状态自适应重心计算
    def regime_aware_center(df, col_prefix):
        df[f'{col_prefix}_G_u'] = np.nan
        df[f'{col_prefix}_G_d'] = np.nan
        
        for regime in [0, 1]:
            regime_mask = (df['vol_regime'] == regime) & (df['trend_regime'] == regime)
            weights_up = df['fractal'] * df['volume'] * df['is_up'] * regime_mask
            weights_down = df['fractal'] * df['volume'] * df['is_down'] * regime_mask
            
            cum_weight_up = weights_up.cumsum()
            cum_weighted_idx_up = (pd.Series(range(len(df)), index=df.index) * weights_up).cumsum()
            
            cum_weight_down = weights_down.cumsum()
            cum_weighted_idx_down = (pd.Series(range(len(df)), index=df.index) * weights_down).cumsum()
            
            df.loc[regime_mask, f'{col_prefix}_G_u'] = cum_weighted_idx_up / cum_weight_up
            df.loc[regime_mask, f'{col_prefix}_G_d'] = cum_weighted_idx_down / cum_weight_down
        
        return df
    
    df = regime_aware_center(df, 'low_vol')
    df = regime_aware_center(df, 'high_vol')
    
    # 4. 状态自适应因子计算
    factor_series = pd.Series(0, index=df.index)
    
    for i in range(200, len(df)):
        regime = int(df['vol_regime'].iloc[i] and df['trend_regime'].iloc[i])
        col_prefix = 'high_vol' if regime else 'low_vol'
        
        # 使用状态特定的变量
        G_u = df[f'{col_prefix}_G_u'].iloc[i]
        G_d = df[f'{col_prefix}_G_d'].iloc[i]
        avg_fractal = df['avg_fractal'].iloc[i]
        
        if not np.isnan(G_u) and not np.isnan(G_d):
            # 简化的因子计算（避免滚动回归）
            imbalance = G_u - G_d
            factor_value = imbalance * avg_fractal
            
            # 状态内标准化
            regime_mask = (df['vol_regime'] == regime) & (df['trend_regime'] == regime)
            regime_mean = factor_series[regime_mask].rolling(1000).mean().iloc[i]
            regime_std = factor_series[regime_mask].rolling(1000).std().iloc[i]
            
            if regime_std > 1e-5:
                factor_value = (factor_value - regime_mean) / regime_std
            
            factor_series.iloc[i] = -factor_value
    
    return factor_series.fillna(0)

# 测试因子表现

In [5]:
evaluator.set_factor(
    factor_func=lambda df: factor(df),
    factor_name='factor'
)

result=evaluator.run_full_evaluation(run_stationarity_test=False)

ValueError: 因子函数执行失败: 'avg_fractal'

In [None]:
result['information_ratio']['group_correlations']
# 这表示不同分组之间的相关性，通常用于评估因子在不同市场状态下的表现一致性。
# 如果相关性较高，说明因子在不同市场状态下表现一致；如果相关性较低，说明因子在不同市场状态下表现差异较大。

[-0.01690796479513809,
 0.017082174178146198,
 0.0015363590771937847,
 0.0048825823401946414,
 -0.0102667659819026,
 0.009478070331710262,
 0.0028196744056216494,
 0.04323663360510031]

In [None]:
print(result)

{'correlation_analysis': {'IC': 0.02731286940144614, 'Rank_IC': 0.009464318952115096}, 'information_ratio': {'IR': 0.3790177409362612, 'group_correlations': [-0.01690796479513809, 0.017082174178146198, 0.0015363590771937847, 0.0048825823401946414, -0.0102667659819026, 0.009478070331710262, 0.0028196744056216494, 0.04323663360510031], 'n_groups': 8}, 'group_analysis': {'group_stats':        val_min  val_max  val_mean  return_mean  return_std  count
group                                                            
0     -29.6643  -7.8114  -10.8188      -0.0003      0.0134   6552
1      -7.8113  -5.4966   -6.5002      -0.0005      0.0127   6552
2      -5.4966  -4.2037   -4.8063      -0.0003      0.0123   6552
3      -4.2037  -3.2444   -3.7009      -0.0000      0.0112   6551
4      -3.2443  -2.4907   -2.8543       0.0001      0.0113   6552
5      -2.4904  -1.8699   -2.1712       0.0001      0.0114   6552
6      -1.8697   0.0000   -0.1080      -0.0002      0.0111  49503
7       1.6002   1.8