In [1]:
import sys
sys.path.append('/public/src')
from factor_evaluation_server import FactorEvaluation,DataService # type: ignore
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [2]:
ds=DataService()
df=ds['ETHUSDT_15m_2020_2025']['2021-10-01':]

In [3]:
evaluator=FactorEvaluation(df=df,future_return_periods=10)

# 定义因子！

In [4]:
def factor(df, halflife=47):
    df = df.copy()
    
    # 1. 基础指标计算（同方案1）
    df['range'] = (df['high'] - df['low']) / df['open']
    df['fractal'] = df['range'].rolling(5).std() / df['range'].rolling(20).std().replace(0, 1e-5)
    df['is_up'] = (df['close'] > df['open']).astype(int)
    df['is_down'] = (df['close'] < df['open']).astype(int)
    
    # 2. 连续重心计算（同方案1）
    weights_up = df['fractal'] * df['volume'] * df['is_up']
    weights_down = df['fractal'] * df['volume'] * df['is_down']
    
    df['cum_weight_up'] = weights_up.cumsum()
    df['cum_weighted_idx_up'] = (pd.Series(range(len(df)), index=df.index) * weights_up).cumsum()
    df['G_u'] = df['cum_weighted_idx_up'] / df['cum_weight_up']
    
    df['cum_weight_down'] = weights_down.cumsum()
    df['cum_weighted_idx_down'] = (pd.Series(range(len(df)), index=df.index) * weights_down).cumsum()
    df['G_d'] = df['cum_weighted_idx_down'] / df['cum_weight_down']
    
    # 3. 指数加权滚动回归
    df['avg_fractal'] = df['fractal'].rolling(24, min_periods=12).mean()
    regression_data = np.full(len(df), np.nan)
    
    # 计算指数权重
    decay_rate = 1 - np.exp(np.log(0.5) / halflife)
    weights = np.zeros(len(df))
    for i in range(len(df)):
        weights[i] = (1 - decay_rate) ** (len(df) - 1 - i)
    
    # 指数加权回归
    for i in range(100, len(df)):
        start = max(0, i - 200)  # 最大回溯200期
        window_indices = range(start, i)
        window_weights = weights[window_indices]
        
        X_window = df[['G_u', 'avg_fractal']].iloc[window_indices].values
        y_window = df['G_d'].iloc[window_indices].values
        
        valid_mask = ~np.isnan(X_window).any(axis=1) & ~np.isnan(y_window)
        X_valid = X_window[valid_mask]
        y_valid = y_window[valid_mask]
        w_valid = window_weights[valid_mask]
        
        if len(X_valid) < 10:
            continue
            
        # 加权线性回归
        W = np.diag(w_valid)
        XTW = X_valid.T @ W
        try:
            coeffs = np.linalg.inv(XTW @ X_valid) @ XTW @ y_valid
            pred = df[['G_u', 'avg_fractal']].iloc[i].values @ coeffs
            residual = df['G_d'].iloc[i] - pred
            regression_data[i] = residual * df['avg_fractal'].iloc[i]
        except np.linalg.LinAlgError:
            continue
    
    # 4. 因子中性化处理
    factor_series = pd.Series(-regression_data, index=df.index)
    
    # 市场中性化
    market_return = np.log(df['close']).diff().fillna(0)
    neutral_model = LinearRegression()
    for i in range(5000, len(df)):
        if i % 1000 == 0:  # 每1000期重新拟合
            X_neutral = market_return.iloc[:i].values.reshape(-1, 1)
            y_neutral = factor_series.iloc[:i].values
            neutral_model.fit(X_neutral, y_neutral)
        
        factor_series.iloc[i] -= neutral_model.predict([[market_return.iloc[i]]])[0]
    
    # 标准化处理
    rolling_std = factor_series.rolling(5000).std().replace(0, 1)
    factor_series = factor_series / rolling_std
    
    return factor_series.fillna(0)

# 测试因子表现

In [5]:
evaluator.set_factor(
    factor_func=lambda df: factor(df),
    factor_name='factor'
)

result=evaluator.run_full_evaluation(run_stationarity_test=False)

ValueError: 因子函数执行失败: Input y contains NaN.

In [None]:
result['information_ratio']['group_correlations']
# 这表示不同分组之间的相关性，通常用于评估因子在不同市场状态下的表现一致性。
# 如果相关性较高，说明因子在不同市场状态下表现一致；如果相关性较低，说明因子在不同市场状态下表现差异较大。

[-0.01690796479513809,
 0.017082174178146198,
 0.0015363590771937847,
 0.0048825823401946414,
 -0.0102667659819026,
 0.009478070331710262,
 0.0028196744056216494,
 0.04323663360510031]

In [None]:
print(result)

{'correlation_analysis': {'IC': 0.02731286940144614, 'Rank_IC': 0.009464318952115096}, 'information_ratio': {'IR': 0.3790177409362612, 'group_correlations': [-0.01690796479513809, 0.017082174178146198, 0.0015363590771937847, 0.0048825823401946414, -0.0102667659819026, 0.009478070331710262, 0.0028196744056216494, 0.04323663360510031], 'n_groups': 8}, 'group_analysis': {'group_stats':        val_min  val_max  val_mean  return_mean  return_std  count
group                                                            
0     -29.6643  -7.8114  -10.8188      -0.0003      0.0134   6552
1      -7.8113  -5.4966   -6.5002      -0.0005      0.0127   6552
2      -5.4966  -4.2037   -4.8063      -0.0003      0.0123   6552
3      -4.2037  -3.2444   -3.7009      -0.0000      0.0112   6551
4      -3.2443  -2.4907   -2.8543       0.0001      0.0113   6552
5      -2.4904  -1.8699   -2.1712       0.0001      0.0114   6552
6      -1.8697   0.0000   -0.1080      -0.0002      0.0111  49503
7       1.6002   1.8