In [1]:
import sys
sys.path.append('/public/src')
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
from sklearn.linear_model import LinearRegression

from factor_evaluation_server import FactorEvaluation, DataService
ds = DataService()
df = ds['ETHUSDT_15m_2020_2025']['2021-10-01':]

In [2]:
path="/public/data/factor_data/ETH_15m_factor_data.txt"
factors=pd.read_csv(path, sep='|')
factors.head()

Unnamed: 0.1,Unnamed: 0,ret_bollinger_rsi_signals,c_chu016,c_chu009,c_chu011,ret_hv_ratio_signals,ret_vao_signals,c_chu028,ret_ma120_cci_cross_sig_price,ret_ma20_volume_cross_signals,...,c_chu059,ret_mfi_sig_price,c_hide_009,c_chu044,c_hide_014,ret_rsi_ma120_cross_sig_price,c_hide_006,c_chu037,c_chu004,c_hide_010
0,2019-12-31 16:00:00,0,,,-0.0235,0,0,,0,0,...,,0,0.0,7111059.2053,0.0,0,0.0,,,0.0
1,2019-12-31 16:15:00,0,,-0.0,0.065,0,0,,0,0,...,,0,0.0,2926088.3661,0.0,0,0.0,0.0,,0.0
2,2019-12-31 16:30:00,0,,-0.0001,0.3217,0,0,,0,0,...,,0,0.0,2166516.5598,0.0,0,0.0,0.0001,,0.0
3,2019-12-31 16:45:00,0,,0.0,0.1571,0,0,,0,0,...,,0,0.0,135422.1719,0.0027,0,0.0,0.0,,0.0
4,2019-12-31 17:00:00,0,,-0.0001,-0.085,0,0,,0,0,...,,0,0.0,12405.6168,0.0008,0,0.0,0.0001,,0.0


In [3]:
for i in list(factors.columns):
    print(i)

Unnamed: 0
ret_bollinger_rsi_signals
c_chu016
c_chu009
c_chu011
ret_hv_ratio_signals
ret_vao_signals
c_chu028
ret_ma120_cci_cross_sig_price
ret_ma20_volume_cross_signals
c_chu026
c_chu015
c_chu029
c_chu012
c_chu058
c_hide_028
c_chu045
ret_ma_bbi_rsi_sig_price
c_chu023
c_hide_001
c_hide_032
c_chu019
c_chu032
c_chu049
c_chu051
c_hide_008
ret_macd_sig_price
c_chu022
ret_ma_arrangement_sig
ret_ma120_bolling_cross_sig_price
c_hide_019
c_hide_026
c_chu033
c_hide_030
ret_skdj_sig_price
ret_ma120_bbi_signals
c_chu043
ret_williams_r_sig_price
c_chu042
ret_ma20_rsi_macd_cross_sig_price
c_chu053
c_chu005
c_chu052
ret_cci_fibonacci_signals
ret_ma_cci_sig
c_chu021
c_hide_005
ret_ao_signals
ret_ma_atr_cross_sig_price
c_chu006
c_hide_021
c_hide_029
ret_rma_cross_sig_price
c_hide_002
ret_rsi_bb_ma_signal
c_chu010
c_chu048
c_chu041
c_hide_016
ret_kc_strategy
c_chu047
c_chu056
ret_ma120_macd_1_cross_sig_price
c_chu025
c_hide_011
c_chu018
c_hide_018
c_hide_031
c_chu054
ret_td_signals
c_chu001
c_chu040
c_

In [4]:
factors.index

RangeIndex(start=0, stop=192421, step=1)

In [5]:
def factor(df, window=20):
    """
    改进：引入分形市场理论，识别不同波动周期中的重心变化
    金融意义：捕捉市场结构变化，识别趋势转折点
    """
    df = df.copy()
    # 计算波动率分形维度
    df['range'] = (df['high'] - df['low']) / df['open']
    df['fractal'] = df['range'].rolling(5).std() / df['range'].rolling(20).std()
    
    daily_results = []
    for date, group in df.groupby(pd.Grouper(freq='D')):
        if len(group) < 4:
            continue
            
        time_idx = np.arange(len(group))
        # 分形维度加权
        fractal_weights = group['fractal'].values
        
        # 上涨K线分形加权重心
        up_mask = group['close'] > group['open']
        if up_mask.any():
            up_weights = fractal_weights[up_mask] * group.loc[up_mask, 'volume']
            G_u = np.sum(time_idx[up_mask] * up_weights) / up_weights.sum()
        else:
            G_u = np.nan
            
        # 下跌K线分形加权重心
        down_mask = group['close'] < group['open']
        if down_mask.any():
            down_weights = fractal_weights[down_mask] * group.loc[down_mask, 'volume']
            G_d = np.sum(time_idx[down_mask] * down_weights) / down_weights.sum()
        else:
            G_d = np.nan
            
        daily_results.append({
            'date': date,
            'G_u': G_u,
            'G_d': G_d,
            'avg_fractal': fractal_weights.mean()
        })
    
    daily_df = pd.DataFrame(daily_results).set_index('date')
    
    # 分形状态感知回归
    residuals = []
    for i in range(window, len(daily_df)):
        train_data = daily_df.iloc[i-window:i].dropna()
        if len(train_data) < 10:
            residuals.append(np.nan)
            continue
            
        # 分形状态作为交互项
        X = train_data[['G_u', 'avg_fractal']].values
        y = train_data['G_d'].values
        model = LinearRegression().fit(X, y)
        
        current = daily_df.iloc[i]
        if pd.isna(current['G_u']) or pd.isna(current['G_d']):
            residuals.append(np.nan)
        else:
            pred_G_d = model.predict([[current['G_u'], current['avg_fractal']]])[0]
            residuals.append(current['G_d'] - pred_G_d)
    
    daily_df = daily_df.iloc[window:]
    daily_df['residual'] = residuals
    
    # 分形波动调整
    fractal_level = daily_df['avg_fractal'].rolling(5).mean()
    daily_df['factor'] = daily_df['residual'] * fractal_level
    
    df['factor_value'] = np.nan
    for date, row in daily_df.iterrows():
        mask = (df.index.date == date.date())
        df.loc[mask, 'factor_value'] = row['factor']
    
    return (-df['factor_value']).clip(upper=12)

In [6]:
sig=factor(df)

In [7]:
# sig的index修改为arrange
sig = sig.reset_index(drop=True)

In [8]:
factors['sig']=sig

In [9]:
corr_matrix=factors.corr()

In [10]:
corr_matrix.iloc[-1,:]

# 我想看到这一行的全部数值
corr_matrix.iloc[-1,:].sort_values(ascending=False)

sig                  1.0000
c_chu056             0.0180
c_chu059             0.0169
c_chu058             0.0080
c_chu010             0.0070
                      ...  
low                 -0.0171
c_chu023            -0.0171
c_chu019            -0.0174
c_chu028            -0.0192
ret_stc_sig_price       NaN
Name: sig, Length: 142, dtype: float64