In [1]:
import pandas as pd
import numpy as np
import pyTSL as pt
import os
import datetime as dt
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
import matplotlib.pyplot as plt
from Config import factor_config as fc
from Config import performance_config as pc

# Daily main-contract table, loaded once at import time; read_file() looks up
# its 'trade_date' and 'IC' columns.
head_future = pd.read_excel(os.path.join(fc.head_path, 'IC_head_future.xlsx')) # main contract for each trading day

In [2]:
def read_file(date, head=None):
    """
    Build the tick-file name for ``date`` from the main-contract table.

    The contract is taken from the row *preceding* the one whose
    ``trade_date`` equals ``date`` (i.e. the previous row's ``IC`` value).

    :param date: trading day to look up; must support ``strftime`` and be
                 comparable with the ``trade_date`` column.
    :param head: optional main-contract table with ``trade_date`` and ``IC``
                 columns. Defaults to the module-level ``head_future``.
    :return: ``'<contract>_<YYYYMMDD>.tdf'``, or ``None`` when ``date`` is not
             in the table or is its first row (no preceding row to read).
    """
    table = head_future if head is None else head

    # Work with positions, not index labels: the original compared a pandas
    # Index to 0 inside `if`, which is ambiguous/erroring when the date is
    # missing or duplicated, and fed labels to `iloc`.
    is_match = (table['trade_date'] == date).to_numpy()
    if not is_match.any():
        return None

    position = int(is_match.argmax())  # first matching row
    if position == 0:
        return None

    contract = table['IC'].iloc[position - 1]
    return contract + '_' + date.strftime('%Y%m%d') + '.tdf'


def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated with NumPy (element-wise for arrays/Series)."""
    decay = np.exp(-x)
    return 1 / (1 + decay)


def cal_weighted_IC(x: pd.Series, y: pd.Series, weight: pd.Series) -> float:
    """
    Weighted Pearson correlation between ``x`` and ``y``.

    Rows where any of ``x``, ``y`` or ``weight`` is NaN are dropped *before*
    the weights are normalised, so every weighted moment is taken over the
    same observations. (Previously each ``.sum()`` skipped NaNs on its own,
    which normalised the weights over a different row set than the moments.)

    :param x: first series (e.g. factor values)
    :param y: second series (e.g. returns)
    :param weight: per-observation weights; rescaled internally to sum to 1
    :return: weighted correlation as a scalar; ``nan`` when no complete rows
             remain, the weights sum to zero, or either weighted variance is
             not strictly positive.
    """
    # Building one frame aligns the inputs on their index, as the original
    # element-wise products did.
    frame = pd.DataFrame({'x': x, 'y': y, 'w': weight}).dropna()
    if frame.empty:
        return np.nan

    total_weight = frame['w'].sum()
    if total_weight == 0:
        return np.nan

    w = frame['w'] / total_weight
    xs, ys = frame['x'], frame['y']

    mean_x = (w * xs).sum()
    mean_y = (w * ys).sum()
    weight_cov = (w * xs * ys).sum() - mean_x * mean_y
    var_x = (w * xs ** 2).sum() - mean_x ** 2
    var_y = (w * ys ** 2).sum() - mean_y ** 2

    # Constant inputs (or rounding that pushes a variance to <= 0) have no
    # defined correlation; return nan instead of inf / a complex power.
    if not (var_x > 0 and var_y > 0):
        return np.nan

    return weight_cov / (var_x ** (1 / 2) * var_y ** (1 / 2))


def _decile_means(sorted_values, n_groups=10):
    """Mean of each of ``n_groups`` consecutive, near-equal slices of an already-sorted Series."""
    n = len(sorted_values)
    # Integer arithmetic keeps the cut points exact (no float rounding).
    bounds = [n * k // n_groups for k in range(n_groups + 1)]
    return [sorted_values.iloc[lo:hi].mean() for lo, hi in zip(bounds[:-1], bounds[1:])]


def _sign_persistence(series):
    """Share of consecutive observation pairs (both non-NaN) whose values have the same sign."""
    pair_product = (series * series.shift(-1)).dropna()
    if pair_product.empty:
        return np.nan
    return (pair_product > 0).mean()


def _ic_summary(series):
    """Return [mean, mean/std, one-sample t statistic against 0, sign persistence] for a daily IC series."""
    return [
        series.mean(),
        series.mean() / series.std(),
        stats.ttest_1samp(series, 0, nan_policy='omit')[0],
        _sign_persistence(series),
    ]


def run(start_date, end_date):
    """
    Single-factor backtest over every factor in ``pc.factor_list``.

    For each factor and each trading day in ``(start_date, end_date]`` the
    function loads that day's tick file, builds a per-minute price, merges it
    with the factor values and, for every horizon in ``pc.t``, computes the
    Spearman IC, the weighted IC and ten factor-sorted group mean returns.
    Daily values are then aggregated per horizon into one summary row, a
    bar chart of the group means is saved under ``pc.fig_save_path``, and the
    summary table is written to an Excel file under ``pc.result_save_path``.

    :param start_date: lower bound of the test window (exclusive)
    :param end_date: upper bound of the test window (inclusive)
    :return: None; results are written to disk.
    """
    # Summary table header (runtime labels, written to the Excel file as-is).
    multi_index = pd.MultiIndex.from_tuples(
        [('因子', '因子名'), ('因子', '方向'), ('因子', '预测周期'), ('因子', '因子自相关性'),
        ('RankIC', 'IC均值'), ('RankIC', 'IC_IR'), ('RankIC', 't值'), ('RankIC', '方向延续概率'),
        ('WIC', 'IC均值'), ('WIC', 'IC_IR'), ('WIC', 't值'), ('WIC', '方向延续概率'),
        ])
    result = pd.DataFrame(index=multi_index, columns=[]).T

    group_cols = [str(g) for g in range(1, 11)]
    day_cols = ['minute', 'T', 'IC', 'WIC', 'selfCorr'] + group_cols

    for f_ in pc.factor_list:

        # NOTE: pickle files execute arbitrary code on load; only read trusted files.
        factor_data = pd.read_pickle(os.path.join(fc.factor_save_path, '%s.pkl' % f_))
        factor_data['date_date'] = pd.to_datetime(factor_data['minute'].dt.date)

        # Trading days inside the test window.
        # NOTE(review): the lower bound is strict, so start_date itself is excluded — confirm intended.
        file_dates = factor_data['date_date'].drop_duplicates()
        file_dates = file_dates[(file_dates > start_date) & (file_dates <= end_date)]
        file_dates = file_dates.reset_index(drop=True)

        # Collect one record per (day, horizon) and build the frame once,
        # instead of growing a DataFrame row by row.
        day_records = []

        for d_ in tqdm(file_dates, desc=f_):

            f_path = read_file(d_)
            if f_path is None:
                # read_file has no contract for this day; nothing to load.
                continue

            ticks = pd.read_pickle(os.path.join(fc.kline_data, f_path))[['date', 'price', 'vol']].copy()
            ticks['date'] = pd.to_datetime(ticks['date'].map(pt.DoubleToDatetime))
            ticks['minute'] = ticks['date'].dt.floor('min')  # 'T' alias is deprecated

            # Per-minute price = sum(price * vol) / sum(vol). The column keeps
            # its original name 'twap' although it is volume-weighted.
            ticks['amount'] = ticks['price'] * ticks['vol']
            per_minute = ticks.groupby('minute')[['amount', 'vol']].sum()
            twap = (per_minute['amount'] / per_minute['vol']).dropna().to_frame('twap')

            f_data = pd.merge(factor_data[factor_data['date_date'] == d_], twap, on=['minute'])

            # Lag-1 rank autocorrelation of the factor within the day.
            self_corr = f_data[f_].corr(f_data[f_].shift(1), method='spearman')

            for t_ in pc.t:
                yield_col = f'yield_{t_}s'
                weight_col = f'weight_{t_}s'

                # NOTE(review): pct_change(periods=t_) compares each row with the
                # one t_ rows earlier, i.e. a trailing return — confirm the factor
                # timestamps are aligned so this is the intended target.
                f_data[yield_col] = f_data['twap'].pct_change(periods=t_)
                f_data[weight_col] = sigmoid(f_data[yield_col].abs() * 100) ** 0.5

                # Every metric below uses the same complete-case sample.
                tmp = f_data[[f_, yield_col, weight_col]].dropna()

                ic = tmp[yield_col].corr(tmp[f_], method='spearman')
                wic = cal_weighted_IC(tmp[f_], tmp[yield_col], tmp[weight_col])

                # Ten groups by descending factor value. Using `tmp` keeps
                # NaN-factor rows (sorted last) out of the bottom group.
                ranked_yield = tmp.sort_values(f_, ascending=False)[yield_col]

                day_records.append([d_, t_, ic, wic, self_corr] + _decile_means(ranked_yield))

        day_result = pd.DataFrame(day_records, columns=day_cols)

        # Aggregate the daily values per horizon.
        for t_ in pc.t:
            day_result_t = day_result[day_result['T'] == t_]

            result.loc[len(result)] = (
                [f_, pc.factor_dict[f_], t_, day_result_t['selfCorr'].mean()]
                + _ic_summary(day_result_t['IC'])
                + _ic_summary(day_result_t['WIC'])
            )

            # Bar chart of the average group returns; close the figure so
            # figures do not pile up across factors and horizons.
            fig, ax = plt.subplots()
            day_result_t[group_cols].mean().plot(kind='bar', ax=ax)
            fig.savefig(os.path.join(pc.fig_save_path, f'{f_}_{t_}min.png'))
            plt.close(fig)

    # Open the workbook only once there is something to write, and let the
    # context manager close it, so a failure above cannot leak the handle.
    with pd.ExcelWriter(os.path.join(pc.result_save_path, '单因子回测结果.xlsx'), engine='openpyxl') as writer:
        result.to_excel(writer)


if __name__ == '__main__':

    # Run the backtest over the start/end dates defined in performance_config.
    run(pc.start_date, pc.end_date)

alpha001_start15_corr10: 100%|██████████| 645/645 [01:59<00:00,  5.41it/s]
alpha002_start15: 100%|██████████| 645/645 [01:40<00:00,  6.41it/s]
alpha003_start15_roll10: 100%|██████████| 645/645 [01:39<00:00,  6.47it/s]
alpha004_start15_roll2_roll8: 100%|██████████| 645/645 [01:34<00:00,  6.82it/s]
alpha005_start15_roll5: 100%|██████████| 645/645 [01:16<00:00,  8.42it/s]
alpha006_start15_diff5: 100%|██████████| 645/645 [01:16<00:00,  8.44it/s]
alpha007_start15_diff5: 100%|██████████| 645/645 [01:16<00:00,  8.45it/s]
alpha008_start15_diff5: 100%|██████████| 645/645 [01:15<00:00,  8.51it/s]
alpha009_start15_alpha0.29: 100%|██████████| 645/645 [01:16<00:00,  8.44it/s]
alpha010_start15_roll10: 100%|██████████| 645/645 [01:18<00:00,  8.25it/s]
alpha011_start15_roll10: 100%|██████████| 645/645 [01:19<00:00,  8.11it/s]
alpha012_start15_roll10: 100%|██████████| 645/645 [01:15<00:00,  8.55it/s]
alpha013_start15: 100%|██████████| 645/645 [01:16<00:00,  8.40it/s]
alpha014_start15_shift10: 100%|████

<Figure size 640x480 with 0 Axes>