# 特征构造与测试

本文档介绍构造高频特征并对其进行快速测试的一般流程。

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
import sys

# Make the local checkout of High-Frequency-Predictor importable.
# Put your own local path in the tuple below.
for repo_path in (
    'C:/Users/HBG/Desktop/Repositories/High-Frequency-Predictor',
    'C:/Users/Administrator/Desktop/Repositories/High-Frequency-Predictor',
):
    # Guard against duplicates so re-running this cell does not keep
    # growing sys.path with the same entries.
    if repo_path not in sys.path:
        sys.path.append(repo_path)

from hfp.hfp import HFP

In [9]:
# Reload the hfp.hfp module so edits to its source take effect without
# restarting the kernel.
# The module is fetched under its own name instead of `import hfp.hfp`,
# because that statement would rebind the notebook-level name `hfp` to the
# package and overwrite a tester instance stored under the same name.
hfp_module = importlib.import_module('hfp.hfp')
importlib.reload(hfp_module)

# Re-import the class so HFP refers to the freshly reloaded definition.
# Instances created before the reload keep using the old class; re-create
# them to pick up the changes.
from hfp.hfp import HFP

#### 初始化测试器

In [7]:
# Number of stocks tested simultaneously. One stock needs roughly 1 GB of
# memory, so choose this according to your RAM and CPU capacity.
stock_num = 10
# Location of the raw data.
data_path = 'D:/Documents/学习资料/HFData'

# NOTE: this instance shares its name with the `hfp` package imported above.
hfp = HFP(stock_num=stock_num, data_path=data_path)

### 快速构造测试特征

#### 例子：反转
在以下例子中，我们测试了过去30秒平均收益率的相反数对下一个3秒中间价收益率的预测力。以000002为例，时序平均相关系数为0.0632，相关系数为正的天数比例为0.8807，这说明高频收益率有较强的反转效应。通过调整shift参数可以查看该特征对未来不同时间跨度收益率的预测力。

In [19]:
# Factor formula; see the hfp documentation for the meaning of each field.
fml = 'neg{tsmean{ret,10}}'
# shift selects how many 3-second steps ahead the predicted return lies.
stats = hfp.test_factor(fml, shift=1)
print()  # blank line before the summary number printed below

# The first element of every entry in stats exposes mean_corr (presumably
# one entry per stock); collect them and report their average.
corr = [result[0].mean_corr for result in stats.values()]
print(np.mean(corr))

000002.XSHE mean corr: 0.0632, positive_corr_ratio: 0.8807, corr_IR: 1.2437
000009.XSHE mean corr: 0.0985, positive_corr_ratio: 0.9506, corr_IR: 1.8338
000012.XSHE mean corr: 0.1222, positive_corr_ratio: 0.9876, corr_IR: 2.5295
000016.XSHE mean corr: 0.0698, positive_corr_ratio: 0.8285, corr_IR: 0.9644
000019.XSHE mean corr: 0.0528, positive_corr_ratio: 0.8174, corr_IR: 0.7558
000021.XSHE mean corr: -0.0045, positive_corr_ratio: 0.5041, corr_IR: -0.0778
000031.XSHE mean corr: 0.1435, positive_corr_ratio: 1.0000, corr_IR: 3.9561
000034.XSHE mean corr: 0.0049, positive_corr_ratio: 0.5892, corr_IR: 0.0961
000061.XSHE mean corr: 0.0835, positive_corr_ratio: 0.8880, corr_IR: 1.3340
000063.XSHE mean corr: -0.0552, positive_corr_ratio: 0.2058, corr_IR: -0.7580

0.05786547435551289


#### 例子：买卖一档挂单量之差

In [21]:
# Factor formula built from BidSize1 and OfferSize1; see the hfp
# documentation for the exact meaning of the operators.
fml = 'tsmean{minus{BidSize1,OfferSize1},10}'
stats = hfp.test_factor(fml, shift=1)
print()  # blank line before the summary number printed below

# The first element of every entry in stats exposes mean_corr (presumably
# one entry per stock); collect them and report their average.
corr = [result[0].mean_corr for result in stats.values()]
print(np.mean(corr))

000002.XSHE mean corr: 0.0359, positive_corr_ratio: 0.9918, corr_IR: 2.1950
000009.XSHE mean corr: 0.0548, positive_corr_ratio: 0.9959, corr_IR: 3.1099
000012.XSHE mean corr: 0.0574, positive_corr_ratio: 1.0000, corr_IR: 3.6174
000016.XSHE mean corr: 0.0578, positive_corr_ratio: 0.9958, corr_IR: 2.6814
000019.XSHE mean corr: 0.0528, positive_corr_ratio: 0.9917, corr_IR: 2.3405
000021.XSHE mean corr: 0.0286, positive_corr_ratio: 0.9587, corr_IR: 1.6675
000031.XSHE mean corr: 0.0605, positive_corr_ratio: 1.0000, corr_IR: 4.0425
000034.XSHE mean corr: 0.0331, positive_corr_ratio: 0.9585, corr_IR: 1.6277
000061.XSHE mean corr: 0.0555, positive_corr_ratio: 1.0000, corr_IR: 3.5302
000063.XSHE mean corr: 0.0216, positive_corr_ratio: 0.8848, corr_IR: 1.2420

0.04582037351130905


#### 例子：买卖一档挂单量的相关系数

In [14]:
# Factor formula combining BidSize1 and OfferSize1 through the tscorr
# operator with argument 10 (presumably a rolling correlation over the last
# 10 ticks; confirm against the hfp formula documentation).
fml = 'tscorr{BidSize1,OfferSize1,10}' 
# Test the factor with shift=1; the result is kept in stats.
stats = hfp.test_factor(fml, shift=1)

000002.XSHE mean corr: 0.0008, positive_corr_ratio: 0.5267, corr_IR: 0.0661
000009.XSHE mean corr: 0.0014, positive_corr_ratio: 0.5144, corr_IR: 0.1046
000012.XSHE mean corr: -0.0002, positive_corr_ratio: 0.4813, corr_IR: -0.0135
000016.XSHE mean corr: -0.0014, positive_corr_ratio: 0.4895, corr_IR: -0.0991
000019.XSHE mean corr: -0.0003, positive_corr_ratio: 0.5353, corr_IR: -0.0118
000021.XSHE mean corr: -0.0003, positive_corr_ratio: 0.4711, corr_IR: -0.0221
000031.XSHE mean corr: 0.0008, positive_corr_ratio: 0.5289, corr_IR: 0.0516
000034.XSHE mean corr: 0.0013, positive_corr_ratio: 0.5602, corr_IR: 0.1043
000061.XSHE mean corr: 0.0001, positive_corr_ratio: 0.5270, corr_IR: 0.0038
000063.XSHE mean corr: 0.0027, positive_corr_ratio: 0.5761, corr_IR: 0.1920
