在沙盒环境下，计算期货品种衍生因子信号

In [1]:
import os,pdb,itertools
# Select the 'keim' dataset via the ULTRON_DATA environment variable. It is set
# here, before the ultron imports in the next cell (presumably ultron reads it
# at import time -- TODO confirm).
os.environ['ULTRON_DATA'] = 'keim'

In [2]:
import pandas as pd
import numpy as np
from ultron.env import *
from ultron.tradingday import *
from ultron.strategy.experimental.single_factor import SingleFactor
from ultron.kdutils.parallel import delayed, Parallel

/var/log/ultron/2022-09-27.log


In [3]:
# Switch ultron to its example environment; the log line emitted by this call
# reports that only the sandbox data directory will be read.
enable_example_env()

2022-09-27 18:56:41,247 - [env.py:67] - ultron - INFO - enable example env will only read /home/kerry/ultron/rom/sandbox/keim


#### 读取选中的因子

In [4]:
# Each row of sel_factor.csv is one (factor, window, weekday, bins) combination
# to evaluate; the first CSV column is used as the row index.
sel_factor_path = os.path.join(g_project_data, 'sel_factor.csv')
sel_factor = pd.read_csv(sel_factor_path, index_col=0)
sel_factor.head()

Unnamed: 0,factor,window,weekday,bins
0,BM_MainFar_80D,23,5,5
1,BM_MainFar_80D,25,5,5
2,BM_MainFar_80D,27,5,5
3,BM_RecentFar_20D,5,1,5
4,BM_RecentFar_40D,3,1,3


#### 读取因子

In [5]:
# Load the full factor table, then keep only the key columns plus the distinct
# factors that appear in the selection table.
factor_path = os.path.join(g_project_data, 'factor.csv')
total_data = pd.read_csv(factor_path, index_col=0)
selected_factor_names = sel_factor['factor'].unique().tolist()
factor_data = total_data[['trade_date', 'code'] + selected_factor_names]
factor_data.head()

Unnamed: 0,trade_date,code,BM_MainFar_80D,BM_RecentFar_20D,BM_RecentFar_40D,BM_RecentFar_80D,BM_RecentSecond_20D,BM_RecentSecond_40D,B_FarSpot,B_MainSpot,...,TS_MainFar,TS_RecentFar,TS_RecentSecond,T_DnIntraday_5D,T_DnVolatility_1_10D,T_DnVolatility_2_20D,WeightNetIntTotalChg5D,WeightShortVolRelTotIntChg,inventory,profitratio
0,2017-10-27,A,-0.033259,-0.026646,-0.019436,-0.041974,-0.023047,-0.013509,-0.042729,0.002378,...,-0.05761,-0.079619,-0.104757,-0.00835,-0.007715,-0.002168,-0.000633,-0.037579,,
1,2017-10-27,AL,-0.001423,0.001697,-0.000937,0.000587,0.001133,-0.000539,-0.076121,-0.084726,...,-0.069381,-0.068413,-0.067663,-0.005843,-0.008381,0.000165,-0.000352,-0.012891,-173.600006,-0.005896
2,2017-10-27,BU,-0.016537,0.059635,-0.032271,-0.034618,0.069999,-0.027086,-0.124574,-0.321128,...,-0.102761,-0.124225,-0.159247,-0.005098,-0.009538,0.001268,0.002481,0.275875,,0.055222
3,2017-10-27,C,0.007939,-0.005224,-0.014003,0.025361,-0.001541,-0.007955,-0.006522,0.172635,...,-0.063351,-0.072537,-0.092543,-0.002866,-0.003349,-0.000979,0.002547,0.245555,-369.799988,
4,2017-10-27,CF,-0.02396,0.002346,-0.028774,-0.008043,0.004838,-0.009858,0.05664,0.311288,...,-0.025782,-0.024471,-0.003359,-0.003975,-0.004373,-0.00142,-0.000643,-0.131799,-189.899994,0.123392


#### 加载行情

In [6]:
# Daily market bars (open/high/low/close prices and turnover volume) per
# trade_date and code.
market_data_path = os.path.join(g_project_data, 'market_data.csv')
market_data = pd.read_csv(market_data_path, index_col=0)
market_data.head()

Unnamed: 0,trade_date,code,openPrice,highestPrice,lowestPrice,closePrice,turnoverVol
0,2017-10-27,A,4462.578191,4463.801485,4413.646412,4435.665713,158774
1,2017-10-27,AL,15625.658581,15658.904663,15430.931529,15449.92929,293630
2,2017-10-27,BU,3310.339921,3336.950371,3283.729472,3302.356787,461826
3,2017-10-27,C,2009.751001,2014.561895,2001.331936,2002.53466,375480
4,2017-10-27,CF,20517.496003,20531.174333,20408.069357,20449.104349,84032


#### 衍生因子计算

In [7]:
def _build(total_data, market_data, window, weekday, bins, columns):
    """Build long/short signals for each factor column under one parameter set.

    The factor rows with a missing value in any of ``columns`` are dropped and
    the rest is passed through ``SingleFactor.normalize``. Only rows whose
    trade date is a scheduled 'china.sse' business day falling on the requested
    weekday are kept. Those rows are combined with 1-period returns by
    ``SingleFactor._transformer`` and each factor is then bucketed with
    ``SingleFactor.quantile``.

    Parameters
    ----------
    total_data : pandas.DataFrame
        Factor table with 'trade_date', 'code' and every name in ``columns``.
    market_data : pandas.DataFrame
        Market table with 'trade_date' and 'code' columns; it is handed to
        ``SingleFactor.returns`` indexed by those two columns.
    window :
        Forwarded to ``SingleFactor.normalize`` as ``windows``.
    weekday : int
        Day of week to keep, compared as ``d.weekday() == weekday - 1``
        (so 1 means Monday, assuming the schedule yields datetime-like
        objects -- TODO confirm against ``makeSchedule``).
    bins : int
        Number of quantile groups; also the label of the group mapped to +1.
    columns : list of str
        Factor column names to process.

    Returns
    -------
    list of dict
        One dict per entry of ``columns`` with keys 'position' (the quantile
        output with its index reset and an added 'signal' column), 'params'
        (the window/weekday/bins used) and 'factor' (the column name).
        'signal' is 1 where ``group == bins``, -1 where ``group == 1`` and 0
        otherwise.
    """
    sf = SingleFactor(factor_data=None, market_data=None, codes=None, columns=None)
    normalize_data = sf.normalize(factor_data=total_data.dropna(subset=columns),
                                  windows=window, columns=columns)
    normalize_data = normalize_data.sort_values(by=['trade_date','code'])
    normalize_data['trade_date'] = pd.to_datetime(normalize_data['trade_date'])

    dates = makeSchedule(normalize_data.trade_date.min(),
                         normalize_data.trade_date.max(),
                         '1b', 'china.sse', BizDayConventions.Preceding)
    dates = [d for d in dates if d.weekday() == (weekday-1)]

    # Filter with a membership mask instead of ``.loc[dates]``: label-list
    # lookup raises KeyError when a scheduled date has no rows left (e.g. after
    # the dropna above). Row and column order are unchanged when every
    # scheduled date is present.
    indexed = normalize_data.set_index('trade_date')
    normalize_data = indexed.loc[indexed.index.isin(dates)].reset_index()

    returns_data = sf.returns(market_data.set_index(['trade_date','code']), period=1)
    period_data = sf._transformer(normalize_data=normalize_data,
                                  returns=returns_data, columns=columns,
                                  period='1b')
    period_data['trade_date'] = pd.to_datetime(period_data['trade_date'])

    res = []
    for column in columns:
        position = sf.quantile(normalize_data=period_data, factor_name=column,
                               n_bins=bins).reset_index()
        # Top group -> long (+1), bottom group -> short (-1), the rest flat (0).
        position['signal'] = np.where(position['group']==bins, 1,
                                      np.where(position['group']==1, -1, 0))
        res.append({'position':position,
                    'params':{'window':window,'weekday':weekday,'bins':bins},
                    'factor':column})
    return res

In [8]:
def transfer(res):
    """Turn one result dict into a single-column signal frame.

    ``res`` must provide 'factor' (str), 'params' (a mapping with 'window',
    'weekday' and 'bins') and 'position' (a DataFrame with 'trade_date',
    'code' and 'signal' columns).

    Returns a DataFrame indexed by ('trade_date', 'code') whose only column is
    the signal, renamed to ``<factor>_<window>_<weekday>_<bins>``.
    """
    factor = res['factor']
    params = res['params']
    suffix = '_'.join(str(params[key]) for key in ('window', 'weekday', 'bins'))
    column_name = factor + '_' + suffix

    signals = res['position'][['trade_date', 'code', 'signal']]
    signals = signals.rename(columns={'signal': column_name})
    return signals.set_index(['trade_date', 'code'])

In [9]:
# Worker-pool settings reused by the fan-out cell: 4 jobs, verbosity level 1,
# and a pre-dispatch depth of '2*n_jobs' (joblib-style arguments -- confirm
# against ultron.kdutils.parallel).
parallel = Parallel(n_jobs=4, verbose=1, pre_dispatch='2*n_jobs')

#### 计算衍生因子

In [10]:
# One _build task per row of the selection table. Each task receives only the
# key columns plus its own factor column. The tasks stay a lazy generator so
# they are created as the pool consumes them.
build_jobs = (
    delayed(_build)(
        factor_data[['trade_date', 'code', spec['factor']]],
        market_data,
        window=spec['window'],
        weekday=spec['weekday'],
        bins=spec['bins'],
        columns=[spec['factor']],
    )
    for spec in sel_factor.to_dict(orient='records')
)
out = parallel(build_jobs)

In [11]:
# Every task returns a list of result dicts; flatten them into one list.
results = [result for task_results in out for result in task_results]

In [12]:
# Convert each result dict into a single-column signal frame.
res = list(map(transfer, results))

#### 生成信号数据

In [13]:
# Align all signal columns side by side on (trade_date, code), restore the keys
# as ordinary columns, parse the dates and order the rows by date then code.
signal_dt = (
    pd.concat(res, axis=1)
    .reset_index()
    .assign(trade_date=lambda frame: pd.to_datetime(frame['trade_date']))
    .sort_values(by=['trade_date', 'code'])
)
signal_dt.tail()

Unnamed: 0,trade_date,code,BM_MainFar_80D_23_5_5,BM_MainFar_80D_27_5_5,BM_MainFar_80D_25_5_5,BM_RecentFar_20D_5_1_5,BM_RecentFar_40D_9_2_3,BM_RecentFar_40D_3_1_3,BM_RecentFar_40D_11_2_3,BM_RecentFar_40D_13_1_3,...,T_DnVolatility_1_10D_21_1_5,T_DnVolatility_2_20D_25_1_4,T_DnVolatility_2_20D_27_1_4,WeightNetIntTotalChg5D_11_5_3,WeightNetIntTotalChg5D_13_5_3,WeightShortVolRelTotIntChg_9_4_4,inventory_5_3_5,inventory_10_1_5,inventory_15_4_5,profitratio_5_3_5
37156,2022-06-22,TA,-1.0,-1.0,-1.0,0.0,-1.0,1.0,-1.0,0.0,...,0.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,0.0,1.0,0.0
37157,2022-06-22,V,-1.0,-1.0,-1.0,0.0,0.0,1.0,0.0,0.0,...,-1.0,-1.0,-1.0,0.0,0.0,1.0,-1.0,0.0,-1.0,0.0
37158,2022-06-22,Y,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,1.0,1.0,0.0,0.0,-1.0,0.0,1.0
37159,2022-06-22,ZC,1.0,1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,...,1.0,-1.0,-1.0,0.0,0.0,0.0,,,,0.0
37160,2022-06-22,ZN,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0


#### 保存信号数据，供其他例子使用

In [14]:
# Write the signal table into the project data directory as signal.csv.
signal_path = os.path.join(g_project_data, 'signal.csv')
signal_dt.to_csv(signal_path, encoding='UTF-8')