在沙盒环境中，通过寻优算法对因子进行深度挖掘，并通过自定义评估方式筛选出表现较好的因子。

In [1]:
import os,pdb,itertools,copy,datetime
# Select the 'keim' dataset through the ULTRON_DATA environment variable.
# NOTE(review): this is set before `ultron` is imported in the next cell —
# presumably ultron reads it at import time; confirm before reordering cells.
os.environ['ULTRON_DATA'] = 'keim'

In [2]:
import random
import numpy as np
import pandas as pd
from ultron.env import *
from ultron.factor.genetic.geneticist.operators import custom_transformer
from ultron.factor.genetic.geneticist.engine import Engine

/var/log/ultron/2022-09-27.log


In [3]:
# Switch ultron to its example environment. Per the log line emitted by this call,
# data is then only read from the local sandbox directory of the selected dataset.
enable_example_env()

2022-09-27 19:23:23,279 - [env.py:67] - ultron - INFO - enable example env will only read /home/kerry/ultron/rom/sandbox/keim


#### 加载行情数据

In [4]:
# Read the sandbox market data (one row per trade_date/code with OHLC prices
# and turnover volume, as shown in the preview below).
market_data = pd.read_csv(
    os.path.join(g_project_data, 'market_data.csv'),
    index_col=0,
)
# Parse trade_date into datetimes so it can be used as a join key later on.
market_data = market_data.assign(
    trade_date=pd.to_datetime(market_data['trade_date']))
market_data.head()

Unnamed: 0,trade_date,code,openPrice,highestPrice,lowestPrice,closePrice,turnoverVol
0,2017-10-27,A,4462.578191,4463.801485,4413.646412,4435.665713,158774
1,2017-10-27,AL,15625.658581,15658.904663,15430.931529,15449.92929,293630
2,2017-10-27,BU,3310.339921,3336.950371,3283.729472,3302.356787,461826
3,2017-10-27,C,2009.751001,2014.561895,2001.331936,2002.53466,375480
4,2017-10-27,CF,20517.496003,20531.174333,20408.069357,20449.104349,84032


#### 收益率计算

In [5]:
def next_returs_impl(price_data, key, name):
    """Compute next-period log returns for every row of ``price_data``.

    Parameters
    ----------
    price_data : pandas.DataFrame
        Frame with a two-level MultiIndex. The innermost level is unstacked
        into columns, so returns are computed along the outer level
        (the caller in this notebook uses ``['trade_date', 'code']``).
    key : str
        Name of the price column to use.
    name : str
        Name given to the returned Series.

    Returns
    -------
    pandas.Series
        ``log(price[t+1] / price[t])`` aligned to ``price_data.index``.
        The last period of each instrument is NaN (there is no next price),
        and infinite values are replaced by NaN.
    """
    price_tb = price_data[key].unstack()
    # Carry the last known price forward over gaps. `.ffill()` replaces the
    # deprecated `fillna(method='pad', inplace=True)` spelling.
    price_tb = price_tb.ffill()
    return_tb = np.log(price_tb.shift(-1) / price_tb)
    # A zero price on either side of the ratio yields +/-inf; treat as missing.
    return_tb = return_tb.replace([np.inf, -np.inf], np.nan)
    # stack() may drop NaN rows; reindex restores the caller's original rows/order.
    return_tb = return_tb.stack().reindex(price_data.index)
    return_tb.name = name
    return return_tb

In [6]:
# Next-day log return of the close price, flattened back to one row per
# (trade_date, code) with the return stored in the 'nxt1_ret' column.
next_rets = (
    next_returs_impl(
        market_data.set_index(['trade_date', 'code']),
        'closePrice',
        'nxt1_ret',
    )
    .reset_index()
)
next_rets = next_rets.assign(
    trade_date=pd.to_datetime(next_rets['trade_date']))
next_rets.head()

Unnamed: 0,trade_date,code,nxt1_ret
0,2017-10-27,A,-0.000276
1,2017-10-27,AL,0.001222
2,2017-10-27,BU,0.019945
3,2017-10-27,C,-0.00542
4,2017-10-27,CF,0.001003


#### 加载选中的因子

In [7]:
# Load the list of pre-selected factors. The preview below shows the columns
# factor, window, weekday and bins; only the 'factor' column is used later in
# this notebook to pick columns out of the factor table.
sel_factor = pd.read_csv(os.path.join(g_project_data, 'sel_factor.csv'), index_col=0)
sel_factor.head()

Unnamed: 0,factor,window,weekday,bins
0,BM_MainFar_80D,23,5,5
1,BM_MainFar_80D,25,5,5
2,BM_MainFar_80D,27,5,5
3,BM_RecentFar_20D,5,1,5
4,BM_RecentFar_40D,3,1,3


#### 读取因子

In [8]:
# Load the full factor table, then keep the key columns plus the selected factors.
total_data = pd.read_csv(os.path.join(g_project_data, 'factor.csv'), index_col=0)
# Build factor_data in one expression: `.assign` returns a new frame, so the
# datetime conversion is never written into a slice of total_data (the
# original column assignment triggered SettingWithCopyWarning).
factor_data = total_data[
    ['trade_date', 'code'] + sel_factor['factor'].unique().tolist()
].assign(trade_date=lambda frame: pd.to_datetime(frame['trade_date']))
factor_data.head()

Unnamed: 0,trade_date,code,BM_MainFar_80D,BM_RecentFar_20D,BM_RecentFar_40D,BM_RecentFar_80D,BM_RecentSecond_20D,BM_RecentSecond_40D,B_FarSpot,B_MainSpot,...,TS_MainFar,TS_RecentFar,TS_RecentSecond,T_DnIntraday_5D,T_DnVolatility_1_10D,T_DnVolatility_2_20D,WeightNetIntTotalChg5D,WeightShortVolRelTotIntChg,inventory,profitratio
0,2017-10-27,A,-0.033259,-0.026646,-0.019436,-0.041974,-0.023047,-0.013509,-0.042729,0.002378,...,-0.05761,-0.079619,-0.104757,-0.00835,-0.007715,-0.002168,-0.000633,-0.037579,,
1,2017-10-27,AL,-0.001423,0.001697,-0.000937,0.000587,0.001133,-0.000539,-0.076121,-0.084726,...,-0.069381,-0.068413,-0.067663,-0.005843,-0.008381,0.000165,-0.000352,-0.012891,-173.600006,-0.005896
2,2017-10-27,BU,-0.016537,0.059635,-0.032271,-0.034618,0.069999,-0.027086,-0.124574,-0.321128,...,-0.102761,-0.124225,-0.159247,-0.005098,-0.009538,0.001268,0.002481,0.275875,,0.055222
3,2017-10-27,C,0.007939,-0.005224,-0.014003,0.025361,-0.001541,-0.007955,-0.006522,0.172635,...,-0.063351,-0.072537,-0.092543,-0.002866,-0.003349,-0.000979,0.002547,0.245555,-369.799988,
4,2017-10-27,CF,-0.02396,0.002346,-0.028774,-0.008043,0.004838,-0.009858,0.05664,0.311288,...,-0.025782,-0.024471,-0.003359,-0.003975,-0.004373,-0.00142,-0.000643,-0.131799,-189.899994,0.123392


In [9]:
# Candidate input factors: every column except the join keys and the
# inventory / profitratio columns.
# NOTE(review): those two are presumably excluded because they contain
# missing values (see the NaNs in the preview above) — confirm.
features = [
    column
    for column in factor_data.columns
    if column not in ('trade_date', 'code', 'inventory', 'profitratio')
]

In [10]:
# Attach next-period returns to the factor rows. `merge` defaults to an inner
# join, so rows without a matching (trade_date, code) in next_rets are dropped.
# Note: this rebinds `total_data`, which previously held the raw factor table.
total_data = factor_data.merge(next_rets, on=['trade_date','code'])
total_data.head()

Unnamed: 0,trade_date,code,BM_MainFar_80D,BM_RecentFar_20D,BM_RecentFar_40D,BM_RecentFar_80D,BM_RecentSecond_20D,BM_RecentSecond_40D,B_FarSpot,B_MainSpot,...,TS_RecentFar,TS_RecentSecond,T_DnIntraday_5D,T_DnVolatility_1_10D,T_DnVolatility_2_20D,WeightNetIntTotalChg5D,WeightShortVolRelTotIntChg,inventory,profitratio,nxt1_ret
0,2017-10-27,A,-0.033259,-0.026646,-0.019436,-0.041974,-0.023047,-0.013509,-0.042729,0.002378,...,-0.079619,-0.104757,-0.00835,-0.007715,-0.002168,-0.000633,-0.037579,,,-0.000276
1,2017-10-27,AL,-0.001423,0.001697,-0.000937,0.000587,0.001133,-0.000539,-0.076121,-0.084726,...,-0.068413,-0.067663,-0.005843,-0.008381,0.000165,-0.000352,-0.012891,-173.600006,-0.005896,0.001222
2,2017-10-27,BU,-0.016537,0.059635,-0.032271,-0.034618,0.069999,-0.027086,-0.124574,-0.321128,...,-0.124225,-0.159247,-0.005098,-0.009538,0.001268,0.002481,0.275875,,0.055222,0.019945
3,2017-10-27,C,0.007939,-0.005224,-0.014003,0.025361,-0.001541,-0.007955,-0.006522,0.172635,...,-0.072537,-0.092543,-0.002866,-0.003349,-0.000979,0.002547,0.245555,-369.799988,,-0.00542
4,2017-10-27,CF,-0.02396,0.002346,-0.028774,-0.008043,0.004838,-0.009858,0.05664,0.311288,...,-0.024471,-0.003359,-0.003975,-0.004373,-0.00142,-0.000643,-0.131799,-189.899994,0.123392,0.001003


#### 自定义优秀种群保存函数

In [11]:
def save_model(gen, rootid, sessionid, best_programs, custom_params):
    """Placeholder persistence hook handed to the engine as ``save_model``.

    It intentionally does nothing and returns ``None``; replace the body to
    persist ``best_programs``.
    NOTE(review): the meaning of each argument is defined by the engine that
    calls this hook and is not visible in this notebook — confirm before use.
    """
    return None

#### 自定义评估因子函数

In [12]:
def evaluation(factor_data, total_data, factor_sets,
               custom_params, default_value):
    """Score a candidate factor by the absolute value of its mean daily rank IC.

    Parameters
    ----------
    factor_data : pandas.DataFrame
        Candidate factor values in a ``'transformed'`` column; after
        ``reset_index()`` it must expose ``'trade_date'`` and ``'code'``.
    total_data : pandas.DataFrame
        Must contain ``'trade_date'``, ``'code'`` and ``'nxt1_ret'``.
    factor_sets, custom_params, default_value
        Accepted for interface compatibility and not used by this function.
        NOTE(review): ``default_value`` looks like a fallback fitness, but its
        contract is not visible here, so it is deliberately not returned.

    Returns
    -------
    float
        ``abs(mean over trade_date of Spearman corr(transformed, nxt1_ret))``.
        NaN when the factor and the returns share no rows, or when every
        daily correlation is undefined.
    """
    returns = total_data[['trade_date', 'code', 'nxt1_ret']]
    merged = factor_data.reset_index().merge(returns, on=['trade_date', 'code'])
    if merged.empty:
        # Nothing to correlate; keep the result a scalar instead of letting
        # an empty groupby result propagate.
        return float('nan')
    # Select the two columns before apply so the callback never operates on
    # the grouping column (that behaviour is deprecated in recent pandas).
    daily_ic = merged.groupby('trade_date')[['transformed', 'nxt1_ret']].apply(
        lambda day: day.corr(method='spearman').values[0, 1])
    # mean() skips days whose correlation is NaN (e.g. constant values).
    return abs(daily_ic.mean())
    

#### 选择参与衍生的算子

In [13]:
# Names of the operators allowed in generated formulas, one per line.
operators_sets = [
    'SecurityMovingAverage',
    'SecurityMovingDecay',
    'SecurityMovingMax',
    'SecurityMovingArgMax',
    'SecurityMovingMin',
    'SecurityMovingArgMin',
    'SecurityMovingRank',
    'SecurityMovingQuantile',
    'SecurityMovingSum',
    'SecurityMovingVariance',
    'SecurityMovingStandardDeviation',
    'SecurityMovingCountedPositive',
    'SecurityMovingPositiveAverage',
    'SecurityMovingPositiveDifferenceAverage',
    'SecurityMovingNegativeDifferenceAverage',
    'SecurityMovingRSI',
    'SecurityDeltaValueHolder',
    'SecurityShiftedValueHolder',
    'SecurityMovingCorrelation',
]
# Convert the names into the representation the engine expects; note that the
# same variable is rebound from a list of names to the transformed result.
operators_sets = custom_transformer(operators_sets)

In [14]:
# Original note: "initial population size".
# NOTE(review): this value is passed to Engine as `tournament_size`, while
# `population_size` is set separately to 50 — confirm the intended meaning.
tournament_size = 20
# Original note: "screening score threshold".
# NOTE(review): fitness values in the run log below are around 0.005, far
# below 0.75 — confirm this threshold is on the same scale as `evaluation`.
standard_score = 0.75

# Parameters forwarded to the engine as `custom_params`.
custom_params = {'tournament_size':tournament_size,
                 'standard_score':standard_score,
                 'rootid':'ultron10001'}

In [15]:
# Configure the genetic search: `features` are the candidate input factors,
# `operators_sets` the allowed operators, `evaluation` the fitness function and
# `save_model` the (currently no-op) persistence hook.
# NOTE(review): no random seed is passed here or set anywhere in the notebook,
# so runs are presumably not reproducible — check whether Engine accepts one.
# NOTE(review): the variable name `gentic` is a typo that the training cell
# also uses; rename both together if cleaning up.
gentic = Engine(population_size=50, tournament_size=tournament_size,
               init_depth=2, generations=1000, n_jobs=1,
               stopping_criteria=100, p_crossover=0.1,
               p_point_mutation=0.5, p_subtree_mutation=0.1,
               p_hoist_mutation=0.1, p_point_replace=0.1,
               rootid=custom_params['rootid'], factor_sets=features,
               standard_score=standard_score, operators_set=operators_sets,
               backup_cycle=20, convergence=0.002, fitness=evaluation,
               save_model = save_model,
               custom_params=custom_params)

In [None]:
# Run the search on the merged factor + next-return table. This is a long-running
# cell: the log below shows roughly 40-50 s per generation with n_jobs=1.
gentic.train(total_data=total_data)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   48.6s finished
[program.py line:64][INFO] name:ultron_1664277825062645,method:gen,gen:0,formual:SecurityMovingSum(2,SecurityMovingArgMin(10,'R_KbarReverse_2_40D')),fitness:0.004681,identification:87c0ee856d1d0b9af9d13ff800aaf84b
[program.py line:64][INFO] name:ultron_1664277818148191,method:gen,gen:0,formual:SecurityMovingPositiveAverage(10,SecurityMovingArgMax(10,'R_UpVolatility_1_20D')),fitness:0.005246,identification:13bca54b0d5ca4f7a94ed724a26097a2
[program.py line:64][INFO] name:ultron_1664277819672533,method:gen,gen:0,formual:SecurityMovingStandardDeviation(4,SecurityMovingPositiveDifferenceAverage(8,'BM_RecentFar_80D')),fitness:0.005389,identification:e8ed141eda7d3e96f571131d948e2551
[program.py line:64][INFO] name:ultron_1664277812923882,method:gen,gen:0,formual:SecurityMovingMax(4,SecurityMovingArgMax(4,'T_DnVolatility_1_10D')),fitness:0.00561

list index out of range ['TS_RecentFar', 'SecurityDeltaValueHolder', 'R_UpVolatility_1_60D', 'SecurityMovingRank', 'B_SecondSpot']
list index out of range ['R_UpVolatility_1_20D', 'SecurityMovingCountedPositive', 'SecurityMovingMax', 'BM_RecentFar_20D']
list index out of range ['R_UpVolatility_1_40D', 'B_SecondSpot', 'SecurityMovingRank', 'SecurityMovingMax', 'B_SecondSpot']
list index out of range ['T_DnVolatility_1_10D', 'B_RecentSpot', 'TS_RecentFar', 'SecurityMovingDecay', 'SecurityMovingMax', 'TS_MainFar']
list index out of range ['R_DnVolatility_1_5D', 'SecurityMovingPositiveDifferenceAverage', 'SecurityMovingArgMin', 'R_DnVolatility_1_5D']
list index out of range ['R_KbarReverse_2_40D', 'SecurityMovingCountedPositive', 'TS_MainFar', 'TS_MainFar']
list index out of range ['EqualNetIntTotalChg5D', 'SecurityMovingRank', 'B_SecondSpot', 'SecurityDeltaValueHolder', 'B_SecondSpot']
list index out of range ['B_FarSpot', 'SecurityMovingRSI', 'SecurityMovingVariance', 'SecurityMovingArgM

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   38.0s finished
[program.py line:64][INFO] name:ultron_1664277880445995,method:Point Mutation,gen:1,formual:SecurityMovingRSI(6,'TS_MainFar'),fitness:0.006551,identification:23f1a39040bb7e76d0897f2477b1fed3
[program.py line:64][INFO] name:ultron_1664277821681637,method:gen,gen:0,formual:SecurityMovingArgMin(10,SecurityMovingQuantile(2,'BM_RecentSecond_40D')),fitness:0.006793,identification:5523b6c48f134deb41b6f4756c22f5b7
[program.py line:64][INFO] name:ultron_1664277840645475,method:gen,gen:0,formual:SecurityMovingCountedPositive(8,SecurityMovingRSI(2,'BM_RecentFar_80D')),fitness:0.007026,identification:297e0c34950faeff3ee7566d42fb9215
[program.py line:64][INFO] name:ultron_1664277857255457,method:gen,gen:0,formual:SecurityMovingNegativeDifferenceAverage(8,SecurityMovingArgMax(10,'BM_RecentSecond_20D')),fitness:0.007043,identification:bb681b729932b08cc1aa4d919ec5f500
[program.py line:64][INFO] name:ultron_1664277893674795,method:Po

d_value:0.003135,convergence:0.002000,con_time:0
list index out of range ['BM_RecentSecond_20D', 'SecurityMovingRank', 'SecurityMovingRank', 'R_UpVolatility_1_20D', 'B_SecondSpot']
list index out of range ['BM_RecentFar_20D', 'SecurityMovingSum', 'BM_RecentFar_20D', 'SecurityMovingMin', 'BM_RecentFar_20D']
list index out of range ['B_SecondSpot', 'SecurityMovingRank', 'SecurityMovingStandardDeviation', 'SecurityMovingSum', 'B_SecondSpot']
list index out of range ['TS_MainFar', 'SecurityMovingMin', 'SecurityMovingMax', 'TS_MainFar']
list index out of range ['B_SecondSpot', 'SecurityMovingMax', 'SecurityMovingMax', 'TS_MainFar', 'SecurityMovingAverage', 'SecurityMovingSum', 'BM_RecentFar_20D', 'TS_MainFar']
