在沙盒环境，构建UMP自定义特征

In [1]:
import os, copy
os.environ['ULTRON_DATA'] = 'minden'
import pandas as pd
import numpy as np

In [2]:
from ultron.ump.trade import ml_feature
from ultron.kdutils.progress import Progress
from ultron.kdutils.parallel import Parallel,delayed
from ultron.ump.trade.ml_feature import FeatureBase, BuyFeatureMixin, SellFeatureMixin
from ultron.ump.technical.vwap import calc_vwap

In [3]:
from ultron.env import *

/var/log/ultron/2022-09-27.log


In [4]:
# Switch ultron to its example environment; per the log line printed by this
# cell, data is then read only from the sandbox directory.
enable_example_env()

2022-09-27 19:14:54,736 - [env.py:67] - ultron - INFO - enable example env will only read /home/kerry/ultron/rom/sandbox/minden


#### 加载指标数据

In [5]:
# Load the per-instrument daily indicator table and normalise it in one pass:
# parse the trade date, rename the price/volume columns to the short names
# used by the feature code, drop the turnover value column, index by trade
# date, then keep a copy of the date as a regular 'date' column.
indicator_data = (
    pd.read_csv(os.path.join(g_project_data, 'indicator.csv'), index_col=0)
    .assign(trade_date=lambda frame: pd.to_datetime(frame['trade_date']))
    .rename(columns={
        'preClosePrice': 'pre_close',
        'openPrice': 'open',
        'closePrice': 'close',
        'highestPrice': 'high',
        'lowestPrice': 'low',
        'turnoverVol': 'volume',
    })
    .drop(columns=['turnoverValue'])
    .set_index(['trade_date'])
    .assign(
        date=lambda frame: frame.index,
        # Scale p_change by 100 (presumably fraction -> percent; confirm).
        p_change=lambda frame: frame['p_change'] * 100,
    )
)
indicator_data.head()

Unnamed: 0_level_0,code,pre_close,open,high,low,close,volume,p_change,atr21,atr14,key,date_week,date
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2018-10-15,300002,3.07,3.08,3.12,2.99,3.0,9880442,-2.306527,0.140328,0.140576,328,0,2018-10-15
2018-10-16,300002,3.0,3.02,3.05,2.92,2.93,10977328,-2.360987,0.139836,0.139821,1079,1,2018-10-16
2018-10-17,300002,2.93,2.98,3.05,2.96,3.03,12858515,3.35602,0.138891,0.138405,1830,2,2018-10-17
2018-10-18,300002,3.03,3.02,3.16,2.97,3.08,18707797,1.636698,0.141325,0.142091,2581,3,2018-10-18
2018-10-19,300002,3.08,3.03,3.22,3.03,3.19,18550310,3.509132,0.143643,0.145513,3332,4,2018-10-19


#### 加载交易记录

In [6]:
# Load the trade records and parse both trade-date columns as datetimes.
trader_data = pd.read_csv(os.path.join(g_project_data, 'trader_records.csv'), index_col=0)
for date_col in ('buy_date', 'sell_date'):
    trader_data[date_col] = pd.to_datetime(trader_data[date_col])
trader_data.head()

Unnamed: 0,code,buy_date,buy_cnt,sell_date,buy_price,sell_price
0,300002,2021-03-18,1,2021-04-29,4.704794,4.523262
1,300002,2021-05-07,1,2021-08-31,4.433504,4.797264
2,300009,2018-11-01,1,2019-04-02,9.537966,10.757172
3,300009,2019-10-29,1,2020-02-14,10.516226,10.466948
4,300009,2020-02-18,1,2020-02-21,10.36179,10.53245


#### 自定义构建 VWAP特征

In [7]:
class FeatureVWap(FeatureBase, BuyFeatureMixin, SellFeatureMixin):
    """VWAP feature that can be built for both buy and sell events."""

    def __init__(self):
        # Look-back window lengths (in rows); one feature is produced per window.
        self.vwap_keys = frozenset([21, 42, 60])

    def _vwap_key(self, buy_feature, window):
        """Return the feature name for one look-back window."""
        prefix = self.feature_prefix(buy_feature=buy_feature)
        return '{}vwap_ang{}'.format(prefix, window)

    def get_feature_keys(self, buy_feature):
        """Return the names of all features this class produces.

        :param buy_feature: True for buy-side names, False for sell-side names
        :return: list with one feature name per window in ``self.vwap_keys``
        """
        return [self._vwap_key(buy_feature, window) for window in self.vwap_keys]

    def calc_feature(self, kl_pd, combine_kl_pd, day_ind, buy_feature):
        """
        Build the VWAP score features for one trade event.

        :param kl_pd: price time series of the timing period
        :param combine_kl_pd: the timing-period series combined with earlier
            history (one year, according to the original author's notes)
        :param day_ind: positional row index of the trade in ``kl_pd``
        :param buy_feature: True when building buy features, False for sell
        :return: dict mapping each feature name to its score
        """
        features = {}
        for window in self.vwap_keys:
            if day_ind - window < 0:
                # Not enough rows in kl_pd before the trade: fall back to the
                # combined series, truncated at the trade's index label. The
                # truncated frame is kept for the remaining windows as well.
                combine_kl_pd = combine_kl_pd.loc[:kl_pd.index[day_ind]]
                if combine_kl_pd.shape[0] > window:
                    # Longer than the window: keep only the last `window` rows.
                    price_slice = combine_kl_pd[-window:]
                else:
                    # Otherwise use everything that is available.
                    price_slice = combine_kl_pd
            else:
                # Enough history: the `window` rows of kl_pd ending at the trade.
                # The whole row slice (not only the close column) is passed on.
                price_slice = kl_pd[day_ind - window + 1:day_ind + 1]
            score = calc_vwap(price_slice).score
            # A NaN score is stored as 0; anything else is rounded to 3 decimals.
            score = 0 if np.isnan(score) else round(score, 3)
            features[self._vwap_key(buy_feature, window)] = score
        return features

#### 将自定义特征加入计算队列

In [8]:
# Clear any previously registered user features, then register FeatureVWap.
# NOTE(review): presumably MlFeature objects pick up the registered classes when
# building feature dicts — confirm against the ultron ml_feature module.
ml_feature.clear_user_feature()
ml_feature.append_user_feature(FeatureVWap)

#### 创建特征计算对象

In [9]:
# Module-level feature calculator; calc_features below calls mf.make_feature_dict.
mf = ml_feature.MlFeature()

#### 每支股票计算单方向特征

In [10]:
def calc_features(indicator_data, code, dates, buy_feature):
    """Compute one-direction (buy or sell) features for a single instrument.

    :param indicator_data: indicator frame of one instrument, indexed by trade
        date, containing at least the columns 'key', 'date' and 'code'. The
        value of 'key' minus one is passed to the feature calculator as the
        row position (``day_ind``).
    :param code: instrument code; not used inside this function.
    :param dates: trade dates to build features for. They are looked up with
        ``indicator_data.loc[dates]``, so they need to exist in the index.
    :param buy_feature: True to build buy features, False for sell features.
    :return: DataFrame with one row per usable date holding the feature columns
        plus 'date' and 'code'; an empty DataFrame when no date is usable.
    """
    date_keys = indicator_data.loc[dates]['key'].values
    # Features are built on the row preceding the trade (key - 1). Trades on
    # the very first row (key == 0) have no preceding row and are skipped for now.
    day_inds = [key - 1 for key in date_keys if key != 0]
    if not day_inds:
        return pd.DataFrame()
    # Work on a private copy of the frame.
    # NOTE(review): presumably guards against make_feature_dict modifying its
    # input — confirm.
    indicator_data = copy.deepcopy(indicator_data)
    feature_rows = []
    for day_ind in day_inds:
        row = mf.make_feature_dict(kl_pd=indicator_data, combine_kl_pd=indicator_data,
                                   day_ind=day_ind, buy_feature=buy_feature)
        # +1 (for model training, per the original note): the row is labelled
        # with the trade date's own key so the merge below attaches that date.
        row['key'] = day_ind + 1
        feature_rows.append(row)
    # Build the frame once, after the loop, instead of on every iteration.
    result = pd.DataFrame(feature_rows)
    result = result.merge(indicator_data[['key', 'date', 'code']], on=['key'])
    return result.drop(['key'], axis=1)

#### 创建每支股票开平特征

In [11]:
def create_features(code, indicator_data):
    """Build the buy-side and sell-side feature tables for one instrument.

    Reads the module-level ``trader_data`` frame for the trades of ``code`` and
    delegates the feature computation to ``calc_features``.

    :param code: instrument code to process.
    :param indicator_data: indicator frame of all instruments, indexed by
        'trade_date' and containing a 'code' column.
    :return: dict with keys 'name' (the code), 'buy' and 'sell' (feature
        DataFrames), or None when ``trader_data`` holds no trade for ``code``.
    """
    # A boolean mask always yields a DataFrame: no KeyError for an unknown code
    # and no Series when only a single row matches.
    trader_dt = trader_data[trader_data['code'] == code]
    if trader_dt.empty:
        return None
    # Distinct trade dates as 'YYYY-MM-DD' strings; missing dates (NaT) are dropped.
    buy_dates = trader_dt['buy_date'].dropna().dt.strftime('%Y-%m-%d').unique().tolist()
    sell_dates = trader_dt['sell_date'].dropna().dt.strftime('%Y-%m-%d').unique().tolist()
    # Restrict the indicators to this code and renumber the rows from 0, so that
    # 'key' is the row position within this instrument's own series.
    code_data = indicator_data.reset_index()
    code_data = code_data[code_data['code'] == code].reset_index(drop=True)
    code_data['key'] = code_data.index
    code_data = code_data.set_index('trade_date')
    buy_feature = calc_features(code_data, code, buy_dates, buy_feature=True)
    sell_feature = calc_features(code_data, code, sell_dates, buy_feature=False)
    return {'name': code, 'sell': sell_feature, 'buy': buy_feature}

In [12]:
# Build the feature tables for every traded code, reporting progress as we go.
codes = trader_data['code'].unique().tolist()
res = []
with Progress(len(codes), 0, 'feature progress') as pg:
    # The progress display is given a 1-based position plus the current code.
    for i, code in enumerate(codes, start=1):
        pg.show(i, ext='{0}'.format(code))
        res.append(create_features(code, indicator_data))

feature progress:300724:100.0%

In [13]:
# Split the per-code result dicts into parallel lists of buy and sell frames.
buy_res = [r['buy'] for r in res]
sell_res = [r['sell'] for r in res]

In [14]:
# Stack the per-code frames row-wise. The index is not reset, so each code's
# own 0..n-1 row labels repeat in the combined tables.
buy_features = pd.concat(buy_res, axis='index')
sell_features = pd.concat(sell_res, axis='index')

#### 开仓特征

In [15]:
# Preview the first rows of the combined buy-side feature table.
buy_features.head()

Unnamed: 0,buy_deg_ang42,buy_deg_ang252,buy_deg_ang60,buy_deg_ang21,buy_price_rank120,buy_price_rank90,buy_price_rank60,buy_price_rank252,buy_wave_score1,buy_wave_score2,...,buy_atr_std,buy_jump_down_power,buy_diff_down_days,buy_jump_up_power,buy_diff_up_days,buy_vwap_ang42,buy_vwap_ang60,buy_vwap_ang21,date,code
0,2.22,-5.584,-7.734,3.557,0.35,0.467,0.7,0.262,0.141,-0.042,...,0.652,0,0,0,0,1.207,0.635,1.066,2021-03-18,300002
1,-2.04,-17.635,2.442,-0.175,0.138,0.183,0.208,0.065,-0.13,-0.09,...,0.219,0,0,0,0,-0.357,0.108,-0.082,2021-05-07,300002
0,3.32,3.32,3.32,3.32,0.615,0.615,0.615,0.615,0.264,0.298,...,0.414,0,0,0,0,0.69,0.69,0.69,2018-11-01,300009
1,3.625,6.23,4.166,6.708,0.858,0.844,0.85,0.877,0.033,-0.099,...,1.41,0,0,0,0,0.718,0.809,0.508,2019-10-29,300009
2,7.294,3.1,3.581,3.996,0.842,0.789,0.8,0.867,0.645,0.449,...,1.757,0,0,0,0,0.469,0.524,0.209,2020-02-18,300009


#### 平仓特征

In [16]:
# Preview the first rows of the combined sell-side feature table.
sell_features.head()

Unnamed: 0,sell_deg_ang42,sell_deg_ang252,sell_deg_ang60,sell_deg_ang21,sell_price_rank120,sell_price_rank90,sell_price_rank60,sell_price_rank252,sell_wave_score1,sell_wave_score2,sell_wave_score3,sell_jump_down_power,sell_diff_down_days,sell_jump_up_power,sell_diff_up_days,sell_vwap_ang42,sell_vwap_ang60,sell_vwap_ang21,date,code
0,-0.976,-17.29,3.093,0.218,0.183,0.244,0.3,0.087,-0.117,-0.1,-0.088,0,0,0,0,-0.126,0.312,0.094,2021-04-29,300002
1,5.843,-11.472,4.456,-5.604,0.75,0.667,0.5,0.512,0.77,0.62,0.493,0,0,0,0,0.247,0.332,-0.107,2021-08-31,300002
0,12.596,0.172,13.236,2.613,0.991,0.989,0.983,0.991,1.156,1.11,1.055,0,0,0,0,1.165,1.242,1.351,2019-04-02,300009
1,7.311,3.35,3.013,5.4,0.767,0.711,0.75,0.786,0.833,0.53,0.394,0,0,0,0,0.528,0.58,0.3,2020-02-14,300009
2,6.606,2.583,4.882,1.756,0.883,0.844,0.883,0.925,1.072,0.748,0.566,0,0,0,0,0.517,0.598,0.273,2020-02-21,300009


#### 保存特征数据，供其他示例使用

In [17]:
# Write both feature tables as UTF-8 CSV files into the project data directory.
for features, filename in ((buy_features, 'buy_features.csv'),
                           (sell_features, 'sell_features.csv')):
    features.to_csv(os.path.join(g_project_data, filename), encoding='UTF-8')