在沙盒环境，构建UMP主裁模型和边裁模型

In [1]:
import os, copy
# Select the 'minden' data set for ultron.
# NOTE(review): this is set before any ultron import, presumably because ultron
# reads ULTRON_DATA at import time — confirm before reordering these lines.
os.environ['ULTRON_DATA'] = 'minden'
import pandas as pd
import numpy as np

In [2]:
# NOTE(review): `g_project_data` (used below and in the data-loading cells) is
# not defined anywhere in this notebook, so it presumably comes from this star
# import — confirm, and prefer an explicit import if so.
from ultron.env import *
from ultron.ump.core import env
# Use the project data directory as the UMP model path.
os.environ['UMP_MODEL_PATH'] = g_project_data

/var/log/ultron/2022-10-10.log


In [3]:
from ultron.ump.ump.main_base import MainBase
# NOTE(review): the name `MainMul` imported here is rebound later in this
# notebook by a locally defined `class MainMul(MainBase, BuyUmpMixin)`; after
# that cell runs, `MainMul` refers to the local class, not this import.
from ultron.ump.ump.main_mul import MainMul
from ultron.ump.ump.base import BuyUmpMixin, ump_main_make_xy
from ultron.ump.model.principles import Principles
from ultron.ump.trade.ml_feature import FeatureBase, BuyFeatureMixin, SellFeatureMixin
from ultron.ump.technical.vwap import calc_vwap
from ultron.ump.trade import ml_feature

In [4]:
from ultron.ump.ump.main_deg import MainDeg
from ultron.ump.ump.main_price import MainPrice
from ultron.ump.ump.main_wave import MainWave
from ultron.ump.ump.main_full import MainFull

In [5]:
from ultron.ump.ump.edge_deg import EdgeDeg
# NOTE(review): the module is spelled `edeg_price` (not `edge_price` like its
# siblings). The recorded outputs show EdgePrice being trained, so this is
# presumably the real module name in ultron — do not "fix" it without checking.
from ultron.ump.ump.edeg_price import EdgePrice
from ultron.ump.ump.edge_wave import EdgeWave
from ultron.ump.ump.edge_full import EdgeFull

In [6]:
# Switch on the `g_enable_ml_feature` flag of ultron's ump env module.
# NOTE(review): presumably this enables ml-feature handling inside ultron's
# ump code — confirm against ultron.ump.core.env.
env.g_enable_ml_feature = True

#### 加载买入特征

In [7]:
# Read the buy-side feature table (first CSV column becomes the index) and
# convert its 'date' column to pandas datetimes in the same expression.
buy_features = pd.read_csv(
    os.path.join(g_project_data, 'buy_features.csv'),
    index_col=0,
).assign(date=lambda frame: pd.to_datetime(frame['date']))
buy_features.head()

Unnamed: 0,buy_deg_ang42,buy_deg_ang252,buy_deg_ang60,buy_deg_ang21,buy_price_rank120,buy_price_rank90,buy_price_rank60,buy_price_rank252,buy_wave_score1,buy_wave_score2,...,buy_atr_std,buy_jump_down_power,buy_diff_down_days,buy_jump_up_power,buy_diff_up_days,buy_vwap_ang42,buy_vwap_ang60,buy_vwap_ang21,date,code
0,2.22,-5.584,-7.734,3.557,0.35,0.467,0.7,0.262,0.141,-0.042,...,0.652,0,0,0,0,1.207,0.635,1.066,2021-03-18,300002
1,-2.04,-17.635,2.442,-0.175,0.138,0.183,0.208,0.065,-0.13,-0.09,...,0.219,0,0,0,0,-0.357,0.108,-0.082,2021-05-07,300002
0,3.32,3.32,3.32,3.32,0.615,0.615,0.615,0.615,0.264,0.298,...,0.414,0,0,0,0,0.69,0.69,0.69,2018-11-01,300009
1,3.625,6.23,4.166,6.708,0.858,0.844,0.85,0.877,0.033,-0.099,...,1.41,0,0,0,0,0.718,0.809,0.508,2019-10-29,300009
2,7.294,3.1,3.581,3.996,0.842,0.789,0.8,0.867,0.645,0.449,...,1.757,0,0,0,0,0.469,0.524,0.209,2020-02-18,300009


#### 加载卖出特征

In [8]:
# Read the sell-side feature table (first CSV column becomes the index) and
# convert its 'date' column to pandas datetimes in the same expression.
sell_features = pd.read_csv(
    os.path.join(g_project_data, 'sell_features.csv'),
    index_col=0,
).assign(date=lambda frame: pd.to_datetime(frame['date']))
sell_features.head()

Unnamed: 0,sell_deg_ang42,sell_deg_ang252,sell_deg_ang60,sell_deg_ang21,sell_price_rank120,sell_price_rank90,sell_price_rank60,sell_price_rank252,sell_wave_score1,sell_wave_score2,sell_wave_score3,sell_jump_down_power,sell_diff_down_days,sell_jump_up_power,sell_diff_up_days,sell_vwap_ang42,sell_vwap_ang60,sell_vwap_ang21,date,code
0,-0.976,-17.29,3.093,0.218,0.183,0.244,0.3,0.087,-0.117,-0.1,-0.088,0,0,0,0,-0.126,0.312,0.094,2021-04-29,300002
1,5.843,-11.472,4.456,-5.604,0.75,0.667,0.5,0.512,0.77,0.62,0.493,0,0,0,0,0.247,0.332,-0.107,2021-08-31,300002
0,12.596,0.172,13.236,2.613,0.991,0.989,0.983,0.991,1.156,1.11,1.055,0,0,0,0,1.165,1.242,1.351,2019-04-02,300009
1,7.311,3.35,3.013,5.4,0.767,0.711,0.75,0.786,0.833,0.53,0.394,0,0,0,0,0.528,0.58,0.3,2020-02-14,300009
2,6.606,2.583,4.882,1.756,0.883,0.844,0.883,0.925,1.072,0.748,0.566,0,0,0,0,0.517,0.598,0.273,2020-02-21,300009


#### 加载交易记录

In [9]:
# Read the trade records (one row per completed buy/sell round trip) and
# convert both date columns to pandas datetimes.
trader_data = pd.read_csv(
    os.path.join(g_project_data, 'trader_records.csv'),
    index_col=0,
).assign(
    buy_date=lambda frame: pd.to_datetime(frame['buy_date']),
    sell_date=lambda frame: pd.to_datetime(frame['sell_date']),
)
trader_data.head()

Unnamed: 0,code,buy_date,buy_cnt,sell_date,buy_price,sell_price
0,300002,2021-03-18,1,2021-04-29,4.7048,4.5233
1,300002,2021-05-07,1,2021-08-31,4.4335,4.7973
2,300009,2018-11-01,1,2019-04-02,9.538,10.7572
3,300009,2019-10-29,1,2020-02-14,10.5162,10.4669
4,300009,2020-02-18,1,2020-02-21,10.3618,10.5324


#### 合并数据集生成订单集

In [10]:
# Build the order set: attach buy-side features on (buy_date, code), then
# sell-side features on (sell_date, code). Both joins use pandas' default
# inner join, so trades without matching feature rows are dropped.
# The sell-side join keys are spelled out explicitly: without `on=` pandas
# joins on every column name the two frames happen to share, which would
# silently change the join if a new common column were ever added.
orders_pd = (
    trader_data
    .merge(buy_features.rename(columns={'date': 'buy_date'}),
           on=['buy_date', 'code'])
    .merge(sell_features.rename(columns={'date': 'sell_date'}),
           on=['sell_date', 'code'])
)
orders_pd.head()

Unnamed: 0,code,buy_date,buy_cnt,sell_date,buy_price,sell_price,buy_deg_ang42,buy_deg_ang252,buy_deg_ang60,buy_deg_ang21,...,sell_wave_score1,sell_wave_score2,sell_wave_score3,sell_jump_down_power,sell_diff_down_days,sell_jump_up_power,sell_diff_up_days,sell_vwap_ang42,sell_vwap_ang60,sell_vwap_ang21
0,300002,2021-03-18,1,2021-04-29,4.7048,4.5233,2.22,-5.584,-7.734,3.557,...,-0.117,-0.1,-0.088,0,0,0,0,-0.126,0.312,0.094
1,300002,2021-05-07,1,2021-08-31,4.4335,4.7973,-2.04,-17.635,2.442,-0.175,...,0.77,0.62,0.493,0,0,0,0,0.247,0.332,-0.107
2,300009,2018-11-01,1,2019-04-02,9.538,10.7572,3.32,3.32,3.32,3.32,...,1.156,1.11,1.055,0,0,0,0,1.165,1.242,1.351
3,300009,2019-10-29,1,2020-02-14,10.5162,10.4669,3.625,6.23,4.166,6.708,...,0.833,0.53,0.394,0,0,0,0,0.528,0.58,0.3
4,300009,2020-02-18,1,2020-02-21,10.3618,10.5324,7.294,3.1,3.581,3.996,...,1.072,0.748,0.566,0,0,0,0,0.517,0.598,0.273


In [11]:
# Columns that describe the trade itself; everything else in orders_pd at this
# point is a buy_*/sell_* feature column.
diff_cols = [
    'code', 'buy_date', 'buy_cnt', 'sell_date', 'buy_price', 'sell_price'
]
# Feature column names, in their original order.
cols = orders_pd.columns[~orders_pd.columns.isin(diff_cols)].tolist()

#### 创建训练数据集

In [12]:
# Derive the per-order label columns.
# profit:           absolute profit of the position (price move * quantity).
# profit_cg:        profit relative to the cost of the position.
# profit_cg_hunder: the same ratio expressed in percent.
# `profit` is scaled by buy_cnt so that dividing by the position cost
# (buy_price * buy_cnt) yields the true return; previously the per-share price
# difference was divided by the whole position cost, which understated the
# return whenever buy_cnt != 1.
orders_pd['profit'] = (orders_pd['sell_price'] -
                       orders_pd['buy_price']) * orders_pd['buy_cnt']
orders_pd['profit_cg'] = orders_pd['profit'] / (
    orders_pd['buy_price'] * orders_pd['buy_cnt'])
orders_pd['profit_cg_hunder'] = orders_pd['profit_cg'] * 100
# Holding period in whole calendar days.
orders_pd['keep_days'] = (orders_pd['sell_date'] -
                          orders_pd['buy_date']).dt.days
# Label: 1 for a profitable order, -1 otherwise (break-even counts as -1).
orders_pd['result'] = np.where(orders_pd['profit'] > 0, 1, -1)
orders_pd['trade_date'] = orders_pd['buy_date']
# Positional order id. Using arange instead of the current index keeps this
# cell re-runnable: after set_index below the index holds dates, not 0..n-1.
orders_pd['key'] = np.arange(len(orders_pd))

# Pack each order's feature columns (`cols`) into one dict per row.
orders_pd['ml_features'] = orders_pd.apply(lambda x: x[cols].to_dict(), axis=1)
orders_pd = orders_pd.set_index('trade_date')
orders_pd.head()

Unnamed: 0_level_0,code,buy_date,buy_cnt,sell_date,buy_price,sell_price,buy_deg_ang42,buy_deg_ang252,buy_deg_ang60,buy_deg_ang21,...,sell_vwap_ang42,sell_vwap_ang60,sell_vwap_ang21,profit,profit_cg,profit_cg_hunder,keep_days,result,key,ml_features
trade_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-03-18,300002,2021-03-18,1,2021-04-29,4.7048,4.5233,2.22,-5.584,-7.734,3.557,...,-0.126,0.312,0.094,-0.1815,-0.0386,-3.8585,42,-1,0,"{'buy_deg_ang42': 2.22, 'buy_deg_ang252': -5.5..."
2021-05-07,300002,2021-05-07,1,2021-08-31,4.4335,4.7973,-2.04,-17.635,2.442,-0.175,...,0.247,0.332,-0.107,0.3638,0.082,8.2048,116,1,1,"{'buy_deg_ang42': -2.04, 'buy_deg_ang252': -17..."
2018-11-01,300009,2018-11-01,1,2019-04-02,9.538,10.7572,3.32,3.32,3.32,3.32,...,1.165,1.242,1.351,1.2192,0.1278,12.7827,152,1,2,"{'buy_deg_ang42': 3.32, 'buy_deg_ang252': 3.32..."
2019-10-29,300009,2019-10-29,1,2020-02-14,10.5162,10.4669,3.625,6.23,4.166,6.708,...,0.528,0.58,0.3,-0.0493,-0.0047,-0.4686,108,-1,3,"{'buy_deg_ang42': 3.625, 'buy_deg_ang252': 6.2..."
2020-02-18,300009,2020-02-18,1,2020-02-21,10.3618,10.5324,7.294,3.1,3.581,3.996,...,0.517,0.598,0.273,0.1707,0.0165,1.647,3,1,4,"{'buy_deg_ang42': 7.294, 'buy_deg_ang252': 3.1..."


#### 自定义特征

In [13]:
class FeatureVWap(FeatureBase, BuyFeatureMixin, SellFeatureMixin):
    """VWAP feature, usable for both buy-side and sell-side orders.

    One feature is produced per look-back window in ``self.vwap_keys``. Each is
    named ``<prefix>vwap_ang<window>``, where the prefix is whatever
    ``feature_prefix`` returns for the requested side.
    """

    def __init__(self):
        # Look-back window lengths, counted in rows of the k-line frame.
        self.vwap_keys = frozenset([21, 42, 60])

    def get_feature_keys(self, buy_feature):
        """Return the feature names (one per window) for the requested side.

        :param buy_feature: True for buy-side names, False for sell-side names
        :return: list of feature names, in the iteration order of vwap_keys
        """
        prefix = self.feature_prefix(buy_feature=buy_feature)
        keys = []
        for dk in self.vwap_keys:
            keys.append('{}vwap_ang{}'.format(prefix, dk))
        return keys

    def calc_feature(self, kl_pd, combine_kl_pd, day_ind, buy_feature):
        """
        Build the VWAP features for a trade at row ``day_ind`` of ``kl_pd``.

        For every window length the last ``dk`` rows up to and including
        ``day_ind`` are passed to ``calc_vwap`` and the ``score`` attribute of
        its result becomes the feature value (NaN -> 0, otherwise rounded to
        3 decimals).

        :param kl_pd: time series of the timing (back-test) period
        :param combine_kl_pd: kl_pd with earlier history prepended; used when
                              kl_pd alone has fewer than ``dk`` rows before
                              ``day_ind``
        :param day_ind: positional index in kl_pd of the trade day
        :param buy_feature: True when building buy features, False for sell
        :return: dict mapping feature name -> VWAP score
        """
        prefix = self.feature_prefix(buy_feature=buy_feature)
        features = {}
        for dk in self.vwap_keys:
            if day_ind >= dk:
                # Enough rows in the timing period itself: take the dk rows
                # ending at day_ind.
                window = kl_pd[day_ind - dk + 1:day_ind + 1]
            else:
                # Not enough rows: fall back to the extended history, cut off
                # at the trade day. Re-assigning the argument mirrors the
                # original code; truncating again on later iterations is a
                # no-op.
                combine_kl_pd = combine_kl_pd.loc[:kl_pd.index[day_ind]]
                if combine_kl_pd.shape[0] > dk:
                    window = combine_kl_pd[-dk:]
                else:
                    # Even the extended history is short: use all of it.
                    window = combine_kl_pd
            score = calc_vwap(window).score
            # Normalise the score: missing -> 0, otherwise 3 decimals.
            if np.isnan(score):
                score = 0
            else:
                score = round(score, 3)
            features['{}vwap_ang{}'.format(prefix, dk)] = score
        return features

#### 自定义主裁模型

In [14]:
class MainMul(MainBase, BuyUmpMixin):
    """Custom main-umpire model built from one key of each feature family.

    From each of the Deg, Price, Wave, Atr and VWap features it uses the last
    key returned by ``get_feature_ump_keys``.

    Note: this definition rebinds the name ``MainMul`` that was imported from
    ``ultron.ump.ump.main_mul`` earlier in the notebook.
    """

    class MulFiter(Principles):
        """Selects the training columns for MainMul."""

        @ump_main_make_xy
        def make_xy(self, **kwarg):
            """Return the 'result' column plus the five selected feature columns.

            Columns are picked from ``self.order_has_ret`` by regex match on
            the column name.
            """
            feature_classes = (
                ml_feature.FeatureDeg,
                ml_feature.FeaturePrice,
                ml_feature.FeatureWave,
                ml_feature.FeatureAtr,
                FeatureVWap,
            )
            # Last key of every feature family, in the order listed above.
            picked = [
                feature_cls().get_feature_ump_keys(ump_cls=MainMul)[-1]
                for feature_cls in feature_classes
            ]
            regex = 'result|{}|{}|{}|{}|{}'.format(*picked)
            return self.order_has_ret.filter(regex=regex)

    def get_predict_col(self):
        """Return the five feature column names used for prediction.

        The same columns as selected in ``MulFiter.make_xy``, without 'result'.
        """
        predict_cols = []
        for feature_cls in (ml_feature.FeatureDeg, ml_feature.FeaturePrice,
                            ml_feature.FeatureWave, ml_feature.FeatureAtr,
                            FeatureVWap):
            keys = feature_cls().get_feature_ump_keys(ump_cls=MainMul)
            predict_cols.append(keys[-1])
        return predict_cols

    def get_fiter_class(self):
        """Return the column-selection class used to build the training data."""
        return MainMul.MulFiter

    @classmethod
    def class_unique_id(cls):
        """
        Unique keyword name of this ump class: returns 'mul_main'.
        Intended for user-defined umps; the user is responsible for keeping
        class_unique_id unique, no check is performed internally.
        For usage see the extend_ump_block method in ABuUmpManager.
        """
        return 'mul_main'

#### 训练主裁模型

In [15]:
# Train and dump every main-umpire model on the same order set with the same
# settings. The order of training matches the original cell.
for main_ump_cls in (MainFull, MainPrice, MainWave, MainMul):
    main_ump_cls.ump_main_clf_dump(orders_pd,
                                   p_ncs=slice(20, 40, 1),
                                   market_name='cn')
# Kept as the cell's last expression so the returned MainDeg object is still
# displayed as the cell output.
MainDeg.ump_main_clf_dump(orders_pd, p_ncs=slice(20, 40, 1), market_name='cn')

please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_main_cn_full_main
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_main_cn_price_main
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_main_cn_wave_main
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_main_cn_mul_main
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_main_cn_deg_main


MainDeg: is_buy_ump:True predict_col:['buy_deg_ang42', 'buy_deg_ang252', 'buy_deg_ang60', 'buy_deg_ang21']

#### 训练边裁模型

In [16]:
# Train and dump every edge-umpire model on the same order set. The order of
# training matches the original cell.
for edge_ump_cls in (EdgeDeg, EdgePrice, EdgeWave):
    edge_ump_cls.ump_edge_clf_dump(orders_pd, market_name='cn')
# Kept as the cell's last expression so the returned EdgeFull object is still
# displayed as the cell output.
EdgeFull.ump_edge_clf_dump(orders_pd, market_name='cn')

please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_edge_cn_deg_edge
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_edge_cn_price_edge
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_edge_cn_wave_edge
please wait! dump_pickle....: /home/kerry/ultron/rom/sandbox/minden/ump/ump_edge_cn_full_edge


EdgeFull: is_buy_ump:True predict_col:['buy_deg_ang42', 'buy_deg_ang252', 'buy_deg_ang60', 'buy_deg_ang21', 'buy_price_rank120', 'buy_price_rank90', 'buy_price_rank60', 'buy_price_rank252', 'buy_wave_score1', 'buy_wave_score2', 'buy_wave_score3', 'buy_atr_std']