In [1]:
import os
# Process-level configuration, set before the jdw / ultron imports in the cells below
# (presumably those packages read these variables at import time — TODO confirm).
os.environ['IGNORE_WARNINGS'] = '0'
# SECURITY: a connection string with an embedded user/password should not live in a
# shared notebook. An externally supplied DB_URL now takes precedence; the literal
# below is only a local-development fallback.
os.environ.setdefault(
    'DB_URL', 'mysql+mysqlconnector://read:1234@127.0.0.1:3306/quant')

In [2]:
import pandas as pd

In [3]:
from jdw import EntropyAPI
from jdw.kdutils.file_utils import load_pickle
from ultron.sentry.api import *
from ultron.tradingday import *



In [4]:
# Read the pre-computed factor panel from disk and preview the first rows.
# NOTE(review): the file carries an .h5 extension but is read through load_pickle;
# if that helper unpickles, only load files from a trusted source — confirm.
factors_path = 'factors.h5'
factors_data = load_pickle(factors_path)
factors_data.head()

Unnamed: 0,trade_date,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ,code
0,2021-03-01,-92.376,-4.6243,20.6704,0.0061,0.5981,0.306,1.506,10.0528,10.3092,2.5879,18.9855,-0.2538,1
1,2021-03-01,130.465,13.5899,90.0293,0.0086,0.1124,-0.0446,-1.5296,-3.2607,6.5233,2.2121,12.8189,0.1523,2
2,2021-03-01,-65.5586,-10.1681,56.9712,0.0243,0.3891,0.3139,0.833,-0.2638,1.5967,0.1913,-36.4546,-0.343,63
3,2021-03-01,119.543,-6.817,63.2887,0.03,0.186,0.211,1.1419,-1.7645,1.4818,0.3724,-48.5819,-0.3287,66
4,2021-03-01,43.3794,18.6217,80.9278,0.0073,0.0776,-0.0589,-1.0135,-14.3274,2.7526,0.6946,82.6065,-0.3225,69


In [5]:
# The date window is taken from the factor panel itself: earliest and latest
# values of its trade_date column.
trade_dates = factors_data['trade_date']
begin_date, end_date = trade_dates.min(), trade_dates.max()

In [6]:
# Run settings. `universe` is handed to StockCarnot; the other three names are
# not referenced by any cell visible in this notebook section.
universe, dummy_name = 'sh50', 'dummy_test_f1r_open'
# NOTE(review): 'sw' / level 1 presumably select an industry classification — confirm.
industry_name, industry_level = 'sw', 1

In [7]:
# Every column of the factor panel except the two key columns is used as a
# model feature; original column order is kept.
features = [
    name for name in factors_data.columns
    if name not in {'trade_date', 'code'}
]

In [8]:
# Sampling configuration passed to StockCarnot:
#   batch   - use the data of the past 20 periods
#   freq    - interval between samples, in periods
#   horizon - prediction horizon, in periods
#   offset  - offset, in periods
batch, freq, horizon, offset = 20, 4, 1, 0

In [9]:
# Hyper-parameters and name of the model; both are passed to StockCarnot.
# "max_features": "auto" was deprecated in scikit-learn 1.1 and removed in 1.3.
# For a random-forest regressor it meant "consider all features", which the
# float 1.0 expresses on old and new versions alike.
# NOTE(review): assumes 'RandomForestRegressor' resolves to scikit-learn's
# estimator and that the params are forwarded unchanged — confirm.
# NOTE(review): no random_state is set, so fitted models may differ between runs.
model_pams = {"n_estimators": 150, "max_depth": 3, "max_features": 1.0}
model_name = 'RandomForestRegressor'

In [10]:
# Collect the settings defined in the cells above and build the StockCarnot
# object that is used for every later fetch / train / predict call.
carnot_settings = dict(
    model_name=model_name,
    model_params=model_pams,
    universe=universe,
    features=features,
    batch=batch,
    freq=freq,
    horizon=horizon,
    offset=offset,
    yield_name='returns',
)
carnot = EntropyAPI.StockCarnot(**carnot_settings)

In [11]:
# Move the data start back from begin_date by (batch + freq + 1) periods on the
# 'china.sse' calendar, so history exists before the first factor date.
# NOTE(review): the "b" suffix presumably denotes business days — confirm.
lookback = carnot._batch + carnot._freq + 1
start_date = advanceDateByCalendar('china.sse',
                                   begin_date,
                                   f"-{lookback}b",
                                   BizDayConventions.Following)

In [12]:
#### Load returns
# Build the universe object from the carnot settings, then fetch returns over
# the extended window [start_date, end_date] and preview them.
yields_universe = carnot._universe_class(u_name=carnot._universe)
yields_data = carnot.fetch_yields(begin_date=start_date,
                                  end_date=end_date,
                                  universe=yields_universe)
yields_data.head()

2023-03-27 14:36:12,416 - ultron - INFO - start create yields data


Unnamed: 0,trade_date,code,nxt1_ret
0,2021-01-18,600000,-0.00999
69,2021-01-18,600009,-0.0196
138,2021-01-18,600016,-0.001974
207,2021-01-18,600028,-6e-06
276,2021-01-18,600030,0.024371


In [13]:
#### Load industry data
# Build the universe object from the carnot settings, then fetch the industry
# classification over the same [start_date, end_date] window and preview it.
industry_universe = carnot._universe_class(u_name=carnot._universe)
industry_data = carnot.fetch_industry(begin_date=start_date,
                                      end_date=end_date,
                                      universe=industry_universe)
industry_data.head()

2023-03-27 14:36:14,272 - ultron - INFO - start fetch industry data


Unnamed: 0,trade_date,code,industry_code,industry
0,2021-01-18,600000,1030321,银行
1,2021-01-18,600009,1030319,交通运输
2,2021-01-18,600016,1030321,银行
3,2021-01-18,600028,1030303,化工
4,2021-01-18,600030,1030322,非银金融


In [14]:
# Step 1: let carnot fill missing factor values with the help of the industry
# table (presumably an industry-level fill — confirm against jdw).
# Step 2: replace whatever is still NaN with 0.
# Note: this rebinds factors_data, so the raw panel is no longer available.
industry_filled = carnot.industry_fillna(industry_data=industry_data,
                                         factors_data=factors_data)
factors_data = industry_filled.fillna(0)
factors_data.head()

Unnamed: 0,trade_date,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ,code
0,2021-03-01,-92.376,-4.6243,20.6704,0.0061,0.5981,0.306,1.506,10.0528,10.3092,2.5879,18.9855,-0.2538,1
1,2021-03-01,130.465,13.5899,90.0293,0.0086,0.1124,-0.0446,-1.5296,-3.2607,6.5233,2.2121,12.8189,0.1523,2
2,2021-03-01,-65.5586,-10.1681,56.9712,0.0243,0.3891,0.3139,0.833,-0.2638,1.5967,0.1913,-36.4546,-0.343,63
3,2021-03-01,119.543,-6.817,63.2887,0.03,0.186,0.211,1.1419,-1.7645,1.4818,0.3724,-48.5819,-0.3287,66
4,2021-03-01,43.3794,18.6217,80.9278,0.0073,0.0776,-0.0589,-1.0135,-14.3274,2.7526,0.6946,82.6065,-0.3225,69


In [15]:
#### Winsorize and standardize the data
# Training mode joins the returns onto the factors (pandas' default inner join
# on trade_date/code) and normalises the label column together with the
# features; otherwise only the feature columns are normalised.
# NOTE(review): 'nxt1_ret' presumably corresponds to horizon = 1 — confirm.
is_train = True
if is_train:
    normal_input = factors_data.merge(yields_data, on=['trade_date', 'code'])
    normal_columns = carnot._alpha_model.features + ['nxt1_ret']
else:
    normal_input = factors_data
    normal_columns = carnot._alpha_model.features
total_data = carnot.factors_normal(normal_input, normal_columns)

2023-03-27 14:38:04,380 - ultron - INFO - start factors normal


In [16]:
# Train over the factor date window; the log output below shows one
# "start train <date> model" entry per training date.
models = carnot.create_models(
    total_data=total_data,
    begin_date=begin_date,
    end_date=end_date,
)

2023-03-27 14:38:43,762 - ultron - INFO - start train 2021-03-04 00:00:00 model
2023-03-27 14:38:44,124 - ultron - INFO - start train 2021-03-10 00:00:00 model
2023-03-27 14:38:44,500 - ultron - INFO - start train 2021-03-16 00:00:00 model
2023-03-27 14:38:44,874 - ultron - INFO - start train 2021-03-22 00:00:00 model
2023-03-27 14:38:45,244 - ultron - INFO - start train 2021-03-26 00:00:00 model
2023-03-27 14:38:45,625 - ultron - INFO - start train 2021-04-01 00:00:00 model
2023-03-27 14:38:45,983 - ultron - INFO - start train 2021-04-08 00:00:00 model
2023-03-27 14:38:46,362 - ultron - INFO - start train 2021-04-14 00:00:00 model
2023-03-27 14:38:46,736 - ultron - INFO - start train 2021-04-20 00:00:00 model
2023-03-27 14:38:47,134 - ultron - INFO - start train 2021-04-26 00:00:00 model
2023-03-27 14:38:47,502 - ultron - INFO - start train 2021-04-30 00:00:00 model


这里临时把前面准备的训练数据 total_data 当作预测数据使用;真实预测时的区别在于不需要合并收益率数据 yields_data(对应上面 is_train = False 的分支)。

In [18]:
# Run the trained models over the same date window.
# Note: factors_data is rebound here and from now on holds the prediction
# output, not the factor panel loaded at the top of the notebook.
factors_data = carnot.predict(
    models=models,
    total_data=total_data,
    begin_date=begin_date,
    end_date=end_date,
)

In [19]:
# Preview the prediction output returned by carnot.predict (factors_data was
# rebound to it in the previous cell).
factors_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,RandomForestRegressor_1052831871
trade_date,code,Unnamed: 2_level_1
2021-03-04,600000,0.619481
2021-03-04,600009,-1.168046
2021-03-04,600016,0.549107
2021-03-04,600028,0.142212
2021-03-04,600030,0.224651
