在沙盒环境中，通过寻优算法对多种机器学习模型及数据进行寻优。目前选用 3 种基础机器学习模型：XGBClassifier、RandomForestClassifier、LGBMClassifier。通过寻优挖掘并选出 precision 表现最好的前 10 个模型，放入集成学习模型中。

In [1]:
import os,pdb,itertools,copy,datetime
# Point ultron at the 'keim' data set through the ULTRON_DATA environment
# variable. NOTE(review): presumably this has to be set before `ultron` is
# imported so the library picks it up -- confirm against ultron.env.
os.environ['ULTRON_DATA'] = 'keim'

In [2]:
import numpy as np
import pandas as pd
from ultron.env import *
from ultron.utilities.utils import NumpyEncoder
from ultron.optimize.geneticist.genetic import Gentic
from ultron.optimize.model.treemodel import XGBClassifier
from ultron.optimize.model.treemodel import LGBMClassifier
from ultron.optimize.model.treemodel import RandomForestClassifier

/var/log/ultron/2022-09-27.log


In [3]:
# Switch ultron to its example environment; according to the log line it
# emits, data is then read only from the sandbox directory.
enable_example_env()

2022-09-27 19:23:01,060 - [env.py:67] - ultron - INFO - enable example env will only read /home/kerry/ultron/rom/sandbox/keim


#### 加载训练集

In [4]:
# Load the training set from the project data directory, using the first
# CSV column as the row index, then preview the first rows.
train_csv_path = os.path.join(g_project_data, 'train_datas.csv')
train_data = pd.read_csv(train_csv_path, index_col=0)
train_data.head()

Unnamed: 0,trade_date,code,BM_MainFar_80D,BM_RecentFar_20D,BM_RecentFar_40D,BM_RecentFar_80D,BM_RecentSecond_20D,BM_RecentSecond_40D,B_FarSpot,B_MainSpot,...,TS_RecentSecond,T_DnIntraday_5D,T_DnVolatility_1_10D,T_DnVolatility_2_20D,WeightNetIntTotalChg5D,WeightShortVolRelTotIntChg,inventory,profitratio,value,signal
0,2021-02-05,A,0.108297,0.028482,0.048874,0.112566,0.037459,0.018997,-0.115713,-0.633489,...,0.080587,-0.010746,-0.011118,-0.001603,0.000839,0.192546,,,1.130717,1.0
1,2021-02-05,AL,-0.017185,-0.010713,-0.028431,-0.043583,-0.015898,-0.022686,0.01055,-0.058663,...,-0.001708,-0.006366,-0.006237,6e-06,-0.000552,-0.095002,-71.400002,-0.126352,0.936497,1.0
2,2021-02-05,BU,0.002268,0.074625,0.144817,0.146292,0.060386,0.13083,-0.087043,-0.147928,...,-0.052928,-0.006808,-0.006575,0.003424,0.001066,0.126363,-61.389999,0.083217,0.250348,1.0
3,2021-02-05,C,0.021003,-0.003571,0.046976,0.034767,0.002104,0.030268,0.103967,0.17228,...,0.009364,-0.003704,-0.007573,-0.000428,-0.000842,-0.037971,-427.600006,,0.696008,1.0
4,2021-02-05,CF,0.013602,0.00397,0.006347,0.023968,-0.001802,0.002904,-0.024857,-0.031358,...,-0.034307,-0.00451,-0.007883,-0.000114,-0.000248,-0.029101,-618.409973,0.015356,0.971867,1.0


#### 当前使用3种机器学习模型: XGBClassifier RandomForestClassifier LGBMClassifier

#### 构建模型的参数集

In [5]:
class Parameter(object):
    """Hyper-parameter candidate grids, one classmethod per model name.

    Every classmethod returns a dict that maps a hyper-parameter name to the
    list of candidate values for it. ``**kwargs`` is accepted by each method
    but is not used.
    """

    @classmethod
    def XGBClassifier(cls, **kwargs):
        """Return the candidate grid for the XGBClassifier model."""
        depths = list(range(6, 15, 2))
        tree_counts = list(range(50, 100, 10))
        rates = [step / 100 for step in range(1, 10, 2)]
        return {
            'max_depth': depths,
            'n_estimators': tree_counts,
            'learning_rate': rates,
            'objective': ['multi:softmax'],
            'num_class': [2],
        }

    @classmethod
    def RandomForestClassifier(cls, **kwargs):
        """Return the candidate grid for the RandomForestClassifier model."""
        tree_counts = list(range(1, 100))
        # NOTE(review): 'auto' is deprecated in recent scikit-learn releases;
        # confirm which backend/version the ultron wrapper uses.
        return {
            'n_estimators': tree_counts,
            'max_features': ['auto'],
        }

    @classmethod
    def LGBMClassifier(cls, **kwargs):
        """Return the candidate grid for the LGBMClassifier model."""
        depths = list(range(1, 10, 2))
        tree_counts = list(range(1, 100, 4))
        rates = [step / 100 for step in range(1, 10, 2)]
        return {
            'max_depth': depths,
            'n_estimators': tree_counts,
            'learning_rate': rates,
        }


In [6]:
# Map each model name to the candidate grid returned by the `Parameter`
# classmethod of the same name.
params_sets = dict(
    XGBClassifier=Parameter.XGBClassifier(),
    RandomForestClassifier=Parameter.RandomForestClassifier(),
    LGBMClassifier=Parameter.LGBMClassifier(),
)

#### 构建基础模型集

In [7]:
# Names of the base models handed to the search engine.
model_sets = [
    'XGBClassifier',
    'RandomForestClassifier',
    'LGBMClassifier',
]

In [8]:
# Columns of `train_data` that are not used as model inputs.
# 'Unnamed: 0' shows up in the `train_data.head()` output and looks like a
# leftover row index written into the CSV; it is excluded here so it is not
# treated as a feature (a no-op if the column is absent).
non_feature_cols = {
    'trade_date', 'code',
    'value', 'signal', 'inventory', 'profitratio',
    'Unnamed: 0',
}
# Keep the original column order of `train_data`.
features = [col for col in train_data.columns if col not in non_feature_cols]

In [9]:
# Index all rows by (trade_date, code) once, then slice out the feature
# matrix and the label column; missing values are replaced by 0 in both.
indexed_data = train_data.set_index(['trade_date', 'code'])
X = indexed_data[features].fillna(0)
Y = indexed_data[['signal']].fillna(0)

#### 创建挖掘引擎

- 1.系统默认提供 3 种评估模式，也可自定义 datetime 评估模式
- 2.系统默认评估模式中，已经做了训练集和测试集的分离

In [10]:
def save_model(gen, run_details):
    """Callback for the outstanding models of each generation.

    Builds one summary record per entry of ``run_details`` and returns the
    records. The original version collected the records but discarded them
    (implicitly returning ``None``) and relied on ``json`` without it being
    among the notebook's visible imports.

    Args:
        gen: Generation identifier; stored unchanged under ``'gen'``.
        run_details: Iterable of objects exposing ``_identification``,
            ``_model_name``, ``_params``, ``_raw_fitness`` and
            ``_init_method``.

    Returns:
        A list of dicts with the keys ``'features'``, ``'model_name'``,
        ``'params'`` (a JSON string), ``'fitness'``, ``'gen'`` and
        ``'method'``; empty when ``run_details`` is empty.
    """
    # Local import keeps the callback self-contained.
    import json

    return [
        {
            'features': detail._identification,
            'model_name': detail._model_name,
            # Parameters are serialised with NumpyEncoder, as in the original.
            'params': json.dumps(detail._params, cls=NumpyEncoder),
            'fitness': detail._raw_fitness,
            'gen': gen,
            'method': detail._init_method,
        }
        for detail in run_details
    ]

In [11]:
# Collect the search-engine settings in one place, then unpack them into
# the constructor. `save_model` is passed as the callback.
gentic_settings = {
    'model_sets': model_sets,
    'params_sets': params_sets,
    'stopping_criteria': 100,
    'convergence': 0.002,
    'population_size': 20,
    'tournament_size': 5,
    'p_point_mutation': 0.7,
    'p_crossover': 0.2,
    'standard_score': 0.7,
    'save_model': save_model,
}
gentic = Gentic(**gentic_settings)

In [None]:
# `mode` is the model evaluation method.
# NOTE(review): the notebook introduction talks about ranking by precision
# while 'accuracy' is used here -- confirm this is the intended mode.
gentic.train(
    features,
    X=X,
    Y=Y,
    mode='accuracy',
    n_splits=2,
)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   49.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277813536701,gen:0,params:{'max_depth': 7, 'n_estimators': 61, 'learning_rate': 0.03},fitness:0.5223833065675809,method:full,token:e7585a7f1ae8976039b18cfa035eabc3
name:LGBMClassifier_1664277817665537,gen:0,params:{'max_depth': 5, 'n_estimators': 37, 'learning_rate': 0.05},fitness:0.5227289821366718,method:full,token:c945a967ce458605fdc6a44d2a10a781
name:LGBMClassifier_1664277795551815,gen:0,params:{'max_depth': 9, 'n_estimators': 1, 'learning_rate': 0.07},fitness:0.5231608629218605,method:full,token:2ca3db9d7d189b0108460713e8c18fa6
name:LGBMClassifier_1664277795646378,gen:0,params:{'max_depth': 3, 'n_estimators': 33, 'learning_rate': 0.01},fitness:0.5232477129151878,method:full,token:ea13514028a51362317e70dd5ccf3323
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:49.934094,Generation:0,Tournament:5, Fitness Mean:

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.2s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277848458381,gen:1,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.01},fitness:0.5259314256924232,method:Point Mutation,token:7c1af451ca50ac512ef14ddbbfa5d57d
name:LGBMClassifier_1664277843014196,gen:1,params:{'max_depth': 3, 'n_estimators': 37, 'learning_rate': 0.03},fitness:0.5259325053194122,method:Reproduction,token:f0bf8bf69feecdcf478d2383ae7b3790
name:LGBMClassifier_1664277848053015,gen:1,params:{'max_depth': 1, 'n_estimators': 49, 'learning_rate': 0.03},fitness:0.5285289332535609,method:Point Mutation,token:7acc827e0a4782f5571966580af14f25
name:LGBMClassifier_1664277842583285,gen:1,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.05},fitness:0.5291354886886581,method:Crossover,token:c6b088141f2f70923e8f33ebe3bf684a
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:8.192187,Generat

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277855376565,gen:2,params:{'max_depth': 1, 'n_estimators': 65, 'learning_rate': 0.03},fitness:0.5277498174380751,method:Point Mutation,token:abda78074399a6556710655b61dbab5f
name:LGBMClassifier_1664277859210483,gen:2,params:{'max_depth': 1, 'n_estimators': 41, 'learning_rate': 0.05},fitness:0.5279229776112353,method:Point Mutation,token:7f91e1cec9f9dd737e0c5ab101428170
name:LGBMClassifier_1664277848053015,gen:1,params:{'max_depth': 1, 'n_estimators': 49, 'learning_rate': 0.03},fitness:0.5285289332535609,method:Point Mutation,token:7acc827e0a4782f5571966580af14f25
name:LGBMClassifier_1664277842583285,gen:1,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.05},fitness:0.5291354886886581,method:Crossover,token:c6b088141f2f70923e8f33ebe3bf684a
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:10.727080,Gene

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277842583285,gen:1,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.05},fitness:0.5291354886886581,method:Crossover,token:c6b088141f2f70923e8f33ebe3bf684a
name:LGBMClassifier_1664277864898325,gen:3,params:{'max_depth': 1, 'n_estimators': 49, 'learning_rate': 0.09},fitness:0.529135848564321,method:Point Mutation,token:23eb8da9ebe6dc240e9095a146302214
name:LGBMClassifier_1664277860679235,gen:3,params:{'max_depth': 1, 'n_estimators': 49, 'learning_rate': 0.07},fitness:0.5299149943694453,method:Point Mutation,token:165499b071cd8f9811d4f224936c17b6
name:LGBMClassifier_1664277862954595,gen:3,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.05},fitness:0.5300017693886763,method:Point Mutation,token:18aab853959323414ecb35450bb518e6
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:6.721141,Genera

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.6s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277869048638,gen:4,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.07},fitness:0.5300016944145798,method:Reproduction,token:a1286b36ac032d292dc98cd20948def5
name:LGBMClassifier_1664277862954595,gen:3,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.05},fitness:0.5300017693886763,method:Point Mutation,token:18aab853959323414ecb35450bb518e6
name:LGBMClassifier_1664277869921106,gen:4,params:{'max_depth': 1, 'n_estimators': 97, 'learning_rate': 0.03},fitness:0.5309540003928643,method:Point Mutation,token:4f2569cc1cd7aae7afa58ea18717f0a0
name:LGBMClassifier_1664277870508957,gen:4,params:{'max_depth': 1, 'n_estimators': 69, 'learning_rate': 0.09},fitness:0.53147421565849,method:Crossover,token:b49a47cf0082e24343ca95249fba9868
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:5.558229,Generatio

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277872660515,gen:5,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.09},fitness:0.5302620344670916,method:Crossover,token:51755cdb8c5f4b6bc714c4bee1cb67dc
name:LGBMClassifier_1664277879845439,gen:5,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.07},fitness:0.5308680200990558,method:Point Mutation,token:d122af5160a4af505be4ffbe789b8f85
name:LGBMClassifier_1664277869921106,gen:4,params:{'max_depth': 1, 'n_estimators': 97, 'learning_rate': 0.03},fitness:0.5309540003928643,method:Point Mutation,token:4f2569cc1cd7aae7afa58ea18717f0a0
name:LGBMClassifier_1664277870508957,gen:4,params:{'max_depth': 1, 'n_estimators': 69, 'learning_rate': 0.09},fitness:0.53147421565849,method:Crossover,token:b49a47cf0082e24343ca95249fba9868
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:7.986535,Generation:5

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.3s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


name:LGBMClassifier_1664277886768081,gen:6,params:{'max_depth': 1, 'n_estimators': 57, 'learning_rate': 0.05},fitness:0.530780780240427,method:Point Mutation,token:c08d9597702f717c8f96e40d2f7d4ad3
name:LGBMClassifier_1664277879845439,gen:5,params:{'max_depth': 1, 'n_estimators': 85, 'learning_rate': 0.07},fitness:0.5308680200990558,method:Point Mutation,token:d122af5160a4af505be4ffbe789b8f85
name:LGBMClassifier_1664277869921106,gen:4,params:{'max_depth': 1, 'n_estimators': 97, 'learning_rate': 0.03},fitness:0.5309540003928643,method:Point Mutation,token:4f2569cc1cd7aae7afa58ea18717f0a0
name:LGBMClassifier_1664277870508957,gen:4,params:{'max_depth': 1, 'n_estimators': 69, 'learning_rate': 0.09},fitness:0.53147421565849,method:Crossover,token:b49a47cf0082e24343ca95249fba9868
name:LGBMClassifier_1664277795161571,gen:0,params:{'max_depth': 1, 'n_estimators': 53, 'learning_rate': 0.09},fitness:0.5319068911691012,method:full,token:78a79fb8d5e556c211b330ebde80b4e6
ExpendTime:9.329350,Generati