In [1]:
import os
# Process-wide settings, set before the jdw / ultron imports below.
# presumably those packages read these variables at import time — TODO confirm.
os.environ['IGNORE_WARNINGS'] = '0'
# NOTE(review): this URL embeds a database user and password in the notebook.
# Consider supplying DB_URL from the environment or a secrets store instead of
# committing it here.
os.environ['DB_URL'] = 'mysql+mysqlconnector://read:1234@127.0.0.1:3306/quant'

In [23]:
import pandas as pd

In [2]:
from jdw import EntropyAPI
from jdw.kdutils.file_utils import load_pickle
from ultron.sentry.api import *



In [3]:
#### Load the externally supplied factors
# NOTE(review): the file is read with load_pickle although it is named '.h5';
# presumably it is a pickled DataFrame, not HDF5 — confirm. Unpickling can run
# arbitrary code, so only load files from a trusted source.
factors_data = load_pickle('factors.h5')
factors_data.head()

Unnamed: 0,trade_date,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ,code
0,2021-03-01,-92.376,-4.6243,20.6704,0.0061,0.5981,0.306,1.506,10.0528,10.3092,2.5879,18.9855,-0.2538,1
1,2021-03-01,130.465,13.5899,90.0293,0.0086,0.1124,-0.0446,-1.5296,-3.2607,6.5233,2.2121,12.8189,0.1523,2
2,2021-03-01,-65.5586,-10.1681,56.9712,0.0243,0.3891,0.3139,0.833,-0.2638,1.5967,0.1913,-36.4546,-0.343,63
3,2021-03-01,119.543,-6.817,63.2887,0.03,0.186,0.211,1.1419,-1.7645,1.4818,0.3724,-48.5819,-0.3287,66
4,2021-03-01,43.3794,18.6217,80.9278,0.0073,0.0776,-0.0589,-1.0135,-14.3274,2.7526,0.6946,82.6065,-0.3225,69


In [4]:
# Date range spanned by the imported factor data; reused as begin_date/end_date
# for the industry, yields and dummy fetches further down.
begin_date = factors_data['trade_date'].min()
end_date = factors_data['trade_date'].max()

In [5]:
# Settings forwarded to EntropyAPI.StockGeneticist when `gentic` is built.
universe = 'sh50'  # name of the stock universe
dummy_name = 'dummy_test_f1r_open'  # presumably selects the tradable-sample filter — TODO confirm
industry_level = 1
industry_name = 'sw'  # presumably the industry classification scheme — TODO confirm

In [6]:
### Must stay consistent with the column names of the imported factor data:
### every column except the two key columns is treated as a factor.
factor_columns = [
    name for name in factors_data.columns
    if name not in ('trade_date', 'code')
]

In [7]:
# Operator names handed to StockGeneticist via its `operators` argument.
# presumably these are the functions the genetic search may combine into
# factor expressions — TODO confirm against the jdw / ultron documentation.
operators_sets = [
    'AVG', 'DIFF', 'LOG', 'SQRT', 'ABS', 'ACOSH', 'ASINH', 'CEIL', 'FLOOR',
    'ROUND', 'RETURNSimple', 'RETURNLog', 'ADDED', 'SUBBED', 'MUL', 'DIV',
    'MINIMUM', 'MAXIMUM', 'CSMean', 'CSRes', 'EMA', 'MA', 'MADecay', 'MMAX',
    'MMIN', 'MRANK', 'MQUANTILE', 'MSUM', 'MVARIANCE', 'MSTD', 'MNPOSITIVE',
    'MAPOSITIVE', 'RSI', 'MARETURNLog', 'DELTA', 'SHIFT', 'MCORR'
]

In [8]:
# Run configuration passed to gentic.calculate_result(configure=...).
# presumably 'evaluate' picks which evaluation result drives selection and
# 'method' which of its fields is used as the score — TODO confirm.
configure = {
    'evaluate': 'both_evaluate',
    'method': 'fitness',
    'generations': 3
}

In [9]:
# Build the genetic factor miner from the settings defined in the cells above.
# NOTE(review): the meaning of offset/horizon is not visible here; the yields
# output later exposes a column named nxt1_ret, so horizon=1 presumably means a
# one-period forward return — TODO confirm.
gentic = EntropyAPI.StockGeneticist(offset=0,
                                    horizon=1,
                                    factor_columns=factor_columns,
                                    universe=universe,
                                    dummy_name=dummy_name,
                                    industry_name=industry_name,
                                    industry_level=industry_level,
                                    operators=operators_sets,
                                    callback_save=None)

##### 分批加载数据

In [10]:
#### Load industry data
# NOTE(review): the universe object is built from private attributes of
# `gentic` (_universe_class, _universe); this may break if the library changes
# its internals.
industry_data = gentic.fetch_industry(
            begin_date=begin_date,
            end_date=end_date,
            universe=gentic._universe_class(u_name=gentic._universe))
industry_data.head()

2023-03-27 11:02:09,529 - ultron - INFO - start fetch industry data


Unnamed: 0,trade_date,code,industry_code,industry
0,2021-03-01,600000,1030321,银行
1,2021-03-01,600009,1030319,交通运输
2,2021-03-01,600016,1030321,银行
3,2021-03-01,600028,1030303,化工
4,2021-03-01,600030,1030322,非银金融


In [11]:
## Median fill: let gentic.industry_fillna fill the factor data using the
## industry data, then order the rows by trade date and code.
factors_data = gentic.industry_fillna(
    factors_data=factors_data,
    industry_data=industry_data,
).sort_values(by=['trade_date', 'code'])
factors_data.head()

2023-03-27 11:03:36,370 - ultron - INFO - start industry median data ...


Unnamed: 0,trade_date,code,industry_code,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ
0,2021-03-01,600000,1030321,-119.6217,8.8477,58.427,0.0018,-0.0569,-0.1589,-1.5026,-5.8641,1.6866,0.2442,59.139,0.4224
1,2021-03-01,600009,1030319,27.2477,-15.0396,59.802,0.0155,4.9168,-1.2064,0.5353,-40.882,17.3997,1.8328,0.3477,-1.3205
2,2021-03-01,600016,1030321,-61.5385,0.7797,60.4651,0.0028,0.1291,0.1194,1.3054,-8.5077,1.8952,0.4104,46.3217,-1.1608
3,2021-03-01,600028,1030303,25.1962,13.6816,70.1389,0.0014,0.3707,0.1727,0.7672,-2.6646,1.1873,0.3424,164.5312,1.8884
4,2021-03-01,600030,1030322,-279.3646,-14.5145,36.7089,0.0162,0.0,0.0,0.0,0.0,3.2118,0.2678,-43.515,0.576


In [12]:
#### Load returns (yields)
# NOTE(review): `yileds_data` is a misspelling of "yields"; the name is kept
# because the merge cell further down refers to it.
yileds_data = gentic.fetch_yields(
            begin_date=begin_date,
            end_date=end_date,
            universe=gentic._universe_class(
                u_name=gentic._universe))
yileds_data.head()

2023-03-27 11:04:09,114 - ultron - INFO - start create yields data


Unnamed: 0,trade_date,code,nxt1_ret
0,2021-03-01,600000,-0.010454
45,2021-03-01,600009,-0.024293
90,2021-03-01,600016,-0.005817
135,2021-03-01,600028,-0.04249
180,2021-03-01,600030,-0.018164


In [14]:
### Winsorize (remove extremes) and standardize
# NOTE(review): the original comment said "returns + factors", but only
# factors_data is passed here; the returns are merged in afterwards. Confirm
# whether the returns are meant to be normalised as well.
# This cell overwrites factors_data, so re-running it normalises already
# normalised values.
factors_data = gentic.factors_normal(factors_data)
factors_data.head()

2023-03-27 11:05:49,955 - ultron - INFO - start factors normal


Unnamed: 0,trade_date,code,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ
0,2021-03-01,600000,-0.939564,1.362608,0.17108,-0.869477,-0.86242,-1.620156,-2.459725,-0.817891,-0.695902,-0.684572,0.6703,0.133576
1,2021-03-01,600009,0.719682,-1.343814,0.275447,0.260389,3.7565,-3.266156,0.025443,-3.930504,3.00514,1.31776,-0.185906,-2.066765
2,2021-03-01,600016,-0.283374,0.448507,0.325778,-0.787005,-0.343856,0.028359,0.964561,-1.327078,-0.644832,-0.475087,0.483636,-1.86515
3,2021-03-01,600028,0.696506,1.910287,1.060053,-0.902466,0.32972,0.344082,0.308239,-0.201632,-0.818141,-0.560797,2.205178,1.984341
4,2021-03-01,600030,-2.744249,-1.284321,-1.4774,0.318119,-0.703784,-0.678909,-0.627342,0.3116,-0.322502,-0.654825,-0.8247,0.32749


In [15]:
# Join the normalised factors with the returns on (trade_date, code);
# rows without a match on both sides are dropped (default inner join).
total_data = pd.merge(factors_data, yileds_data, on=['trade_date', 'code'])
total_data.head()

Unnamed: 0,trade_date,code,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ,nxt1_ret
0,2021-03-01,600000,-0.939564,1.362608,0.17108,-0.869477,-0.86242,-1.620156,-2.459725,-0.817891,-0.695902,-0.684572,0.6703,0.133576,-0.010454
1,2021-03-01,600009,0.719682,-1.343814,0.275447,0.260389,3.7565,-3.266156,0.025443,-3.930504,3.00514,1.31776,-0.185906,-2.066765,-0.024293
2,2021-03-01,600016,-0.283374,0.448507,0.325778,-0.787005,-0.343856,0.028359,0.964561,-1.327078,-0.644832,-0.475087,0.483636,-1.86515,-0.005817
3,2021-03-01,600028,0.696506,1.910287,1.060053,-0.902466,0.32972,0.344082,0.308239,-0.201632,-0.818141,-0.560797,2.205178,1.984341,-0.04249
4,2021-03-01,600030,-2.744249,-1.284321,-1.4774,0.318119,-0.703784,-0.678909,-0.627342,0.3116,-0.322502,-0.654825,-0.8247,0.32749,-0.018164


In [16]:
# Fetch the dummy data for the same date range. The displayed result has only
# trade_date and code columns.
# presumably it lists the (date, stock) pairs allowed into the sample — TODO confirm.
dummy_data = gentic.dummy_data(
    begin_date=begin_date, end_date=end_date)
dummy_data.head()

2023-03-27 11:06:36,646 - ultron - INFO - start fetch dummy data


Unnamed: 0,trade_date,code
0,2021-03-01,600000
1,2021-03-01,600009
2,2021-03-01,600016
3,2021-03-01,600028
4,2021-03-01,600031


In [17]:
# Inner-join with dummy_data on (trade_date, code): only rows whose key pair
# also appears in dummy_data are kept.
total_data = pd.merge(total_data, dummy_data, on=['trade_date', 'code'])
total_data.head()

Unnamed: 0,trade_date,code,CCI10,ROC20,RSI,VOL240,aiDaNp60,aiDaPE60,aiEtopZ180,aiSude,hkHoldRatioAll,hkHoldRatioB,hkHoldVolChgB120,lpnpQ,nxt1_ret
0,2021-03-01,600000,-0.939564,1.362608,0.17108,-0.869477,-0.86242,-1.620156,-2.459725,-0.817891,-0.695902,-0.684572,0.6703,0.133576,-0.010454
1,2021-03-01,600009,0.719682,-1.343814,0.275447,0.260389,3.7565,-3.266156,0.025443,-3.930504,3.00514,1.31776,-0.185906,-2.066765,-0.024293
2,2021-03-01,600016,-0.283374,0.448507,0.325778,-0.787005,-0.343856,0.028359,0.964561,-1.327078,-0.644832,-0.475087,0.483636,-1.86515,-0.005817
3,2021-03-01,600028,0.696506,1.910287,1.060053,-0.902466,0.32972,0.344082,0.308239,-0.201632,-0.818141,-0.560797,2.205178,1.984341,-0.04249
4,2021-03-01,600031,-0.182755,0.555507,-0.100298,-0.168466,-0.119981,-0.957314,-2.146686,0.448007,1.211716,0.314703,-0.458019,-0.050491,-0.017044


#### 挖掘因子

In [19]:
# Run the genetic search on the merged factor + return data.
# The result is used below through .values(), so it behaves like a mapping
# whose values are program objects.
# NOTE(review): no random seed is set anywhere in this notebook, so the mined
# programs are presumably not reproducible between runs — TODO confirm.
evolution_sets = gentic.calculate_result(total_data=total_data,
                                     configure=configure,
                                     custom_params=None)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.




[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    5.9s remaining:    5.9s




[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    6.6s finished
2023-03-27 11:08:28,485 - ultron - INFO - ExpendTime:6.627854,Generation:0,Tournament:20, Fitness Mean:3.971363,Fitness Max:11.903532,Fitness Min:1.489015




[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.




[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    6.1s remaining:    6.1s




[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    7.3s finished
2023-03-27 11:08:35,779 - ultron - INFO - ExpendTime:7.292835,Generation:1,Tournament:20, Fitness Mean:7.329111,Fitness Max:23.941481,Fitness Min:2.478728
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.




[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    8.1s remaining:    8.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    8.7s finished
2023-03-27 11:08:44,449 - ultron - INFO - ExpendTime:8.668575,Generation:2,Tournament:20, Fitness Mean:8.316611,Fitness Max:23.941481,Fitness Min:4.101271


In [20]:
def _create_program(best_programs):
    result = [best_program.output() for best_program in best_programs]
    result = pd.DataFrame(result)
    result.sort_values(by='fitness', ascending=False, inplace=True)
    result['ranking'] = result.rank(ascending=False)['fitness']
    result.rename(columns={'name': 'fid'}, inplace=True)
    return result

In [21]:
def _create_metrics(best_programs):
    def _transform_metrics(program):
        res = []
        for c in ['short', 'long', 'both']:
            metrics_data = program._retain_data.__getattribute__(
                        '{0}_evaluate'.format(c))._asdict()
            metrics_data = pd.DataFrame([metrics_data
                                                 ]).drop(['category'], axis=1)
            new_name = zip(metrics_data.columns, [
                        "{0}_{1}".format(c, col)
                        for col in metrics_data.columns
                    ])
            metrics_data.rename(columns=dict(new_name), inplace=True)
            res.append(metrics_data)
        result = pd.concat(res, axis=1)
        result['direction'] = program._retain_data.direction
        result['fid'] = program._name
        return result.set_index('fid')

    results = [
                _transform_metrics(best_program)
                for best_program in best_programs
            ]
    return pd.concat(results, axis=0).reset_index()

In [24]:
# Summarise the mined programs: one table of program descriptions ranked by
# fitness, and one table of their short / long / both evaluation metrics.
programs = _create_program(evolution_sets.values())
metrics = _create_metrics(best_programs=evolution_sets.values())

In [25]:
# Show the ranked program table (highest fitness first).
programs

Unnamed: 0,fid,method,gen,features,formual,fitness,update_time,ranking
30,ultron_1679886514429459,Crossover,1,519b6bd4f8847cc44d0b449335eac4d6,"FLOOR(EMA(18,'VOL240'))",23.941481,2023-03-27 11:08:33.714723,1.0
33,ultron_1679886523061195,Crossover,2,320b556f36c62f89bcda5a12a085a842,"FLOOR(EMA(18,'hkHoldRatioAll'))",14.188003,2023-03-27 11:08:42.530592,2.0
29,ultron_1679886510371095,Crossover,1,8a49b8e67f391cd271b9b8dfa0fbde2b,"MADecay(2,'VOL240')",12.875609,2023-03-27 11:08:29.685542,3.0
19,ultron_1679886504932383,Gen,0,45ba0f5f8dd32de665a907ecbb047917,"MMAX(14,'VOL240')",11.903532,2023-03-27 11:08:24.466175,4.0
18,ultron_1679886507331448,Gen,0,a8fc8e57fc8acfde38ce189734bcd2be,FLOOR('hkHoldRatioAll'),11.470176,2023-03-27 11:08:27.165708,5.0
28,ultron_1679886512827561,Point Mutation,1,1ffc49b0c5fe333742c75109f10623fc,"MMAX(14,MMAX(14,'hkHoldRatioAll'))",8.489087,2023-03-27 11:08:31.913775,6.0
32,ultron_1679886523884145,Reproduction,2,da2bab48273d768905db4b5fc1a7e6de,"EMA(16,'hkHoldRatioAll')",8.250015,2023-03-27 11:08:42.942068,7.0
27,ultron_1679886515846153,Reproduction,1,13f2f8c6d8c5c918ec1a09ef3ebca6d4,"EMA(16,'aiDaPE60')",7.628184,2023-03-27 11:08:34.923071,8.0
26,ultron_1679886515068379,Point Mutation,1,4f444aa0151acd11e84c1d7790cd3159,"EMA(12,'hkHoldRatioAll')",7.580679,2023-03-27 11:08:34.534184,9.0
25,ultron_1679886511274021,Subtree Mutation,1,c2e70cd93785a71c25f94717f0ec0fd4,"MNPOSITIVE(14,RSI(4,MSUM(8,MNPOSITIVE(2,MVARIA...",6.582463,2023-03-27 11:08:30.637005,11.0


In [26]:
# Show the per-program evaluation metrics.
metrics

Unnamed: 0,fid,short_returns_mean,short_returns_std,short_sharp,short_turnover,short_maxdd,short_returns_mdd,short_win_rate,short_ic,short_ir,...,both_returns_std,both_sharp,both_turnover,both_maxdd,both_returns_mdd,both_win_rate,both_ic,both_ir,both_fitness,direction
0,ultron_1679886507571625,0.128543,0.082981,1.549062,0.068651,0.025165,5.108114,0.465116,0.01267927,0.04914245,...,0.236183,1.05839,0.126296,0.050924,4.908765,0.465116,0.012258,0.048511,1.489015,1
1,ultron_1679886505152669,-0.088955,0.175169,-0.507825,0.053233,0.077447,-1.14859,0.55814,0.0406904,0.1440582,...,0.262063,0.999648,0.111074,0.121716,2.152303,0.488372,0.010232,0.035735,1.535209,-1
2,ultron_1679886507803327,-0.173496,0.074504,-2.328684,0.187049,0.048575,-3.571742,0.418605,0.007145356,0.0390472,...,0.129755,2.084204,0.47934,0.055331,4.887572,0.534884,0.032763,0.213284,1.565491,1
3,ultron_1679886506680803,-0.300118,0.075178,-3.992114,0.659825,0.067972,-4.415341,0.372093,-0.08652621,-0.4129357,...,0.121007,3.027763,1.345411,0.038067,9.624592,0.534884,0.034071,0.236348,1.580012,1
4,ultron_1679886505861957,-0.022961,0.191936,-0.119626,0.045207,0.100174,-0.229207,0.488372,-0.01731655,-0.04970694,...,0.292066,0.912576,0.085184,0.139414,1.911808,0.488372,0.013045,0.048197,1.61423,-1
5,ultron_1679886507688449,-0.182934,0.088219,-2.073635,0.203039,0.064413,-2.840045,0.372093,0.02865366,0.1216129,...,0.13763,2.076166,0.4154,0.037306,7.659388,0.511628,-0.007816,-0.052667,1.721932,-1
6,ultron_1679886507124933,-0.237604,0.108053,-2.198945,0.20454,0.04653,-5.10651,0.372093,-0.033544,-0.1527681,...,0.136746,2.228318,0.414074,0.035759,8.521356,0.511628,0.018255,0.115338,1.911546,-1
7,ultron_1679886507249070,-0.275579,0.108297,-2.544667,0.196047,0.048362,-5.698297,0.372093,-0.01654747,-0.07351195,...,0.16433,2.03032,0.341204,0.041795,7.98292,0.465116,0.023125,0.129928,2.007699,-1
8,ultron_1679886505491893,-0.124121,0.115322,-1.076298,0.254128,0.056082,-2.2132,0.511628,0.03148097,0.1081746,...,0.165317,2.12856,0.323337,0.061121,5.757217,0.488372,0.027254,0.154393,2.220547,1
9,ultron_1679886506690535,-0.418406,0.107276,-3.900279,0.511019,0.076367,-5.478914,0.395349,-0.04521712,-0.1881072,...,0.169477,3.134004,0.935677,0.045813,11.593629,0.651163,0.014649,0.083115,2.361252,1
