In [9]:
%matplotlib inline

import os
import datetime as dt
import numpy as np 
import pandas as pd
from alphamind.api import *
from PyFin.api import *
from alphamind.model.modelbase import create_model_base
from m1_xgb import *

In [10]:
# --- Run configuration -------------------------------------------------------
# Everything a reader might want to tune for this experiment lives in this cell.
freq = '5b'
universe = Universe('zz500')
batch = 1
neutralized_risk = industry_styles
risk_model = 'short'
pre_process = [winsorize_normal, standardize]
post_process = [standardize]
warm_start = 3

# The connection string embeds a user name and password, so allow it to be
# supplied from the environment instead of editing the notebook.
# FIXME: the literal fallback is kept only so existing runs keep working;
# remove it once ALPHAMIND_DATA_SOURCE is set wherever this notebook runs.
data_source = os.environ.get(
    'ALPHAMIND_DATA_SOURCE',
    'postgresql+psycopg2://alpha:alpha@180.166.26.82:8889/alpha',
)
horizon = map_freq(freq)

engine = SqlEngine(data_source)

In [18]:
# The factor being modelled; it is both the regression target and, through its
# latest value and three shifted copies, the source of every regressor.
kernal_feature = 'ROE'

regress_features = {kernal_feature: LAST(kernal_feature)}
regress_features.update({
    '{0}_l{1}'.format(kernal_feature, lag): SHIFT(kernal_feature, lag)
    for lag in (1, 2, 3)
})
fit_target = [kernal_feature]

# Bundle the run configuration defined earlier into a single DataMeta object.
data_meta = DataMeta(
    freq=freq,
    universe=universe,
    batch=batch,
    neutralized_risk=neutralized_risk,
    risk_model=risk_model,
    pre_process=pre_process,
    post_process=post_process,
    warm_start=warm_start,
    data_source=data_source,
)

In [20]:
# Display the risk factor names held by data_meta (passed in as `industry_styles`).
data_meta.neutralized_risk

['Bank',
 'RealEstate',
 'Health',
 'Transportation',
 'Mining',
 'NonFerMetal',
 'HouseApp',
 'LeiService',
 'MachiEquip',
 'BuildDeco',
 'CommeTrade',
 'CONMAT',
 'Auto',
 'Textile',
 'FoodBever',
 'Electronics',
 'Computer',
 'LightIndus',
 'Utilities',
 'Telecom',
 'AgriForest',
 'CHEM',
 'Media',
 'IronSteel',
 'NonBankFinan',
 'ELECEQP',
 'AERODEF',
 'Conglomerates']

In [12]:
class LinearRegressionXGB(create_model_base()):
    """Alpha model whose underlying estimator is an ``XGBooster``.

    NOTE(review): the recorded run of this notebook ends with
    ``TypeError: predict() missing 1 required positional argument: 'x_pred'``
    when the IC is computed. Presumably ``XGBooster.predict`` does not match
    the signature the base class calls it with -- confirm against ``m1_xgb``.
    """

    def __init__(self, features, fit_target, arg):
        """Set up the base model and build the estimator.

        :param features: feature definitions forwarded to the base class.
        :param fit_target: fit target forwarded to the base class.
        :param arg: configuration object handed to ``XGBooster``.
        """
        super().__init__(features=features, fit_target=fit_target)
        self.impl = XGBooster(arg)

    @classmethod
    def load(cls, model_desc: dict):
        """Rebuild a model from ``model_desc`` by deferring to the base class."""
        return super().load(model_desc)

    def save(self):
        """Return the base-class description extended with a ``'weight'`` entry.

        The extra entry holds whatever ``self.impl.result()`` returns.
        """
        description = super().save()
        description['weight'] = self.impl.result()
        return description


In [13]:
# NOTE(review): `regress_conf` is not defined in this notebook; presumably it
# is brought in by `from m1_xgb import *` -- confirm.
# The return value of xgb_config_r() is discarded, so it is presumably called
# for its effect on the configuration object.
regress_conf.xgb_config_r()

# Wrap the configured booster in the alpha model, then pair it with the data
# meta so it can be trained and evaluated through a Composer.
regression_model_xg = LinearRegressionXGB(
    features=regress_features,
    fit_target=fit_target,
    arg=regress_conf,
)
regression_composer_xg = Composer(
    alpha_model=regression_model_xg,
    data_meta=data_meta,
)

In [14]:
# Display the configured `cv_folds` value (presumably the number of cross-validation folds).
regress_conf.cv_folds

10

In [15]:
# Adjust the nominal start date with the 'china.sse' calendar and keep it as an
# ISO-formatted string, which is the form the later cells pass around.
ref_date = adjustDateByCalendar('china.sse', '2011-01-01').strftime('%Y-%m-%d')
print(ref_date)

2011-01-04


In [17]:
# Train the composer's model as of ref_date, then print the first element of
# the result returned by ic() for the same date.
# The label previously read "Tensorflow", but the model in this notebook is
# backed by XGBooster, so the message now names the right library.
# NOTE(review): in the recorded run the ic() call fails with
# "TypeError: predict() missing 1 required positional argument: 'x_pred'";
# that has to be resolved in the model / XGBooster wrapper, not in this cell.
regression_composer_xg.train(ref_date)
print("XGBoost Regression Testing IC: {0:.4f}".format(regression_composer_xg.ic(ref_date=ref_date)[0]))

cross_validation。。。。
[0]	train-rmse:1.10647	test-rmse:1.09378
[1]	train-rmse:1.09609	test-rmse:1.08355
[2]	train-rmse:1.08582	test-rmse:1.07353
[3]	train-rmse:1.07564	test-rmse:1.06358
[4]	train-rmse:1.06557	test-rmse:1.05364
[5]	train-rmse:1.05559	test-rmse:1.04385
[6]	train-rmse:1.04571	test-rmse:1.03407
[7]	train-rmse:1.03593	test-rmse:1.02448
[8]	train-rmse:1.02624	test-rmse:1.01492
[9]	train-rmse:1.01664	test-rmse:1.00547
[10]	train-rmse:1.00714	test-rmse:0.996143
[11]	train-rmse:0.997729	test-rmse:0.98705
[12]	train-rmse:0.988406	test-rmse:0.977898
[13]	train-rmse:0.979174	test-rmse:0.968823
[14]	train-rmse:0.970031	test-rmse:0.959916
[15]	train-rmse:0.960979	test-rmse:0.951216
[16]	train-rmse:0.952011	test-rmse:0.942369
[17]	train-rmse:0.94313	test-rmse:0.933722
[18]	train-rmse:0.934335	test-rmse:0.925102
[19]	train-rmse:0.925626	test-rmse:0.916629
[20]	train-rmse:0.916997	test-rmse:0.90836
[21]	train-rmse:0.908455	test-rmse:0.900172
[22]	train-rmse:0.899998	test-rmse:0.892061
[

[186]	train-rmse:0.200808	test-rmse:0.215583
[187]	train-rmse:0.199042	test-rmse:0.213815
[188]	train-rmse:0.197292	test-rmse:0.212109
[189]	train-rmse:0.195558	test-rmse:0.210375
[190]	train-rmse:0.19384	test-rmse:0.208678
[191]	train-rmse:0.192139	test-rmse:0.206995
[192]	train-rmse:0.190453	test-rmse:0.205331
[193]	train-rmse:0.188783	test-rmse:0.203714
[194]	train-rmse:0.187129	test-rmse:0.202075
[195]	train-rmse:0.18549	test-rmse:0.200451
[196]	train-rmse:0.183866	test-rmse:0.198848
[197]	train-rmse:0.182258	test-rmse:0.197297
[198]	train-rmse:0.180663	test-rmse:0.195717
[199]	train-rmse:0.179084	test-rmse:0.194184
[200]	train-rmse:0.17752	test-rmse:0.192644
[201]	train-rmse:0.175971	test-rmse:0.191112
[202]	train-rmse:0.174436	test-rmse:0.189564
[203]	train-rmse:0.172915	test-rmse:0.188057
[204]	train-rmse:0.171409	test-rmse:0.186569
[205]	train-rmse:0.169916	test-rmse:0.185081
[206]	train-rmse:0.168438	test-rmse:0.183619
[207]	train-rmse:0.166973	test-rmse:0.182182
[208]	train-r

[365]	train-rmse:0.0464939	test-rmse:0.0625518
[366]	train-rmse:0.0461651	test-rmse:0.0622299
[367]	train-rmse:0.0458391	test-rmse:0.0619109
[368]	train-rmse:0.0455164	test-rmse:0.0615951
[369]	train-rmse:0.0451968	test-rmse:0.0612836
[370]	train-rmse:0.0448803	test-rmse:0.0609735
[371]	train-rmse:0.0445669	test-rmse:0.0606668
[372]	train-rmse:0.0442568	test-rmse:0.0603647
[373]	train-rmse:0.0439496	test-rmse:0.0600659
[374]	train-rmse:0.0436453	test-rmse:0.0597686
[375]	train-rmse:0.0433441	test-rmse:0.05947
[376]	train-rmse:0.0430457	test-rmse:0.0591782
[377]	train-rmse:0.0427504	test-rmse:0.0588917
[378]	train-rmse:0.0424579	test-rmse:0.0586054
[379]	train-rmse:0.0421682	test-rmse:0.0583219
[380]	train-rmse:0.0418815	test-rmse:0.0580443
[381]	train-rmse:0.0415973	test-rmse:0.0577636
[382]	train-rmse:0.0413164	test-rmse:0.0574891
[383]	train-rmse:0.0410381	test-rmse:0.0572162
[384]	train-rmse:0.0407626	test-rmse:0.0569476
[385]	train-rmse:0.04049	test-rmse:0.0566813
[386]	train-rmse:

spend time :9.380418300628662(s)


TypeError: predict() missing 1 required positional argument: 'x_pred'

In [None]:
# Print the `params` attribute of the configuration object (presumably the
# hyper-parameters handed to XGBoost -- confirm). This cell has no recorded run.
print(regress_conf.params)


In [None]:
import os
from PyFin.api import *
from alphamind.api import *


# Screen set-up. NOTE(review): this cell rebinds `ref_date`, `universe` and
# `engine`, which earlier cells also define; re-running those cells afterwards
# will pick up the values assigned here.
ref_date = '2019-02-13'
universe_name = 'zz1000'

# Presumably selects the 20 lowest-ROE names in the cross-section -- confirm
# against the PyFin documentation for CSBottomN.
formula = CSBottomN(LAST('ROE'), 20)
depends = formula.fields

# Resolve the universe members on the reference date.
engine = SqlEngine(data_source)
universe = Universe(universe_name)
codes = universe.query(engine, dates=[ref_date])

# Fetch the fields the formula depends on for those codes.
factors = engine.fetch_factor(
    ref_date,
    depends,
    codes.code.tolist(),
)