In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pylab as plt
from scipy.stats import pearsonr

from hyperopt import fmin, tpe, hp, partial


def find_time_ind(df):
    day_ind = [0]
    day_temp = df["TradingDay"][0]
    for i in range(1, len(df)):
        if df["TradingDay"][i] != day_temp:
            day_ind.append(i)
            day_temp = df["TradingDay"][i]
    return day_ind

In [2]:
# input cell
# train = pd.read_csv('0805rolling5.csv')
train = pd.read_csv('08053before_rolling.csv')
# train = pd.read_csv('08052acou_and_factors.csv')
# x_column = ["factor"+str(i)+'_'+str(j) for i in range(0,17) for j in range(5)]
x_column = ["factor"+str(i) for i in range(0,17)]
# for i in range(12):
#     for j in range(5):
#         x_column.append("10M" + str(i) +'_'+str(j))
# for i in range(12):
#     x_column.append("10M" + str(i))

train = train.sort_values(by = ["TradingDay", "SecuCode"], ascending = [True, True])
train.reset_index(drop=True, inplace=True)
X = train[x_column]
target = ['NextReturnCate']
y = train[target]
y += 1
yp = train["NextReturn"]
time_ind = find_time_ind(train)

In [3]:
# initializing
train_days = 775
delta_days = -776
X_train = X[time_ind[0]:time_ind[train_days]]
X_test = X[time_ind[train_days]:time_ind[train_days+delta_days]]
y_train = y[time_ind[0]:time_ind[train_days]]
y_test = y[time_ind[train_days]:time_ind[train_days+delta_days]]
yp_train = yp[time_ind[0]:time_ind[train_days]]
yp_test = yp[time_ind[train_days]:time_ind[train_days+delta_days]]
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test)
ddev = xgb.DMatrix(data=X_test, label=y_test)

evallist = [(ddev, 'eval'), (dtrain, 'train')]


In [4]:
# 自定义hyperopt的参数空间
space = {"max_depth": hp.randint("max_depth", 25),
         "n_estimators": hp.randint("n_estimators", 30),
         'learning_rate': hp.uniform('learning_rate', 1e-3, 5e-1),
         "subsample": hp.randint("subsample", 5),
         "min_child_weight": hp.randint("min_child_weight", 6),
         "alpha": hp.uniform("alpha", 0, 1),
         "lambda": hp.uniform("lambda", 0, 1)
         }

def argsDict_tranform(argsDict, isPrint=False):
    argsDict["max_depth"] = argsDict["max_depth"] + 5    # from 5 to 30
    argsDict['n_estimators'] = argsDict['n_estimators'] + 10  # from 10 to 40
    argsDict["learning_rate"] = argsDict["learning_rate"] * 0.02 + 0.001   # from 0.001 to 0.01
    argsDict["subsample"] = argsDict["subsample"] * 0.1 + 0.5  # from 0.5 to 1
    argsDict["min_child_weight"] = argsDict["min_child_weight"] + 1 # from 1 to 6
    argsDict["alpha"] = argsDict["alpha"] * 20
    argsDict["lambda"] = argsDict["lambda"] * 50
    if isPrint:
        print(argsDict)
    else:
        pass

    return argsDict

In [5]:
from sklearn.metrics import mean_squared_error, zero_one_loss

def xgboost_factory(argsDict):
    argsDict = argsDict_tranform(argsDict)
    
    params = {'nthread': -1,  # 进程数
              'max_depth': argsDict['max_depth'],  # 最大深度
              'n_estimators': argsDict['n_estimators'],  # 树的数量
              'eta': argsDict['learning_rate'],  # 学习率
              'subsample': argsDict['subsample'],  # 采样数
              'min_child_weight': argsDict['min_child_weight'],  # 终点节点最小样本占比的和
              'objective': 'reg:squarederror',
              'silent': 0,  # 是否显示
              'gamma': 0,  # 是否后剪枝
              'colsample_bytree': 0.7,  # 样本列采样
              'alpha': argsDict['alpha'],  # L1 正则化
              'lambda': argsDict['lambda'],  # L2 正则化
              'scale_pos_weight': 0,  # 取值>0时,在数据不平衡时有助于收敛
              'seed': 100,  # 随机种子
              'tree_method':'exact'
              }

    xrf = xgb.train(params, dtrain, params['n_estimators'], evallist,early_stopping_rounds=100)

    return get_tranformer_score(xrf)


def get_tranformer_score(tranformer):
    
    xrf = tranformer
    pred = xrf.predict(ddev, ntree_limit=xrf.best_ntree_limit)
  
    return -pearsonr(np.array(yp_test), pred)[0]

In [6]:
# 开始使用hyperopt进行自动调参
algo = partial(tpe.suggest, n_startup_jobs=1)
best = fmin(xgboost_factory, space, algo=algo, max_evals=100, pass_expr_memo_ctrl=None)
store = best
print("the best parameter before transformation is:" + best)
print("the best parameter after transformation is:" + argsDict_tranform(best))


[0]	eval-rmse:0.954875	train-rmse:0.93093            

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.

[1]	eval-rmse:0.953518	train-rmse:0.929155           

[2]	eval-rmse:0.95216	train-rmse:0.927418            

[3]	eval-rmse:0.950901	train-rmse:0.926101           

[4]	eval-rmse:0.94945	train-rmse:0.924519            

[5]	eval-rmse:0.948124	train-rmse:0.922813           

[6]	eval-rmse:0.946781	train-rmse:0.921258           

[7]	eval-rmse:0.945502	train-rmse:0.91983            

[8]	eval-rmse:0.944165	train-rmse:0.918299           

[9]	eval-rmse:0.942875	train-rmse:0.916808           

[10]	eval-rmse:0.941597	train-rmse:0.915348          

[11]	eval-rmse:0.940298	train-rmse:0.913958          

[12]	eval-rmse:0.939008	train-rmse:0.912558          

[13]	eval-rmse:0.937879	train-rmse:0.911165          

[14]	eval-rmse:0.936715	train-rmse:0.909794          

[15]	eval-rmse:0.935441	tra

[4]	eval-rmse:0.954744	train-rmse:0.931056                                        

[5]	eval-rmse:0.954369	train-rmse:0.930703                                        

[6]	eval-rmse:0.954084	train-rmse:0.93049                                         

[7]	eval-rmse:0.953874	train-rmse:0.930251                                        

[8]	eval-rmse:0.953717	train-rmse:0.929924                                        

[9]	eval-rmse:0.953516	train-rmse:0.929459                                        

[10]	eval-rmse:0.95328	train-rmse:0.928993                                        

[11]	eval-rmse:0.952967	train-rmse:0.928663                                       

[12]	eval-rmse:0.952644	train-rmse:0.92834                                        

[13]	eval-rmse:0.952335	train-rmse:0.927971                                       

[14]	eval-rmse:0.952069	train-rmse:0.927637                                       

[15]	eval-rmse:0.951762	train-rmse:0.927268                                 

[10]	eval-rmse:0.947122	train-rmse:0.921781                                       

[11]	eval-rmse:0.946257	train-rmse:0.920886                                       

[12]	eval-rmse:0.945461	train-rmse:0.920035                                       

[13]	eval-rmse:0.94471	train-rmse:0.919154                                        

[14]	eval-rmse:0.943928	train-rmse:0.918261                                       

[15]	eval-rmse:0.943116	train-rmse:0.917325                                       

[16]	eval-rmse:0.942337	train-rmse:0.916406                                       

[17]	eval-rmse:0.941602	train-rmse:0.915516                                       

[18]	eval-rmse:0.940768	train-rmse:0.914679                                       

[0]	eval-rmse:0.954251	train-rmse:0.930368                                        

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.              

[17]	eval-rmse:0.924885	train-rmse:0.898777                                        

[18]	eval-rmse:0.923373	train-rmse:0.89726                                         

[19]	eval-rmse:0.92191	train-rmse:0.895783                                         

[20]	eval-rmse:0.920445	train-rmse:0.894279                                        

[21]	eval-rmse:0.918909	train-rmse:0.892736                                        

[22]	eval-rmse:0.91742	train-rmse:0.8912                                           

[0]	eval-rmse:0.956121	train-rmse:0.932138                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.955544	train-rmse:0.931684                                         

[2]	eval-rmse:0.95463	train-rmse:0.930853                                          

[3]	eval-rmse:0.954117	train-rmse:0.930394                    

Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.954005	train-rmse:0.929965                                         

[2]	eval-rmse:0.953011	train-rmse:0.928372                                         

[3]	eval-rmse:0.95188	train-rmse:0.92705                                           

[4]	eval-rmse:0.950778	train-rmse:0.925963                                         

[5]	eval-rmse:0.949669	train-rmse:0.924682                                         

[6]	eval-rmse:0.948595	train-rmse:0.923293                                         

[7]	eval-rmse:0.947489	train-rmse:0.921914                                         

[8]	eval-rmse:0.946469	train-rmse:0.920731                                         

[9]	eval-rmse:0.945376	train-rmse:0.919571                                         

[10]	eval-rmse:0.944342	train-rmse:0.918309                                        

[11]	eval-rmse:0.94325	train-rmse:0.917057                       

[14]	eval-rmse:0.941724	train-rmse:0.902751                                        

[15]	eval-rmse:0.940783	train-rmse:0.900827                                        

[16]	eval-rmse:0.939839	train-rmse:0.898976                                        

[17]	eval-rmse:0.93897	train-rmse:0.897106                                         

[18]	eval-rmse:0.938082	train-rmse:0.895219                                        

[19]	eval-rmse:0.937195	train-rmse:0.893319                                        

[20]	eval-rmse:0.936324	train-rmse:0.891462                                        

[21]	eval-rmse:0.935448	train-rmse:0.889593                                        

[22]	eval-rmse:0.934581	train-rmse:0.88768                                         

[23]	eval-rmse:0.933683	train-rmse:0.885746                                        

[24]	eval-rmse:0.93278	train-rmse:0.883961                                         

[25]	eval-rmse:0.931922	train-rmse:0.882229                      

[3]	eval-rmse:0.955188	train-rmse:0.931211                                         

[4]	eval-rmse:0.954667	train-rmse:0.930741                                         

[5]	eval-rmse:0.954321	train-rmse:0.930431                                         

[6]	eval-rmse:0.954063	train-rmse:0.930085                                         

[7]	eval-rmse:0.953826	train-rmse:0.929611                                         

[8]	eval-rmse:0.953564	train-rmse:0.929097                                         

[9]	eval-rmse:0.953307	train-rmse:0.928649                                         

[10]	eval-rmse:0.953008	train-rmse:0.928225                                        

[11]	eval-rmse:0.952672	train-rmse:0.927799                                        

[12]	eval-rmse:0.952343	train-rmse:0.927393                                        

[13]	eval-rmse:0.95206	train-rmse:0.927033                                         

[14]	eval-rmse:0.951782	train-rmse:0.926734                      

[6]	eval-rmse:0.95339	train-rmse:0.929211                                          

[7]	eval-rmse:0.952894	train-rmse:0.928669                                         

[8]	eval-rmse:0.952422	train-rmse:0.928177                                         

[9]	eval-rmse:0.951906	train-rmse:0.927538                                         

[10]	eval-rmse:0.95152	train-rmse:0.927174                                         

[11]	eval-rmse:0.951119	train-rmse:0.926896                                        

[12]	eval-rmse:0.950696	train-rmse:0.926607                                        

[13]	eval-rmse:0.950316	train-rmse:0.92625                                         

[14]	eval-rmse:0.949866	train-rmse:0.925828                                        

[15]	eval-rmse:0.949441	train-rmse:0.925378                                        

[16]	eval-rmse:0.948959	train-rmse:0.92488                                         

[17]	eval-rmse:0.948522	train-rmse:0.924414                      

[11]	eval-rmse:0.941629	train-rmse:0.915265                                      

[12]	eval-rmse:0.940427	train-rmse:0.913967                                      

[13]	eval-rmse:0.939427	train-rmse:0.91266                                       

[14]	eval-rmse:0.938384	train-rmse:0.911398                                      

[15]	eval-rmse:0.937224	train-rmse:0.910098                                      

[16]	eval-rmse:0.936104	train-rmse:0.908794                                      

[17]	eval-rmse:0.934995	train-rmse:0.907507                                      

[18]	eval-rmse:0.933874	train-rmse:0.90627                                       

[19]	eval-rmse:0.932802	train-rmse:0.905026                                      

[20]	eval-rmse:0.931767	train-rmse:0.903728                                      

[0]	eval-rmse:0.95443	train-rmse:0.930764                                        

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.



[20]	eval-rmse:0.917042	train-rmse:0.891113                                      

[21]	eval-rmse:0.915598	train-rmse:0.889371                                      

[22]	eval-rmse:0.914054	train-rmse:0.887727                                      

[23]	eval-rmse:0.912471	train-rmse:0.88605                                       

[24]	eval-rmse:0.911002	train-rmse:0.884452                                      

[25]	eval-rmse:0.909598	train-rmse:0.882932                                      

[26]	eval-rmse:0.908155	train-rmse:0.881366                                      

[27]	eval-rmse:0.906711	train-rmse:0.87992                                       

[28]	eval-rmse:0.90535	train-rmse:0.878448                                       

[29]	eval-rmse:0.904082	train-rmse:0.877029                                      

[30]	eval-rmse:0.902849	train-rmse:0.875635                                      

[31]	eval-rmse:0.901499	train-rmse:0.8742                                        

[32]

[4]	eval-rmse:0.945245	train-rmse:0.920229                                       

[5]	eval-rmse:0.942918	train-rmse:0.917767                                       

[6]	eval-rmse:0.940853	train-rmse:0.915502                                       

[7]	eval-rmse:0.938772	train-rmse:0.913428                                       

[8]	eval-rmse:0.936857	train-rmse:0.91132                                        

[9]	eval-rmse:0.934765	train-rmse:0.909095                                       

[10]	eval-rmse:0.932741	train-rmse:0.906982                                      

[11]	eval-rmse:0.930844	train-rmse:0.904837                                      

[0]	eval-rmse:0.955639	train-rmse:0.93175                                        

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                       

[1]	eval-rmse:0.954263	train-rmse:0.930506                                       


[16]	eval-rmse:0.945826	train-rmse:0.920121                                      

[17]	eval-rmse:0.945211	train-rmse:0.919475                                      

[18]	eval-rmse:0.944574	train-rmse:0.918841                                      

[19]	eval-rmse:0.943966	train-rmse:0.918167                                      

[20]	eval-rmse:0.943381	train-rmse:0.917454                                      

[0]	eval-rmse:0.954525	train-rmse:0.929699                                       

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                       

[1]	eval-rmse:0.952758	train-rmse:0.926973                                       

[2]	eval-rmse:0.950953	train-rmse:0.924174                                       

[3]	eval-rmse:0.949185	train-rmse:0.921488                                       

[4]	eval-rmse:0.947223	train-rmse:0.918861                                       


[4]	eval-rmse:0.947492	train-rmse:0.922633                                       

[5]	eval-rmse:0.945787	train-rmse:0.920813                                       

[6]	eval-rmse:0.944149	train-rmse:0.919093                                       

[7]	eval-rmse:0.942391	train-rmse:0.917113                                       

[8]	eval-rmse:0.940799	train-rmse:0.91542                                        

[9]	eval-rmse:0.93921	train-rmse:0.913773                                        

[10]	eval-rmse:0.937625	train-rmse:0.912098                                      

[11]	eval-rmse:0.936054	train-rmse:0.910389                                      

[12]	eval-rmse:0.934341	train-rmse:0.908633                                      

[13]	eval-rmse:0.932876	train-rmse:0.906985                                      

[14]	eval-rmse:0.931493	train-rmse:0.905377                                      

[15]	eval-rmse:0.930077	train-rmse:0.903682                                      

[16]

[27]	eval-rmse:0.926148	train-rmse:0.89703                                       

[28]	eval-rmse:0.925262	train-rmse:0.895884                                      

[29]	eval-rmse:0.924368	train-rmse:0.8948                                        

[30]	eval-rmse:0.923446	train-rmse:0.893678                                      

[0]	eval-rmse:0.954726	train-rmse:0.930933                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.953512	train-rmse:0.929217                                         

[2]	eval-rmse:0.952099	train-rmse:0.927478                                         

[3]	eval-rmse:0.950841	train-rmse:0.926208                                         

[4]	eval-rmse:0.94939	train-rmse:0.924709                                          

[5]	eval-rmse:0.948049	train-rmse:0.923035                            

[6]	eval-rmse:0.949039	train-rmse:0.924917                                       

[7]	eval-rmse:0.948083	train-rmse:0.923694                                       

[8]	eval-rmse:0.947069	train-rmse:0.922413                                       

[9]	eval-rmse:0.946061	train-rmse:0.921485                                       

[10]	eval-rmse:0.945306	train-rmse:0.920677                                      

[11]	eval-rmse:0.944258	train-rmse:0.919746                                      

[12]	eval-rmse:0.943048	train-rmse:0.918421                                      

[13]	eval-rmse:0.942109	train-rmse:0.917342                                      

[14]	eval-rmse:0.941266	train-rmse:0.916401                                      

[15]	eval-rmse:0.940232	train-rmse:0.915433                                      

[16]	eval-rmse:0.939414	train-rmse:0.914557                                      

[17]	eval-rmse:0.9386	train-rmse:0.91368                                         

[18]

[0]	eval-rmse:0.955169	train-rmse:0.930934                                       

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                       

[1]	eval-rmse:0.954152	train-rmse:0.929002                                       

[2]	eval-rmse:0.953121	train-rmse:0.927268                                       

[3]	eval-rmse:0.952043	train-rmse:0.925649                                       

[4]	eval-rmse:0.950868	train-rmse:0.923897                                       

[5]	eval-rmse:0.949819	train-rmse:0.922177                                       

[6]	eval-rmse:0.948715	train-rmse:0.920467                                       

[7]	eval-rmse:0.947698	train-rmse:0.918734                                       

[8]	eval-rmse:0.946649	train-rmse:0.917085                                       

[9]	eval-rmse:0.945617	train-rmse:0.91548                                        


[6]	eval-rmse:0.950572	train-rmse:0.926433                                       

[7]	eval-rmse:0.949652	train-rmse:0.925617                                       

[8]	eval-rmse:0.948817	train-rmse:0.924715                                       

[9]	eval-rmse:0.948248	train-rmse:0.923788                                       

[10]	eval-rmse:0.947172	train-rmse:0.922688                                      

[11]	eval-rmse:0.946284	train-rmse:0.921838                                      

[12]	eval-rmse:0.945578	train-rmse:0.921112                                      

[13]	eval-rmse:0.944924	train-rmse:0.920434                                      

[14]	eval-rmse:0.9441	train-rmse:0.919624                                        

[15]	eval-rmse:0.943187	train-rmse:0.918676                                      

[16]	eval-rmse:0.942428	train-rmse:0.917846                                      

[17]	eval-rmse:0.941675	train-rmse:0.917002                                      

[18]

[25]	eval-rmse:0.931941	train-rmse:0.901622                                      

[26]	eval-rmse:0.931088	train-rmse:0.900469                                      

[27]	eval-rmse:0.930326	train-rmse:0.899371                                      

[28]	eval-rmse:0.929527	train-rmse:0.898272                                      

[29]	eval-rmse:0.928771	train-rmse:0.897186                                      

[30]	eval-rmse:0.927996	train-rmse:0.896062                                      

[31]	eval-rmse:0.927202	train-rmse:0.895004                                      

[32]	eval-rmse:0.926474	train-rmse:0.893953                                      

[33]	eval-rmse:0.925664	train-rmse:0.892931                                      

[34]	eval-rmse:0.924933	train-rmse:0.891917                                      

[0]	eval-rmse:0.954936	train-rmse:0.93099                                        

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.



[18]	eval-rmse:0.947029	train-rmse:0.922386                                        

[19]	eval-rmse:0.946567	train-rmse:0.92195                                         

[20]	eval-rmse:0.946017	train-rmse:0.921455                                        

[0]	eval-rmse:0.956138	train-rmse:0.932177                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.955696	train-rmse:0.931853                                         

[2]	eval-rmse:0.954685	train-rmse:0.931009                                         

[3]	eval-rmse:0.95413	train-rmse:0.930578                                          

[4]	eval-rmse:0.953793	train-rmse:0.930204                                         

[5]	eval-rmse:0.953497	train-rmse:0.929456                                         

[6]	eval-rmse:0.953071	train-rmse:0.928812                    

[3]	eval-rmse:0.955125	train-rmse:0.931456                                         

[4]	eval-rmse:0.95447	train-rmse:0.930823                                          

[5]	eval-rmse:0.954138	train-rmse:0.930594                                         

[6]	eval-rmse:0.953897	train-rmse:0.930358                                         

[7]	eval-rmse:0.953734	train-rmse:0.929986                                         

[8]	eval-rmse:0.953541	train-rmse:0.929458                                         

[9]	eval-rmse:0.953274	train-rmse:0.92903                                          

[10]	eval-rmse:0.952854	train-rmse:0.928622                                        

[11]	eval-rmse:0.95251	train-rmse:0.928251                                         

[12]	eval-rmse:0.952153	train-rmse:0.927825                                        

[13]	eval-rmse:0.95183	train-rmse:0.92741                                          

[14]	eval-rmse:0.95159	train-rmse:0.927167                       

[1]	eval-rmse:0.954651	train-rmse:0.930835                                         

[2]	eval-rmse:0.954008	train-rmse:0.930034                                         

[3]	eval-rmse:0.953361	train-rmse:0.92894                                          

[4]	eval-rmse:0.952484	train-rmse:0.927935                                         

[5]	eval-rmse:0.951738	train-rmse:0.927119                                         

[6]	eval-rmse:0.951041	train-rmse:0.9265                                           

[7]	eval-rmse:0.950331	train-rmse:0.92575                                          

[8]	eval-rmse:0.949563	train-rmse:0.924878                                         

[9]	eval-rmse:0.9488	train-rmse:0.923994                                           

[10]	eval-rmse:0.948085	train-rmse:0.923023                                        

[11]	eval-rmse:0.947322	train-rmse:0.922149                                        

[12]	eval-rmse:0.946554	train-rmse:0.921346                      

[5]	eval-rmse:0.953651	train-rmse:0.929688                                         

[6]	eval-rmse:0.953286	train-rmse:0.928985                                         

[7]	eval-rmse:0.95285	train-rmse:0.928423                                          

[8]	eval-rmse:0.952352	train-rmse:0.927889                                         

[9]	eval-rmse:0.951905	train-rmse:0.927345                                         

[10]	eval-rmse:0.951512	train-rmse:0.92691                                         

[11]	eval-rmse:0.95108	train-rmse:0.926582                                         

[12]	eval-rmse:0.95067	train-rmse:0.926244                                         

[13]	eval-rmse:0.950292	train-rmse:0.925823                                        

[0]	eval-rmse:0.956279	train-rmse:0.932306                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.    

[13]	eval-rmse:0.946731	train-rmse:0.921819                                        

[14]	eval-rmse:0.946063	train-rmse:0.921122                                        

[15]	eval-rmse:0.945466	train-rmse:0.920486                                        

[16]	eval-rmse:0.944867	train-rmse:0.91986                                         

[17]	eval-rmse:0.944216	train-rmse:0.919201                                        

[18]	eval-rmse:0.943505	train-rmse:0.918501                                        

[19]	eval-rmse:0.942804	train-rmse:0.917741                                        

[20]	eval-rmse:0.94216	train-rmse:0.916957                                         

[21]	eval-rmse:0.941595	train-rmse:0.916227                                        

[22]	eval-rmse:0.940897	train-rmse:0.915534                                        

[23]	eval-rmse:0.940252	train-rmse:0.914878                                        

[24]	eval-rmse:0.939585	train-rmse:0.914235                      

[16]	eval-rmse:0.942224	train-rmse:0.917011                                        

[17]	eval-rmse:0.941437	train-rmse:0.916115                                        

[18]	eval-rmse:0.940589	train-rmse:0.915301                                        

[19]	eval-rmse:0.93978	train-rmse:0.914508                                         

[20]	eval-rmse:0.939028	train-rmse:0.913722                                        

[21]	eval-rmse:0.938296	train-rmse:0.91296                                         

[22]	eval-rmse:0.937507	train-rmse:0.912155                                        

[23]	eval-rmse:0.936758	train-rmse:0.911313                                        

[24]	eval-rmse:0.93603	train-rmse:0.910483                                         

[0]	eval-rmse:0.955869	train-rmse:0.931957                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.    

[9]	eval-rmse:0.951438	train-rmse:0.926366                                         

[10]	eval-rmse:0.950958	train-rmse:0.925833                                        

[11]	eval-rmse:0.950443	train-rmse:0.925249                                        

[12]	eval-rmse:0.949861	train-rmse:0.924629                                        

[13]	eval-rmse:0.949417	train-rmse:0.924014                                        

[14]	eval-rmse:0.948962	train-rmse:0.923393                                        

[15]	eval-rmse:0.948494	train-rmse:0.922751                                        

[16]	eval-rmse:0.948025	train-rmse:0.922136                                        

[17]	eval-rmse:0.947573	train-rmse:0.921543                                        

[18]	eval-rmse:0.947075	train-rmse:0.920985                                        

[19]	eval-rmse:0.946578	train-rmse:0.920431                                        

[20]	eval-rmse:0.946129	train-rmse:0.919869                      

[2]	eval-rmse:0.952331	train-rmse:0.927888                                         

[3]	eval-rmse:0.951077	train-rmse:0.926673                                         

[4]	eval-rmse:0.949646	train-rmse:0.925319                                         

[5]	eval-rmse:0.948373	train-rmse:0.923737                                         

[6]	eval-rmse:0.947093	train-rmse:0.922168                                         

[7]	eval-rmse:0.945893	train-rmse:0.920859                                         

[8]	eval-rmse:0.944606	train-rmse:0.919525                                         

[9]	eval-rmse:0.94325	train-rmse:0.918087                                          

[10]	eval-rmse:0.942026	train-rmse:0.916734                                        

[11]	eval-rmse:0.94074	train-rmse:0.915402                                         

[12]	eval-rmse:0.939487	train-rmse:0.91414                                         

[13]	eval-rmse:0.938369	train-rmse:0.912893                      

[14]	eval-rmse:0.94279	train-rmse:0.917774                                         

[15]	eval-rmse:0.94194	train-rmse:0.916784                                         

[16]	eval-rmse:0.94119	train-rmse:0.915878                                         

[17]	eval-rmse:0.94027	train-rmse:0.914974                                         

[18]	eval-rmse:0.939398	train-rmse:0.914149                                        

[19]	eval-rmse:0.938591	train-rmse:0.913356                                        

[20]	eval-rmse:0.937778	train-rmse:0.912498                                        

[0]	eval-rmse:0.955884	train-rmse:0.931975                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.954812	train-rmse:0.931008                                         

[2]	eval-rmse:0.954096	train-rmse:0.930336                    

TypeError: must be str, not dict

In [7]:
best

{'alpha': 0.5044419515831545,
 'lambda': 0.023551513057876713,
 'learning_rate': 0.049121011680094875,
 'max_depth': 2,
 'min_child_weight': 0,
 'n_estimators': 11,
 'subsample': 0}

In [8]:
argsDict_tranform(best)

{'alpha': 10.08883903166309,
 'lambda': 1.1775756528938357,
 'learning_rate': 0.0019824202336018977,
 'max_depth': 7,
 'min_child_weight': 1,
 'n_estimators': 21,
 'subsample': 0.5}

In [12]:
best

{'alpha': 1216.9753467453504,
 'lambda': 1078.9878530966914,
 'learning_rate': 0.0010237485529570754,
 'max_depth': 17,
 'min_child_weight': 7,
 'n_estimators': 49,
 'subsample': 0.558}

In [11]:
def xgboost_factory_notrans(argsDict):
    
    params = {'nthread': -1,  # 进程数
              'max_depth': argsDict['max_depth'],  # 最大深度
              'n_estimators': argsDict['n_estimators'],  # 树的数量
              'eta': argsDict['learning_rate'],  # 学习率
              'subsample': argsDict['subsample'],  # 采样数
              'min_child_weight': argsDict['min_child_weight'],  # 终点节点最小样本占比的和
              'objective': 'reg:squarederror',
              'silent': 0,  # 是否显示
              'gamma': 0,  # 是否后剪枝
              'colsample_bytree': 0.7,  # 样本列采样
              'alpha': argsDict['alpha'],  # L1 正则化
              'lambda': argsDict['lambda'],  # L2 正则化
              'scale_pos_weight': 0,  # 取值>0时,在数据不平衡时有助于收敛
              'seed': 100,  # 随机种子
              'tree_method':'exact'
              }

    xrf = xgb.train(params, dtrain, params['n_estimators'], evallist,early_stopping_rounds=100)

    return get_tranformer_score(xrf)

xgboost_factory_notrans(best)

[0]	eval-rmse:0.956294	train-rmse:0.932323
Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.

Will train until train-rmse hasn't improved in 100 rounds.
[1]	eval-rmse:0.956204	train-rmse:0.932242
[2]	eval-rmse:0.955814	train-rmse:0.931945
[3]	eval-rmse:0.955778	train-rmse:0.931912
[4]	eval-rmse:0.955707	train-rmse:0.931841
[5]	eval-rmse:0.954407	train-rmse:0.930852
[6]	eval-rmse:0.954307	train-rmse:0.930755
[7]	eval-rmse:0.954263	train-rmse:0.930723
[8]	eval-rmse:0.953948	train-rmse:0.93052
[9]	eval-rmse:0.953839	train-rmse:0.930419
[10]	eval-rmse:0.953808	train-rmse:0.930382
[11]	eval-rmse:0.953647	train-rmse:0.929275
[12]	eval-rmse:0.953591	train-rmse:0.929216
[13]	eval-rmse:0.953194	train-rmse:0.928925
[14]	eval-rmse:0.95316	train-rmse:0.92889
[15]	eval-rmse:0.953073	train-rmse:0.928812
[16]	eval-rmse:0.951963	train-rmse:0.927507
[17]	eval-rmse:0.951928	train-rmse:0.927474
[18]	eval-rmse:0.951848	train-rmse:0.927389
[19]	eval-rmse:0.951454	train-r

-0.03290653323531516

In [31]:
bst=xgb.train(params,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)


[0]	train-rmse:0.926187	test-rmse:0.896756


In [36]:
xrf = xgb.train(params, dtrain,num_boost_round=1,early_stopping_rounds=None)

In [None]:
# first time training

bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)
pred = bst.predict(dtest)
predtrain = bst.predict(dtrain)
print(pearsonr(np.array(yp_train), predtrain))
print(pearsonr(np.array(yp_test), pred))


# first time training - continued
for kk in range(29):
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
    pred = bst.predict(dtest)
    predtrain = bst.predict(dtrain)
print(pearsonr(np.array(yp_train), predtrain))
# print(pearsonr(np.array(y_train.T)[0], predtrain))
print(pearsonr(np.array(yp_test), pred))
# print(pearsonr(np.array(y_test.T)[0], pred))

# concatenate predictions

g =  train[["TradingDay", "SecuCode"]][time_ind[train_days]:time_ind[train_days+delta_days]]
g["NextCatPred"] = pred
k = g.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")


In [None]:
# second traininng and so on

# change dataset
drift = 1
while drift * delta_days + train_days + delta_days < len(time_ind):
    X_train = X[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    X_test = X[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    y_train = y[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    y_test = y[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    yp_train = yp[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    yp_test = yp[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dtest = xgb.DMatrix(data=X_test)
    ddev = xgb.DMatrix(data=X_test, label=y_test)
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)

    for kk in range(29):
        bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
#         bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)
        pred = bst.predict(dtest)
        predtrain = bst.predict(dtrain)
    print(pearsonr(np.array(yp_train), predtrain))
    # print(pearsonr(np.array(y_train.T)[0], predtrain))
    print(pearsonr(np.array(yp_test), pred))
    # print(pearsonr(np.array(y_test.T)[0], pred))
    gt =  train[["TradingDay", "SecuCode"]][time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    gt["NextCatPred"] = pred
    kt = gt.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")
    k = pd.concat([k, kt],ignore_index=True)
    drift += 1

# the last dataset
if drift * delta_days + train_days + delta_days > len(time_ind):
    X_train = X[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    X_test = X[time_ind[train_days + drift * delta_days]:]
    y_train = y[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    y_test = y[time_ind[train_days + drift * delta_days]:]
    yp_train = yp[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    yp_test = yp[time_ind[train_days + drift * delta_days]:]
    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dtest = xgb.DMatrix(data=X_test)
    ddev = xgb.DMatrix(data=X_test, label=y_test)
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)

    for kk in range(29):
        bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
        pred = bst.predict(dtest)
        predtrain = bst.predict(dtrain)
    print(pearsonr(np.array(yp_train), predtrain))
    # print(pearsonr(np.array(y_train.T)[0], predtrain))
    print(pearsonr(np.array(yp_test), pred))
    # print(pearsonr(np.array(y_test.T)[0], pred))
    gt =  train[["TradingDay", "SecuCode"]][time_ind[train_days + drift * delta_days]:]
    gt["NextCatPred"] = pred
    kt = gt.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")
    k = pd.concat([k, kt],ignore_index=True)

k.set_index("TradingDay").to_csv("0805back_input_30_30.csv")