In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pylab as plt
from scipy.stats import pearsonr

from hyperopt import fmin, tpe, hp, partial


def find_time_ind(df):
    day_ind = [0]
    day_temp = df["TradingDay"][0]
    for i in range(1, len(df)):
        if df["TradingDay"][i] != day_temp:
            day_ind.append(i)
            day_temp = df["TradingDay"][i]
    return day_ind

In [2]:
# input cell
# train = pd.read_csv('0805rolling5.csv')
train = pd.read_csv('08053before_rolling.csv')
# train = pd.read_csv('08052acou_and_factors.csv')
# x_column = ["factor"+str(i)+'_'+str(j) for i in range(0,17) for j in range(5)]
x_column = ["factor"+str(i) for i in range(0,17)]
# for i in range(12):
#     for j in range(5):
#         x_column.append("10M" + str(i) +'_'+str(j))
for i in range(12):
    x_column.append("10M" + str(i))

train = train.sort_values(by = ["TradingDay", "SecuCode"], ascending = [True, True])
train.reset_index(drop=True, inplace=True)
X = train[x_column]
target = ['NextReturnCate']
y = train[target]
y += 1
yp = train["NextReturn"]
time_ind = find_time_ind(train)

In [3]:
# initializing
train_days = 775
delta_days = -776
X_train = X[time_ind[0]:time_ind[train_days]]
X_test = X[time_ind[train_days]:time_ind[train_days+delta_days]]
y_train = y[time_ind[0]:time_ind[train_days]]
y_test = y[time_ind[train_days]:time_ind[train_days+delta_days]]
yp_train = yp[time_ind[0]:time_ind[train_days]]
yp_test = yp[time_ind[train_days]:time_ind[train_days+delta_days]]
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test)
ddev = xgb.DMatrix(data=X_test, label=y_test)

evallist = [(ddev, 'eval'), (dtrain, 'train')]


In [4]:
# 自定义hyperopt的参数空间
space = {"max_depth": hp.randint("max_depth", 25),
         "n_estimators": hp.randint("n_estimators", 30),
         'learning_rate': hp.uniform('learning_rate', 1e-3, 5e-1),
         "subsample": hp.randint("subsample", 5),
         "min_child_weight": hp.randint("min_child_weight", 6),
         "alpha": hp.uniform("alpha", 0, 1),
         "lambda": hp.uniform("lambda", 0, 1)
         }

def argsDict_tranform(argsDict, isPrint=False):
    argsDict["max_depth"] = argsDict["max_depth"] + 5    # from 5 to 30
    argsDict['n_estimators'] = argsDict['n_estimators'] + 10  # from 10 to 40
    argsDict["learning_rate"] = argsDict["learning_rate"] * 0.02 + 0.001   # from 0.001 to 0.01
    argsDict["subsample"] = argsDict["subsample"] * 0.1 + 0.5  # from 0.5 to 1
    argsDict["min_child_weight"] = argsDict["min_child_weight"] + 1 # from 1 to 6
    argsDict["alpha"] = argsDict["alpha"] * 20
    argsDict["lambda"] = argsDict["lambda"] * 50
    if isPrint:
        print(argsDict)
    else:
        pass

    return argsDict

In [5]:
from sklearn.metrics import mean_squared_error, zero_one_loss

def xgboost_factory(argsDict):
    argsDict = argsDict_tranform(argsDict)
    
    params = {'nthread': -1,  # 进程数
              'max_depth': argsDict['max_depth'],  # 最大深度
              'n_estimators': argsDict['n_estimators'],  # 树的数量
              'eta': argsDict['learning_rate'],  # 学习率
              'subsample': argsDict['subsample'],  # 采样数
              'min_child_weight': argsDict['min_child_weight'],  # 终点节点最小样本占比的和
              'objective': 'reg:squarederror',
              'silent': 0,  # 是否显示
              'gamma': 0,  # 是否后剪枝
              'colsample_bytree': 0.7,  # 样本列采样
              'alpha': argsDict['alpha'],  # L1 正则化
              'lambda': argsDict['lambda'],  # L2 正则化
              'scale_pos_weight': 0,  # 取值>0时,在数据不平衡时有助于收敛
              'seed': 100,  # 随机种子
              'tree_method':'exact'
              }

    xrf = xgb.train(params, dtrain, params['n_estimators'], evallist,early_stopping_rounds=100)

    return get_tranformer_score(xrf)


def get_tranformer_score(tranformer):
    
    xrf = tranformer
    pred = xrf.predict(ddev, ntree_limit=xrf.best_ntree_limit)
  
    return -pearsonr(np.array(yp_test), pred)[0]

In [6]:
# 开始使用hyperopt进行自动调参
algo = partial(tpe.suggest, n_startup_jobs=1)
best = fmin(xgboost_factory, space, algo=algo, max_evals=100, pass_expr_memo_ctrl=None)
store = best
print("the best parameter before transformation is:" + best)
print("the best parameter after transformation is:" + argsDict_tranform(best))


[0]	eval-rmse:0.956279	train-rmse:0.932308           

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.

[1]	eval-rmse:0.956145	train-rmse:0.932158           

[2]	eval-rmse:0.955883	train-rmse:0.931955           

[3]	eval-rmse:0.955631	train-rmse:0.931761           

[4]	eval-rmse:0.955124	train-rmse:0.931316           

[5]	eval-rmse:0.95475	train-rmse:0.930898            

[6]	eval-rmse:0.954449	train-rmse:0.930655           

[7]	eval-rmse:0.954199	train-rmse:0.930452           

[8]	eval-rmse:0.954004	train-rmse:0.930189           

[9]	eval-rmse:0.953845	train-rmse:0.929862           

[10]	eval-rmse:0.953717	train-rmse:0.929499          

[11]	eval-rmse:0.953532	train-rmse:0.929097          

[12]	eval-rmse:0.953265	train-rmse:0.928752          

[13]	eval-rmse:0.953028	train-rmse:0.928432          

[14]	eval-rmse:0.952717	train-rmse:0.928113          

[15]	eval-rmse:0.952477	tra

[22]	eval-rmse:0.90745	train-rmse:0.879195                                         

[23]	eval-rmse:0.905828	train-rmse:0.877319                                        

[24]	eval-rmse:0.904193	train-rmse:0.875472                                        

[25]	eval-rmse:0.902475	train-rmse:0.873735                                        

[26]	eval-rmse:0.900919	train-rmse:0.871972                                        

[27]	eval-rmse:0.899502	train-rmse:0.870267                                        

[28]	eval-rmse:0.897922	train-rmse:0.868582                                        

[29]	eval-rmse:0.896487	train-rmse:0.866952                                        

[0]	eval-rmse:0.953651	train-rmse:0.929681                                         

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                        

[1]	eval-rmse:0.95095	train-rmse:0.926546                      

[14]	eval-rmse:0.925634	train-rmse:0.889295                                        

[15]	eval-rmse:0.92379	train-rmse:0.886836                                         

[16]	eval-rmse:0.921939	train-rmse:0.884399                                        

[17]	eval-rmse:0.920237	train-rmse:0.881871                                        

[18]	eval-rmse:0.918652	train-rmse:0.879373                                        

[19]	eval-rmse:0.916798	train-rmse:0.876919                                        

[20]	eval-rmse:0.915151	train-rmse:0.874572                                        

[21]	eval-rmse:0.913539	train-rmse:0.872188                                        

[22]	eval-rmse:0.912014	train-rmse:0.869761                                        

[23]	eval-rmse:0.910413	train-rmse:0.867382                                        

[24]	eval-rmse:0.908889	train-rmse:0.865071                                        

[25]	eval-rmse:0.907259	train-rmse:0.862838                      

[8]	eval-rmse:0.940482	train-rmse:0.911229                                         

[9]	eval-rmse:0.938684	train-rmse:0.909012                                         

[10]	eval-rmse:0.937166	train-rmse:0.9069                                          

[11]	eval-rmse:0.935419	train-rmse:0.904802                                        

[12]	eval-rmse:0.93396	train-rmse:0.902653                                         

[13]	eval-rmse:0.932458	train-rmse:0.900492                                        

[14]	eval-rmse:0.930935	train-rmse:0.898421                                        

[15]	eval-rmse:0.929358	train-rmse:0.896411                                        

[16]	eval-rmse:0.927872	train-rmse:0.894456                                        

[17]	eval-rmse:0.926322	train-rmse:0.892466                                        

[18]	eval-rmse:0.924872	train-rmse:0.890429                                        

[19]	eval-rmse:0.923149	train-rmse:0.888404                      

[30]	eval-rmse:0.91086	train-rmse:0.876513                                          

[31]	eval-rmse:0.909636	train-rmse:0.875004                                         

[32]	eval-rmse:0.90856	train-rmse:0.87353                                           

[33]	eval-rmse:0.907414	train-rmse:0.872087                                         

[0]	eval-rmse:0.954117	train-rmse:0.930414                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                          

[1]	eval-rmse:0.952296	train-rmse:0.927796                                          

[2]	eval-rmse:0.950253	train-rmse:0.925694                                          

[3]	eval-rmse:0.948292	train-rmse:0.923453                                          

[4]	eval-rmse:0.946414	train-rmse:0.921269                                          

[5]	eval-rmse:0.944635	train-rmse:0.919208          

[3]	eval-rmse:0.954316	train-rmse:0.929923                                         

[4]	eval-rmse:0.953886	train-rmse:0.929045                                         

[5]	eval-rmse:0.953361	train-rmse:0.928228                                         

[6]	eval-rmse:0.952882	train-rmse:0.927472                                         

[7]	eval-rmse:0.952413	train-rmse:0.926843                                         

[8]	eval-rmse:0.951968	train-rmse:0.926272                                         

[9]	eval-rmse:0.951439	train-rmse:0.925694                                         

[10]	eval-rmse:0.950993	train-rmse:0.925085                                        

[11]	eval-rmse:0.950505	train-rmse:0.924428                                        

[12]	eval-rmse:0.950009	train-rmse:0.923717                                        

[13]	eval-rmse:0.949584	train-rmse:0.922993                                        

[14]	eval-rmse:0.949095	train-rmse:0.922274                      

[23]	eval-rmse:0.922832	train-rmse:0.87758                                          

[24]	eval-rmse:0.921786	train-rmse:0.875502                                         

[25]	eval-rmse:0.920558	train-rmse:0.873442                                         

[26]	eval-rmse:0.91951	train-rmse:0.871374                                          

[27]	eval-rmse:0.918358	train-rmse:0.869321                                         

[28]	eval-rmse:0.917251	train-rmse:0.867225                                         

[29]	eval-rmse:0.916173	train-rmse:0.865201                                         

[30]	eval-rmse:0.915125	train-rmse:0.863133                                         

[31]	eval-rmse:0.913954	train-rmse:0.861103                                         

[32]	eval-rmse:0.912806	train-rmse:0.859145                                         

[33]	eval-rmse:0.911791	train-rmse:0.857179                                         

[0]	eval-rmse:0.954092	train-rmse:0.929811            

[11]	eval-rmse:0.931818	train-rmse:0.899703                                         

[12]	eval-rmse:0.930116	train-rmse:0.897184                                         

[13]	eval-rmse:0.928335	train-rmse:0.894714                                         

[14]	eval-rmse:0.926507	train-rmse:0.892303                                         

[15]	eval-rmse:0.924725	train-rmse:0.889987                                         

[16]	eval-rmse:0.923024	train-rmse:0.887709                                         

[17]	eval-rmse:0.921282	train-rmse:0.88534                                          

[18]	eval-rmse:0.91972	train-rmse:0.882936                                          

[19]	eval-rmse:0.917834	train-rmse:0.880617                                         

[0]	eval-rmse:0.954272	train-rmse:0.929669                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 r

Will train until train-rmse hasn't improved in 100 rounds.                            

[1]	eval-rmse:0.952517	train-rmse:0.928161                                            

[2]	eval-rmse:0.950686	train-rmse:0.926224                                            

[3]	eval-rmse:0.948603	train-rmse:0.924294                                            

[4]	eval-rmse:0.94681	train-rmse:0.922092                                             

[5]	eval-rmse:0.945161	train-rmse:0.92031                                             

[6]	eval-rmse:0.943354	train-rmse:0.91841                                             

[7]	eval-rmse:0.941641	train-rmse:0.916345                                            

[8]	eval-rmse:0.939982	train-rmse:0.914584                                            

[9]	eval-rmse:0.938174	train-rmse:0.912853                                            

[10]	eval-rmse:0.936487	train-rmse:0.911098                                           

[11]	eval-rmse:0.934891	train-rm

[1]	eval-rmse:0.953374	train-rmse:0.928886                                            

[2]	eval-rmse:0.951821	train-rmse:0.927096                                            

[3]	eval-rmse:0.950299	train-rmse:0.925675                                            

[4]	eval-rmse:0.948867	train-rmse:0.923935                                            

[5]	eval-rmse:0.947493	train-rmse:0.922189                                            

[6]	eval-rmse:0.946118	train-rmse:0.920609                                            

[7]	eval-rmse:0.944601	train-rmse:0.919046                                            

[8]	eval-rmse:0.943252	train-rmse:0.917445                                            

[9]	eval-rmse:0.941752	train-rmse:0.91585                                             

[10]	eval-rmse:0.940443	train-rmse:0.914357                                           

[11]	eval-rmse:0.939094	train-rmse:0.912913                                           

[12]	eval-rmse:0.937733	train-rm

[12]	eval-rmse:0.936148	train-rmse:0.909204                                          

[13]	eval-rmse:0.934793	train-rmse:0.907576                                          

[14]	eval-rmse:0.93326	train-rmse:0.905955                                           

[15]	eval-rmse:0.931845	train-rmse:0.904361                                          

[16]	eval-rmse:0.930519	train-rmse:0.902789                                          

[17]	eval-rmse:0.929126	train-rmse:0.901166                                          

[18]	eval-rmse:0.92785	train-rmse:0.89955                                            

[19]	eval-rmse:0.926433	train-rmse:0.897984                                          

[20]	eval-rmse:0.925042	train-rmse:0.896475                                          

[21]	eval-rmse:0.923691	train-rmse:0.894969                                          

[22]	eval-rmse:0.922543	train-rmse:0.893445                                          

[23]	eval-rmse:0.921156	train-rmse:0.891959

[32]	eval-rmse:0.942082	train-rmse:0.916091                                          

[33]	eval-rmse:0.941678	train-rmse:0.915616                                          

[34]	eval-rmse:0.941282	train-rmse:0.915134                                          

[0]	eval-rmse:0.955803	train-rmse:0.931895                                           

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                           

[1]	eval-rmse:0.954687	train-rmse:0.930862                                           

[2]	eval-rmse:0.953878	train-rmse:0.929942                                           

[3]	eval-rmse:0.953176	train-rmse:0.928806                                           

[4]	eval-rmse:0.952447	train-rmse:0.927828                                           

[5]	eval-rmse:0.951711	train-rmse:0.926962                                           

[6]	eval-rmse:0.950956	train-rmse:0.926141

[14]	eval-rmse:0.94459	train-rmse:0.918855                                            

[15]	eval-rmse:0.943775	train-rmse:0.917992                                           

[16]	eval-rmse:0.943077	train-rmse:0.917108                                           

[17]	eval-rmse:0.942319	train-rmse:0.916206                                           

[18]	eval-rmse:0.941585	train-rmse:0.915369                                           

[19]	eval-rmse:0.940856	train-rmse:0.914552                                           

[20]	eval-rmse:0.940082	train-rmse:0.913746                                           

[21]	eval-rmse:0.939357	train-rmse:0.912965                                           

[0]	eval-rmse:0.955999	train-rmse:0.932079                                            

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                           

[1]	eval-rmse:0.955286	train-rmse

[16]	eval-rmse:0.940162	train-rmse:0.911007                                           

[17]	eval-rmse:0.939262	train-rmse:0.90981                                            

[18]	eval-rmse:0.938392	train-rmse:0.908635                                           

[19]	eval-rmse:0.937424	train-rmse:0.907481                                           

[20]	eval-rmse:0.936485	train-rmse:0.90637                                            

[21]	eval-rmse:0.935632	train-rmse:0.905205                                           

[22]	eval-rmse:0.93471	train-rmse:0.904013                                            

[23]	eval-rmse:0.933828	train-rmse:0.902826                                           

[24]	eval-rmse:0.932981	train-rmse:0.901654                                           

[25]	eval-rmse:0.932128	train-rmse:0.900541                                           

[26]	eval-rmse:0.931321	train-rmse:0.899404                                           

[27]	eval-rmse:0.930505	train-rm

[9]	eval-rmse:0.953718	train-rmse:0.929771                                            

[10]	eval-rmse:0.953585	train-rmse:0.929346                                           

[11]	eval-rmse:0.95336	train-rmse:0.928973                                            

[12]	eval-rmse:0.953086	train-rmse:0.928617                                           

[13]	eval-rmse:0.952797	train-rmse:0.928303                                           

[14]	eval-rmse:0.952466	train-rmse:0.927991                                           

[15]	eval-rmse:0.952191	train-rmse:0.927681                                           

[16]	eval-rmse:0.951943	train-rmse:0.927388                                           

[17]	eval-rmse:0.951705	train-rmse:0.927147                                           

[18]	eval-rmse:0.951511	train-rmse:0.92694                                            

[19]	eval-rmse:0.951263	train-rmse:0.926748                                           

[20]	eval-rmse:0.951055	train-rm

[10]	eval-rmse:0.951092	train-rmse:0.925551                                           

[11]	eval-rmse:0.950577	train-rmse:0.924998                                           

[12]	eval-rmse:0.950133	train-rmse:0.92438                                            

[13]	eval-rmse:0.94965	train-rmse:0.923724                                            

[14]	eval-rmse:0.949188	train-rmse:0.923044                                           

[15]	eval-rmse:0.948725	train-rmse:0.922391                                           

[16]	eval-rmse:0.948275	train-rmse:0.921783                                           

[17]	eval-rmse:0.947843	train-rmse:0.92117                                            

[18]	eval-rmse:0.947392	train-rmse:0.920579                                           

[19]	eval-rmse:0.946897	train-rmse:0.92001                                            

[20]	eval-rmse:0.946417	train-rmse:0.919452                                           

[21]	eval-rmse:0.945955	train-rm

[15]	eval-rmse:0.933322	train-rmse:0.90271                                            

[16]	eval-rmse:0.931929	train-rmse:0.901046                                           

[17]	eval-rmse:0.93059	train-rmse:0.899341                                            

[18]	eval-rmse:0.929333	train-rmse:0.89761                                            

[19]	eval-rmse:0.927905	train-rmse:0.895931                                           

[20]	eval-rmse:0.926606	train-rmse:0.894322                                           

[21]	eval-rmse:0.925379	train-rmse:0.892696                                           

[22]	eval-rmse:0.924228	train-rmse:0.891041                                           

[23]	eval-rmse:0.923028	train-rmse:0.889437                                           

[24]	eval-rmse:0.921844	train-rmse:0.887842                                           

[25]	eval-rmse:0.920606	train-rmse:0.886237                                           

[26]	eval-rmse:0.919576	train-rm

[2]	eval-rmse:0.954579	train-rmse:0.930349                                            

[3]	eval-rmse:0.954046	train-rmse:0.929333                                            

[4]	eval-rmse:0.953511	train-rmse:0.92845                                             

[5]	eval-rmse:0.952949	train-rmse:0.927583                                            

[6]	eval-rmse:0.952376	train-rmse:0.926871                                            

[7]	eval-rmse:0.951799	train-rmse:0.926168                                            

[8]	eval-rmse:0.951265	train-rmse:0.92548                                             

[9]	eval-rmse:0.95074	train-rmse:0.924723                                             

[10]	eval-rmse:0.950219	train-rmse:0.923945                                           

[11]	eval-rmse:0.949619	train-rmse:0.923137                                           

[12]	eval-rmse:0.949035	train-rmse:0.92233                                            

[13]	eval-rmse:0.948524	train-rm

[7]	eval-rmse:0.937668	train-rmse:0.910937                                            

[8]	eval-rmse:0.935601	train-rmse:0.908586                                            

[9]	eval-rmse:0.933318	train-rmse:0.906183                                            

[10]	eval-rmse:0.931333	train-rmse:0.903807                                           

[11]	eval-rmse:0.929272	train-rmse:0.901441                                           

[12]	eval-rmse:0.92724	train-rmse:0.899137                                            

[13]	eval-rmse:0.925355	train-rmse:0.896851                                           

[14]	eval-rmse:0.923299	train-rmse:0.89468                                            

[15]	eval-rmse:0.921415	train-rmse:0.892556                                           

[16]	eval-rmse:0.919513	train-rmse:0.890476                                           

[17]	eval-rmse:0.917763	train-rmse:0.888337                                           

[18]	eval-rmse:0.916053	train-rm

[3]	eval-rmse:0.949187	train-rmse:0.92458                                             

[4]	eval-rmse:0.947636	train-rmse:0.922499                                            

[5]	eval-rmse:0.946044	train-rmse:0.920614                                            

[6]	eval-rmse:0.944377	train-rmse:0.918777                                            

[7]	eval-rmse:0.942669	train-rmse:0.91687                                             

[8]	eval-rmse:0.94111	train-rmse:0.915111                                             

[9]	eval-rmse:0.939421	train-rmse:0.913414                                            

[10]	eval-rmse:0.93791	train-rmse:0.911737                                            

[11]	eval-rmse:0.936272	train-rmse:0.910024                                           

[12]	eval-rmse:0.934696	train-rmse:0.90837                                            

[13]	eval-rmse:0.933192	train-rmse:0.906657                                           

[14]	eval-rmse:0.931695	train-rm

[22]	eval-rmse:0.94073	train-rmse:0.914579                                         

[23]	eval-rmse:0.94011	train-rmse:0.913858                                         

[24]	eval-rmse:0.939487	train-rmse:0.913133                                        

[25]	eval-rmse:0.938839	train-rmse:0.912433                                        

[26]	eval-rmse:0.938425	train-rmse:0.911724                                        

[27]	eval-rmse:0.937803	train-rmse:0.911037                                        

[28]	eval-rmse:0.937167	train-rmse:0.910329                                        

[29]	eval-rmse:0.936533	train-rmse:0.909628                                        

[30]	eval-rmse:0.93588	train-rmse:0.908921                                         

[31]	eval-rmse:0.935217	train-rmse:0.908218                                        

[32]	eval-rmse:0.934581	train-rmse:0.907519                                        

[33]	eval-rmse:0.933922	train-rmse:0.90684                       

[6]	eval-rmse:0.946213	train-rmse:0.911958                                            

[7]	eval-rmse:0.944828	train-rmse:0.909095                                            

[8]	eval-rmse:0.943454	train-rmse:0.906328                                            

[9]	eval-rmse:0.942149	train-rmse:0.903452                                            

[10]	eval-rmse:0.940915	train-rmse:0.900729                                           

[11]	eval-rmse:0.939459	train-rmse:0.898229                                           

[12]	eval-rmse:0.938093	train-rmse:0.895483                                           

[13]	eval-rmse:0.936714	train-rmse:0.892751                                           

[14]	eval-rmse:0.935467	train-rmse:0.89007                                            

[15]	eval-rmse:0.934183	train-rmse:0.88755                                            

[16]	eval-rmse:0.932992	train-rmse:0.884934                                           

[17]	eval-rmse:0.931805	train-rm

[12]	eval-rmse:0.935188	train-rmse:0.890109                                           

[13]	eval-rmse:0.933807	train-rmse:0.886995                                           

[14]	eval-rmse:0.932381	train-rmse:0.884014                                           

[15]	eval-rmse:0.930986	train-rmse:0.88109                                            

[16]	eval-rmse:0.929537	train-rmse:0.878153                                           

[17]	eval-rmse:0.92823	train-rmse:0.875133                                            

[18]	eval-rmse:0.926964	train-rmse:0.872025                                           

[0]	eval-rmse:0.955986	train-rmse:0.931957                                            

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping. 


Will train until train-rmse hasn't improved in 100 rounds.                            

[1]	eval-rmse:0.95526	train-rmse:0.931016                                             

[2]	eval-rmse:0.954698	train-rm

[15]	eval-rmse:0.941432	train-rmse:0.916034                                         

[16]	eval-rmse:0.940611	train-rmse:0.91512                                          

[17]	eval-rmse:0.939782	train-rmse:0.914184                                         

[18]	eval-rmse:0.938925	train-rmse:0.913267                                         

[19]	eval-rmse:0.938015	train-rmse:0.912365                                         

[20]	eval-rmse:0.937151	train-rmse:0.911456                                         

[0]	eval-rmse:0.955218	train-rmse:0.931216                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.954387	train-rmse:0.929594                                         

[2]	eval-rmse:0.953351	train-rmse:0.927755                                         

[3]	eval-rmse:0.952433	train-rmse:0.926402             

[11]	eval-rmse:0.952386	train-rmse:0.927819                                        

[12]	eval-rmse:0.952071	train-rmse:0.927438                                        

[13]	eval-rmse:0.951805	train-rmse:0.927103                                        

[14]	eval-rmse:0.951464	train-rmse:0.926798                                        

[15]	eval-rmse:0.951149	train-rmse:0.926485                                        

[16]	eval-rmse:0.950852	train-rmse:0.92619                                         

[17]	eval-rmse:0.950527	train-rmse:0.925891                                        

[18]	eval-rmse:0.950219	train-rmse:0.925585                                        

[19]	eval-rmse:0.949876	train-rmse:0.925249                                        

[20]	eval-rmse:0.949538	train-rmse:0.924908                                        

[0]	eval-rmse:0.954894	train-rmse:0.931006                                         

Multiple eval metrics have been passed: 'train-rmse' will be used

[29]	eval-rmse:0.924096	train-rmse:0.886123                                         

[30]	eval-rmse:0.92318	train-rmse:0.884683                                          

[31]	eval-rmse:0.92221	train-rmse:0.883285                                          

[32]	eval-rmse:0.921332	train-rmse:0.881921                                         

[33]	eval-rmse:0.920422	train-rmse:0.880528                                         

[0]	eval-rmse:0.956154	train-rmse:0.932147                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                          

[1]	eval-rmse:0.955671	train-rmse:0.931774                                          

[2]	eval-rmse:0.954882	train-rmse:0.930918                                          

[3]	eval-rmse:0.95434	train-rmse:0.930461                                           

[4]	eval-rmse:0.954023	train-rmse:0.929922          

[31]	eval-rmse:0.91764	train-rmse:0.890668                                          

[32]	eval-rmse:0.916572	train-rmse:0.88956                                          

[33]	eval-rmse:0.915499	train-rmse:0.888468                                         

[34]	eval-rmse:0.914612	train-rmse:0.887402                                         

[35]	eval-rmse:0.913666	train-rmse:0.88633                                          

[36]	eval-rmse:0.912712	train-rmse:0.88524                                          

[37]	eval-rmse:0.911775	train-rmse:0.884216                                         

[38]	eval-rmse:0.910707	train-rmse:0.883205                                         

[0]	eval-rmse:0.955914	train-rmse:0.931959                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                          

[1]	eval-rmse:0.954915	train-rmse:0.931001          

[17]	eval-rmse:0.938689	train-rmse:0.90334                                          

[18]	eval-rmse:0.937819	train-rmse:0.901749                                         

[19]	eval-rmse:0.936815	train-rmse:0.900207                                         

[0]	eval-rmse:0.954711	train-rmse:0.930874                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                          

[1]	eval-rmse:0.953411	train-rmse:0.928882                                          

[2]	eval-rmse:0.951773	train-rmse:0.927095                                          

[3]	eval-rmse:0.950313	train-rmse:0.925593                                          

[4]	eval-rmse:0.948895	train-rmse:0.923908                                          

[5]	eval-rmse:0.947452	train-rmse:0.922094                                          

[6]	eval-rmse:0.946019	train-rmse:0.920478          

[7]	eval-rmse:0.943533	train-rmse:0.917033                                          

[8]	eval-rmse:0.942054	train-rmse:0.915268                                          

[9]	eval-rmse:0.940511	train-rmse:0.913507                                          

[10]	eval-rmse:0.939107	train-rmse:0.911863                                         

[11]	eval-rmse:0.937559	train-rmse:0.910164                                         

[12]	eval-rmse:0.936152	train-rmse:0.908496                                         

[13]	eval-rmse:0.93457	train-rmse:0.906837                                          

[14]	eval-rmse:0.933142	train-rmse:0.905154                                         

[15]	eval-rmse:0.931717	train-rmse:0.903478                                         

[16]	eval-rmse:0.930327	train-rmse:0.901864                                         

[17]	eval-rmse:0.928905	train-rmse:0.900232                                         

[18]	eval-rmse:0.927604	train-rmse:0.898553           

[26]	eval-rmse:0.925951	train-rmse:0.900084                                         

[0]	eval-rmse:0.955869	train-rmse:0.931897                                          

Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.


Will train until train-rmse hasn't improved in 100 rounds.                         

[1]	eval-rmse:0.954793	train-rmse:0.930813                                         

[2]	eval-rmse:0.954159	train-rmse:0.929913                                         

[3]	eval-rmse:0.953488	train-rmse:0.928748                                         

[4]	eval-rmse:0.952799	train-rmse:0.927763                                         

[5]	eval-rmse:0.952143	train-rmse:0.926911                                         

[6]	eval-rmse:0.951466	train-rmse:0.926124                                         

[7]	eval-rmse:0.950763	train-rmse:0.925314                                         

[8]	eval-rmse:0.950101	train-rmse:0.924448                  

TypeError: must be str, not dict

In [7]:
best

{'alpha': 0.024323280644326623,
 'lambda': 0.0686095965507202,
 'learning_rate': 0.12013231877185954,
 'max_depth': 6,
 'min_child_weight': 2,
 'n_estimators': 25,
 'subsample': 2}

In [8]:
argsDict_tranform(best)

{'alpha': 0.48646561288653245,
 'lambda': 3.43047982753601,
 'learning_rate': 0.003402646375437191,
 'max_depth': 11,
 'min_child_weight': 3,
 'n_estimators': 35,
 'subsample': 0.7}

In [47]:
best

{'learning_rate': 0.415602004895926,
 'max_depth': 2,
 'min_child_weight': 5,
 'n_estimators': 14,
 'subsample': 2}

In [49]:
 xgboost_factory(argsDict_tranform(best))

[0]	eval-rmse:0.887142	train-rmse:0.91346
Multiple eval metrics have been passed: 'train-rmse' will be used for early stopping.

Will train until train-rmse hasn't improved in 100 rounds.
[1]	eval-rmse:0.876897	train-rmse:0.894919
[2]	eval-rmse:0.868053	train-rmse:0.878668
[3]	eval-rmse:0.859192	train-rmse:0.86251
[4]	eval-rmse:0.851483	train-rmse:0.848265
[5]	eval-rmse:0.844992	train-rmse:0.833192
[6]	eval-rmse:0.838924	train-rmse:0.820392
[7]	eval-rmse:0.833219	train-rmse:0.807377
[8]	eval-rmse:0.828818	train-rmse:0.794724
[9]	eval-rmse:0.824214	train-rmse:0.783454
[10]	eval-rmse:0.820525	train-rmse:0.772966
[11]	eval-rmse:0.817106	train-rmse:0.762479
[12]	eval-rmse:0.814711	train-rmse:0.752645
[13]	eval-rmse:0.812286	train-rmse:0.743479
[14]	eval-rmse:0.810634	train-rmse:0.734102
[15]	eval-rmse:0.808537	train-rmse:0.72533
[16]	eval-rmse:0.807054	train-rmse:0.716354
[17]	eval-rmse:0.805628	train-rmse:0.707871
[18]	eval-rmse:0.804132	train-rmse:0.699963
[19]	eval-rmse:0.803122	train-r

-0.06243747878596423

In [31]:
bst=xgb.train(params,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)


[0]	train-rmse:0.926187	test-rmse:0.896756


In [36]:
xrf = xgb.train(params, dtrain,num_boost_round=1,early_stopping_rounds=None)

In [None]:
# first time training

bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)
pred = bst.predict(dtest)
predtrain = bst.predict(dtrain)
print(pearsonr(np.array(yp_train), predtrain))
print(pearsonr(np.array(yp_test), pred))


# first time training - continued
for kk in range(29):
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
    pred = bst.predict(dtest)
    predtrain = bst.predict(dtrain)
print(pearsonr(np.array(yp_train), predtrain))
# print(pearsonr(np.array(y_train.T)[0], predtrain))
print(pearsonr(np.array(yp_test), pred))
# print(pearsonr(np.array(y_test.T)[0], pred))

# concatenate predictions

g =  train[["TradingDay", "SecuCode"]][time_ind[train_days]:time_ind[train_days+delta_days]]
g["NextCatPred"] = pred
k = g.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")


In [None]:
# second traininng and so on

# change dataset
drift = 1
while drift * delta_days + train_days + delta_days < len(time_ind):
    X_train = X[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    X_test = X[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    y_train = y[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    y_test = y[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    yp_train = yp[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    yp_test = yp[time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dtest = xgb.DMatrix(data=X_test)
    ddev = xgb.DMatrix(data=X_test, label=y_test)
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)

    for kk in range(29):
        bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
#         bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)
        pred = bst.predict(dtest)
        predtrain = bst.predict(dtrain)
    print(pearsonr(np.array(yp_train), predtrain))
    # print(pearsonr(np.array(y_train.T)[0], predtrain))
    print(pearsonr(np.array(yp_test), pred))
    # print(pearsonr(np.array(y_test.T)[0], pred))
    gt =  train[["TradingDay", "SecuCode"]][time_ind[train_days + drift * delta_days]:time_ind[train_days+delta_days + drift * delta_days]]
    gt["NextCatPred"] = pred
    kt = gt.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")
    k = pd.concat([k, kt],ignore_index=True)
    drift += 1

# the last dataset
if drift * delta_days + train_days + delta_days > len(time_ind):
    X_train = X[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    X_test = X[time_ind[train_days + drift * delta_days]:]
    y_train = y[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    y_test = y[time_ind[train_days + drift * delta_days]:]
    yp_train = yp[time_ind[drift * delta_days]:time_ind[train_days + drift * delta_days]]
    yp_test = yp[time_ind[train_days + drift * delta_days]:]
    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dtest = xgb.DMatrix(data=X_test)
    ddev = xgb.DMatrix(data=X_test, label=y_test)
    bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3)

    for kk in range(29):
        bst=xgb.train(params=xgb_param,dtrain=dtrain,num_boost_round=1,early_stopping_rounds=None,evals=[(dtrain,'train'),(ddev,'test')],verbose_eval=3, xgb_model = bst)
        pred = bst.predict(dtest)
        predtrain = bst.predict(dtrain)
    print(pearsonr(np.array(yp_train), predtrain))
    # print(pearsonr(np.array(y_train.T)[0], predtrain))
    print(pearsonr(np.array(yp_test), pred))
    # print(pearsonr(np.array(y_test.T)[0], pred))
    gt =  train[["TradingDay", "SecuCode"]][time_ind[train_days + drift * delta_days]:]
    gt["NextCatPred"] = pred
    kt = gt.pivot_table(columns='SecuCode', index='TradingDay', values='NextCatPred').reset_index().sort_values("TradingDay")
    k = pd.concat([k, kt],ignore_index=True)

k.set_index("TradingDay").to_csv("0805back_input_30_30.csv")