In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from keras.models import Model
from xgboost import XGBRegressor

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the whitespace-delimited, header-less table and convert it to a plain
# NumPy array for the windowing code below.
# `delim_whitespace=True` is deprecated in recent pandas; `sep=r'\s+'` is the
# documented equivalent and behaves identically on older versions too.
dat = pd.read_table('dat4nn.txt', header=None, sep=r'\s+')
dat = np.array(dat)
dat

array([[ 0.49237545, -1.31158826,  1.50796416, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.63266203, -1.2111115 ,  1.48764176, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.71568878, -1.19101615,  1.45715816, ...,  0.        ,
         0.        ,  1.        ],
       ...,
       [-0.48676759,  0.73813757, -0.28548783, ...,  0.        ,
         0.        ,  1.        ],
       [-0.47130744,  0.59747011, -0.24992363, ...,  0.        ,
         1.        ,  0.        ],
       [-0.45928288,  0.05489563, -0.20927882, ...,  0.        ,
         1.        ,  0.        ]])

In [3]:
# reshape the data to feed the model
def genarator(data,lookback,step,delay):
    """Slice a 2-D array into sliding (input window, target span) pairs.

    Sample ``k`` uses rows ``k .. k+lookback-1`` of ``data`` (keeping every
    ``step``-th row) as its input, and column 1 of the next ``delay`` rows
    (``k+lookback .. k+lookback+delay-1``) as its target.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; rows are consecutive observations, columns are features.
        Column 1 is used as the target.
    lookback : int
        Number of rows spanned by each input window.
    step : int
        Sub-sampling stride inside the window.
    delay : int
        Number of rows following the window that form the target.

    Returns
    -------
    X : numpy.ndarray, shape (N, ceil(lookback / step), data.shape[-1])
    y : numpy.ndarray, shape (N, delay)
        where ``N = len(data) - lookback - delay + 1``.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``lookback + delay - 1`` rows.
    """
    n = len(data)
    N = n - lookback - delay + 1  # number of complete (window, target) pairs
    if N < 0:
        raise ValueError(
            'data has %d rows; at least %d are required for lookback=%d and delay=%d'
            % (n, lookback + delay - 1, lookback, delay))

    # A strided window over `lookback` rows holds ceil(lookback / step) rows.
    # The previous floor division (lookback // step) under-allocated X whenever
    # lookback was not a multiple of step and made the assignment below fail.
    rows_per_window = -(-lookback // step)

    X = np.zeros((N, rows_per_window, data.shape[-1]))
    y = np.zeros((N, delay))

    for k in range(N):
        window_end = k + lookback
        X[k, :, :] = data[k:window_end:step]
        y[k, :] = data[window_end:window_end + delay, 1]

    return X, y

In [4]:
# Windowing settings passed to genarator()
lookback = 24 * 14  # rows of history covered by each input window
step = 3            # keep every 3rd row inside the window
delay = 24          # number of subsequent rows used as the target

In [5]:
# Build the windowed inputs and multi-step targets, then report their shapes
X, y = genarator(dat, lookback, step, delay)
print('The shape of X:', X.shape)
print('The shape of y:', y.shape)

The shape of X: (11276, 112, 24)
The shape of y: (11276, 24)


In [6]:
# Restore the saved DNN_V2 network from disk and print its architecture
from keras.models import load_model

model = load_model('md_DNN_V2.h5')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 112, 24)      0                                            
__________________________________________________________________________________________________
flatten_6 (Flatten)             (None, 2688)         0           input_6[0][0]                    
__________________________________________________________________________________________________
dense_31 (Dense)                (None, 1024)         2753536     flatten_6[0][0]                  
__________________________________________________________________________________________________
batch_normalization_22 (BatchNo (None, 1024)         4096        dense_31[0][0]                   
__________________________________________________________________________________________________
dropout_26

In [8]:
# Truncate the network at an intermediate layer so that its activations can be
# used as low-dimensional features.
feature_model = Model(
    inputs=model.input,
    outputs=model.get_layer('batch_normalization_26').output,
)
# (call feature_model.summary() here to inspect the truncated network)

In [9]:
# Run every input window through the truncated network to obtain the new features
X_new = feature_model.predict(X)
X_new.shape

(11276, 32)

In [10]:
from sklearn.model_selection import train_test_split, GridSearchCV

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
# NOTE(review): train_test_split shuffles by default, and consecutive samples
# produced by genarator() share most of their rows, so near-duplicate windows can
# end up on both sides of the split — confirm this is acceptable for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_new, y, test_size=0.2, random_state=1)

# Tuning Step by Step
Because XGBoost can only regress on a single response, 24 independent models are required to predict PM2.5 in this case. The most important part of using XGBoost lies in parameter tuning. To present the fine-tuning process, we only take the first hour into account here; the remaining hours could be handled with the rougher tuning strategies mentioned in Andrew Ng's deep learning course. Thanks to AARSHAY JAIN's helpful [blog](https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/) about XGBoost tuning!

## max_depth & min_child_weight

In [35]:
# Coarse cross-validated grid over tree depth and minimum child weight.
# NOTE(review): column -1 of y is the last forecast step, while the notebook
# text says the first hour is being tuned — confirm which one is intended.
param1 = {
    'max_depth': np.arange(3, 10, 2),
    'min_child_weight': np.arange(1, 6, 2),
}
gs1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=1000, max_depth=5,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=1, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param1, scoring='neg_mean_absolute_error', cv=3)
gs1.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs1.cv_results_['mean_test_score'],
             gs1.cv_results_['std_test_score'],
             gs1.cv_results_['params'])),
    gs1.best_params_,
    gs1.best_score_,
)



([mean: -0.43203, std: 0.00409, params: {'max_depth': 3, 'min_child_weight': 1},
  mean: -0.43196, std: 0.00364, params: {'max_depth': 3, 'min_child_weight': 3},
  mean: -0.43324, std: 0.00456, params: {'max_depth': 3, 'min_child_weight': 5},
  mean: -0.40465, std: 0.00426, params: {'max_depth': 5, 'min_child_weight': 1},
  mean: -0.40672, std: 0.00470, params: {'max_depth': 5, 'min_child_weight': 3},
  mean: -0.40725, std: 0.00667, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: -0.39065, std: 0.00908, params: {'max_depth': 7, 'min_child_weight': 1},
  mean: -0.39131, std: 0.00843, params: {'max_depth': 7, 'min_child_weight': 3},
  mean: -0.39236, std: 0.00613, params: {'max_depth': 7, 'min_child_weight': 5},
  mean: -0.38702, std: 0.00553, params: {'max_depth': 9, 'min_child_weight': 1},
  mean: -0.38575, std: 0.00806, params: {'max_depth': 9, 'min_child_weight': 3},
  mean: -0.38448, std: 0.00654, params: {'max_depth': 9, 'min_child_weight': 5}],
 {'max_depth': 9, 'min_chil

In [38]:
# Refine the depth / child-weight grid around the previous optimum (5-fold CV, all cores).
param1 = {
    'max_depth': [8, 9, 10, 11],
    'min_child_weight': [4, 5, 6],
}
gs1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=1000, max_depth=5,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=1, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param1, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs1.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs1.cv_results_['mean_test_score'],
             gs1.cv_results_['std_test_score'],
             gs1.cv_results_['params'])),
    gs1.best_params_,
    gs1.best_score_,
)



([mean: -0.37582, std: 0.00716, params: {'max_depth': 8, 'min_child_weight': 4},
  mean: -0.37408, std: 0.00660, params: {'max_depth': 8, 'min_child_weight': 5},
  mean: -0.37423, std: 0.00540, params: {'max_depth': 8, 'min_child_weight': 6},
  mean: -0.37047, std: 0.00949, params: {'max_depth': 9, 'min_child_weight': 4},
  mean: -0.37000, std: 0.00857, params: {'max_depth': 9, 'min_child_weight': 5},
  mean: -0.37048, std: 0.00840, params: {'max_depth': 9, 'min_child_weight': 6},
  mean: -0.36969, std: 0.00902, params: {'max_depth': 10, 'min_child_weight': 4},
  mean: -0.36890, std: 0.00923, params: {'max_depth': 10, 'min_child_weight': 5},
  mean: -0.36974, std: 0.00982, params: {'max_depth': 10, 'min_child_weight': 6},
  mean: -0.36825, std: 0.00966, params: {'max_depth': 11, 'min_child_weight': 4},
  mean: -0.36637, std: 0.00872, params: {'max_depth': 11, 'min_child_weight': 5},
  mean: -0.36633, std: 0.00606, params: {'max_depth': 11, 'min_child_weight': 6}],
 {'max_depth': 11, 'm

In [43]:
# Explore deeper trees and larger child weights (fewer boosting rounds to keep it tractable).
param1 = {
    'max_depth': [13, 15, 17],
    'min_child_weight': [7, 8, 9, 10],
}
gs1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=5,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=1, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param1, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs1.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs1.cv_results_['mean_test_score'],
             gs1.cv_results_['std_test_score'],
             gs1.cv_results_['params'])),
    gs1.best_params_,
    gs1.best_score_,
)



([mean: -0.36515, std: 0.00784, params: {'max_depth': 13, 'min_child_weight': 7},
  mean: -0.36604, std: 0.00611, params: {'max_depth': 13, 'min_child_weight': 8},
  mean: -0.36576, std: 0.00729, params: {'max_depth': 13, 'min_child_weight': 9},
  mean: -0.36665, std: 0.00713, params: {'max_depth': 13, 'min_child_weight': 10},
  mean: -0.36586, std: 0.00826, params: {'max_depth': 15, 'min_child_weight': 7},
  mean: -0.36642, std: 0.00770, params: {'max_depth': 15, 'min_child_weight': 8},
  mean: -0.36403, std: 0.00930, params: {'max_depth': 15, 'min_child_weight': 9},
  mean: -0.36422, std: 0.00674, params: {'max_depth': 15, 'min_child_weight': 10},
  mean: -0.36497, std: 0.00648, params: {'max_depth': 17, 'min_child_weight': 7},
  mean: -0.36695, std: 0.00653, params: {'max_depth': 17, 'min_child_weight': 8},
  mean: -0.36403, std: 0.00973, params: {'max_depth': 17, 'min_child_weight': 9},
  mean: -0.36414, std: 0.00597, params: {'max_depth': 17, 'min_child_weight': 10}],
 {'max_depth

In [44]:
# Fill in the even depths around 15 with the same child-weight candidates.
param1 = {
    'max_depth': [14, 15, 16],
    'min_child_weight': [7, 8, 9, 10],
}
gs1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=5,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=1, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param1, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs1.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs1.cv_results_['mean_test_score'],
             gs1.cv_results_['std_test_score'],
             gs1.cv_results_['params'])),
    gs1.best_params_,
    gs1.best_score_,
)



([mean: -0.36469, std: 0.00764, params: {'max_depth': 14, 'min_child_weight': 7},
  mean: -0.36510, std: 0.00679, params: {'max_depth': 14, 'min_child_weight': 8},
  mean: -0.36530, std: 0.00742, params: {'max_depth': 14, 'min_child_weight': 9},
  mean: -0.36315, std: 0.00756, params: {'max_depth': 14, 'min_child_weight': 10},
  mean: -0.36586, std: 0.00826, params: {'max_depth': 15, 'min_child_weight': 7},
  mean: -0.36642, std: 0.00770, params: {'max_depth': 15, 'min_child_weight': 8},
  mean: -0.36403, std: 0.00930, params: {'max_depth': 15, 'min_child_weight': 9},
  mean: -0.36422, std: 0.00674, params: {'max_depth': 15, 'min_child_weight': 10},
  mean: -0.36505, std: 0.00817, params: {'max_depth': 16, 'min_child_weight': 7},
  mean: -0.36410, std: 0.00626, params: {'max_depth': 16, 'min_child_weight': 8},
  mean: -0.36190, std: 0.00710, params: {'max_depth': 16, 'min_child_weight': 9},
  mean: -0.36513, std: 0.00696, params: {'max_depth': 16, 'min_child_weight': 10}],
 {'max_depth

In [46]:
# Final depth / child-weight pass around (16, 9).
param1 = {
    'max_depth': [16, 17, 18],
    'min_child_weight': [8, 9, 10],
}
gs1 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=5,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=1, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param1, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs1.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs1.cv_results_['mean_test_score'],
             gs1.cv_results_['std_test_score'],
             gs1.cv_results_['params'])),
    gs1.best_params_,
    gs1.best_score_,
)



([mean: -0.36410, std: 0.00626, params: {'max_depth': 16, 'min_child_weight': 8},
  mean: -0.36190, std: 0.00710, params: {'max_depth': 16, 'min_child_weight': 9},
  mean: -0.36513, std: 0.00696, params: {'max_depth': 16, 'min_child_weight': 10},
  mean: -0.36695, std: 0.00653, params: {'max_depth': 17, 'min_child_weight': 8},
  mean: -0.36403, std: 0.00973, params: {'max_depth': 17, 'min_child_weight': 9},
  mean: -0.36414, std: 0.00597, params: {'max_depth': 17, 'min_child_weight': 10},
  mean: -0.36448, std: 0.00718, params: {'max_depth': 18, 'min_child_weight': 8},
  mean: -0.36576, std: 0.00829, params: {'max_depth': 18, 'min_child_weight': 9},
  mean: -0.36403, std: 0.00571, params: {'max_depth': 18, 'min_child_weight': 10}],
 {'max_depth': 16, 'min_child_weight': 9},
 -0.36189817183002176)

## gamma

In [47]:
# Tune gamma with max_depth=16 and min_child_weight=9 fixed.
param2 = {
    'gamma': [i / 10.0 for i in range(0, 5)],
}
gs2 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=16,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=9, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param2, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs2.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs2.cv_results_['mean_test_score'],
             gs2.cv_results_['std_test_score'],
             gs2.cv_results_['params'])),
    gs2.best_params_,
    gs2.best_score_,
)



([mean: -0.36190, std: 0.00710, params: {'gamma': 0.0},
  mean: -0.37217, std: 0.00791, params: {'gamma': 0.1},
  mean: -0.37474, std: 0.00745, params: {'gamma': 0.2},
  mean: -0.37784, std: 0.00663, params: {'gamma': 0.3},
  mean: -0.37722, std: 0.00786, params: {'gamma': 0.4}],
 {'gamma': 0.0},
 -0.36189817183002176)

## subsample and colsample_bytree

In [48]:
# Coarse grid (0.6 .. 0.9 in steps of 0.1) over row and column sub-sampling ratios.
param3 = {
    'subsample': [i / 10.0 for i in range(6, 10)],
    'colsample_bytree': [i / 10.0 for i in range(6, 10)],
}
gs3 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=16,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=9, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param3, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs3.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs3.cv_results_['mean_test_score'],
             gs3.cv_results_['std_test_score'],
             gs3.cv_results_['params'])),
    gs3.best_params_,
    gs3.best_score_,
)



([mean: -0.37561, std: 0.00608, params: {'colsample_bytree': 0.6, 'subsample': 0.6},
  mean: -0.37357, std: 0.00419, params: {'colsample_bytree': 0.6, 'subsample': 0.7},
  mean: -0.37301, std: 0.00609, params: {'colsample_bytree': 0.6, 'subsample': 0.8},
  mean: -0.37143, std: 0.00671, params: {'colsample_bytree': 0.6, 'subsample': 0.9},
  mean: -0.36945, std: 0.00863, params: {'colsample_bytree': 0.7, 'subsample': 0.6},
  mean: -0.36500, std: 0.00697, params: {'colsample_bytree': 0.7, 'subsample': 0.7},
  mean: -0.36611, std: 0.00619, params: {'colsample_bytree': 0.7, 'subsample': 0.8},
  mean: -0.36727, std: 0.00565, params: {'colsample_bytree': 0.7, 'subsample': 0.9},
  mean: -0.36809, std: 0.00610, params: {'colsample_bytree': 0.8, 'subsample': 0.6},
  mean: -0.36221, std: 0.00728, params: {'colsample_bytree': 0.8, 'subsample': 0.7},
  mean: -0.36190, std: 0.00710, params: {'colsample_bytree': 0.8, 'subsample': 0.8},
  mean: -0.36566, std: 0.00867, params: {'colsample_bytree': 0.8,

In [50]:
# Finer grid (steps of 0.05): subsample in 0.60 .. 0.75, colsample_bytree in 0.80 .. 0.95.
param3 = {
    'subsample': [i / 100.0 for i in range(60, 80, 5)],
    'colsample_bytree': [i / 100.0 for i in range(80, 100, 5)],
}
gs3 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=16,
                           subsample=0.8, colsample_bytree=0.8,
                           min_child_weight=9, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param3, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs3.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs3.cv_results_['mean_test_score'],
             gs3.cv_results_['std_test_score'],
             gs3.cv_results_['params'])),
    gs3.best_params_,
    gs3.best_score_,
)



([mean: -0.36809, std: 0.00610, params: {'colsample_bytree': 0.8, 'subsample': 0.6},
  mean: -0.36909, std: 0.00562, params: {'colsample_bytree': 0.8, 'subsample': 0.65},
  mean: -0.36221, std: 0.00728, params: {'colsample_bytree': 0.8, 'subsample': 0.7},
  mean: -0.36505, std: 0.00621, params: {'colsample_bytree': 0.8, 'subsample': 0.75},
  mean: -0.36696, std: 0.00981, params: {'colsample_bytree': 0.85, 'subsample': 0.6},
  mean: -0.36515, std: 0.00653, params: {'colsample_bytree': 0.85, 'subsample': 0.65},
  mean: -0.36316, std: 0.00596, params: {'colsample_bytree': 0.85, 'subsample': 0.7},
  mean: -0.36344, std: 0.00624, params: {'colsample_bytree': 0.85, 'subsample': 0.75},
  mean: -0.36798, std: 0.00645, params: {'colsample_bytree': 0.9, 'subsample': 0.6},
  mean: -0.36811, std: 0.00972, params: {'colsample_bytree': 0.9, 'subsample': 0.65},
  mean: -0.36170, std: 0.00505, params: {'colsample_bytree': 0.9, 'subsample': 0.7},
  mean: -0.36292, std: 0.00772, params: {'colsample_bytr

## reg_alpha

In [51]:
# Coarse, log-spaced search for the L1 regularisation strength.
param4 = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100],
}
gs4 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=16,
                           subsample=0.75, colsample_bytree=0.95,
                           min_child_weight=9, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param4, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs4.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs4.cv_results_['mean_test_score'],
             gs4.cv_results_['std_test_score'],
             gs4.cv_results_['params'])),
    gs4.best_params_,
    gs4.best_score_,
)



([mean: -0.36124, std: 0.00623, params: {'reg_alpha': 1e-05},
  mean: -0.36249, std: 0.00815, params: {'reg_alpha': 0.01},
  mean: -0.35785, std: 0.00671, params: {'reg_alpha': 0.1},
  mean: -0.35416, std: 0.00485, params: {'reg_alpha': 1},
  mean: -0.48667, std: 0.00763, params: {'reg_alpha': 100}],
 {'reg_alpha': 1},
 -0.3541596875244134)

In [53]:
# Linear refinement of reg_alpha between 0.1 and 0.9.
param4 = {
    'reg_alpha': np.arange(0.1, 1, 0.1),
}
gs4 = GridSearchCV(
    estimator=XGBRegressor(learning_rate=0.1, n_estimators=500, max_depth=16,
                           subsample=0.75, colsample_bytree=0.95,
                           min_child_weight=9, gamma=0,
                           scale_pos_weight=1.0, seed=1),
    param_grid=param4, scoring='neg_mean_absolute_error', cv=5, n_jobs=-1)
gs4.fit(X_train, y_train[:, -1])
# `grid_scores_` was removed in scikit-learn 0.20; rebuild the same
# (mean, std, params) triples from `cv_results_`.
(
    list(zip(gs4.cv_results_['mean_test_score'],
             gs4.cv_results_['std_test_score'],
             gs4.cv_results_['params'])),
    gs4.best_params_,
    gs4.best_score_,
)



([mean: -0.35785, std: 0.00671, params: {'reg_alpha': 0.1},
  mean: -0.35608, std: 0.00595, params: {'reg_alpha': 0.2},
  mean: -0.35658, std: 0.00740, params: {'reg_alpha': 0.30000000000000004},
  mean: -0.35770, std: 0.00765, params: {'reg_alpha': 0.4},
  mean: -0.35740, std: 0.00646, params: {'reg_alpha': 0.5},
  mean: -0.35533, std: 0.00394, params: {'reg_alpha': 0.6},
  mean: -0.35575, std: 0.00637, params: {'reg_alpha': 0.7000000000000001},
  mean: -0.35547, std: 0.00542, params: {'reg_alpha': 0.8},
  mean: -0.35637, std: 0.00589, params: {'reg_alpha': 0.9}],
 {'reg_alpha': 0.6},
 -0.3553327457690302)

In [57]:
# XGBoost Regressor Before Tuning
# The original cell indexed the targets with `i`, which no visible cell defines
# at notebook scope (NameError on Restart & Run All). Use an explicit column:
# -1 is the one the grid searches in this notebook were fitted on.
# NOTE(review): set target_col to the originally intended forecast step if it differs.
target_col = -1
xgbr = XGBRegressor(n_estimators=5000, learning_rate=0.01)
xgbr.fit(X_train, y_train[:, target_col],
         eval_set=[(X_train, y_train[:, target_col]), (X_test, y_test[:, target_col])],
         eval_metric='mae', verbose=200)

[0]	validation_0-mae:0.947146	validation_1-mae:0.96722
[200]	validation_0-mae:0.300269	validation_1-mae:0.309682
[400]	validation_0-mae:0.239953	validation_1-mae:0.254005
[600]	validation_0-mae:0.226547	validation_1-mae:0.243912
[800]	validation_0-mae:0.219776	validation_1-mae:0.239266
[1000]	validation_0-mae:0.214993	validation_1-mae:0.236289
[1200]	validation_0-mae:0.211244	validation_1-mae:0.234533
[1400]	validation_0-mae:0.208124	validation_1-mae:0.233419
[1600]	validation_0-mae:0.205619	validation_1-mae:0.232471
[1800]	validation_0-mae:0.203394	validation_1-mae:0.231736
[2000]	validation_0-mae:0.201197	validation_1-mae:0.231174
[2200]	validation_0-mae:0.19917	validation_1-mae:0.230526
[2400]	validation_0-mae:0.197189	validation_1-mae:0.229881
[2600]	validation_0-mae:0.195236	validation_1-mae:0.229266
[2800]	validation_0-mae:0.193453	validation_1-mae:0.228723
[3000]	validation_0-mae:0.191777	validation_1-mae:0.228247
[3200]	validation_0-mae:0.190166	validation_1-mae:0.227756
[3400]

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.01, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=5000,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

## Comparisons

In [58]:
# XGBoost Regressor After Tuning
# The original cell indexed the targets with `i`, which no visible cell defines
# at notebook scope (NameError on Restart & Run All). Use an explicit column:
# -1 is the one the grid searches in this notebook were fitted on.
# NOTE(review): set target_col to the originally intended forecast step if it differs.
target_col = -1
xgbr = XGBRegressor(learning_rate=0.01, n_estimators=5000, max_depth=16,
                    subsample=0.75, colsample_bytree=0.95,
                    min_child_weight=9, gamma=0,
                    scale_pos_weight=1.0, seed=1, reg_alpha=.6)
xgbr.fit(X_train, y_train[:, target_col],
         eval_set=[(X_train, y_train[:, target_col]), (X_test, y_test[:, target_col])],
         eval_metric='mae', verbose=200)

[0]	validation_0-mae:0.946778	validation_1-mae:0.967051
[200]	validation_0-mae:0.214673	validation_1-mae:0.276302
[400]	validation_0-mae:0.113089	validation_1-mae:0.212929
[600]	validation_0-mae:0.081269	validation_1-mae:0.202086
[800]	validation_0-mae:0.064445	validation_1-mae:0.198018
[1000]	validation_0-mae:0.053391	validation_1-mae:0.195952
[1200]	validation_0-mae:0.045655	validation_1-mae:0.194646
[1400]	validation_0-mae:0.040203	validation_1-mae:0.193945
[1600]	validation_0-mae:0.036124	validation_1-mae:0.193427
[1800]	validation_0-mae:0.033011	validation_1-mae:0.192997
[2000]	validation_0-mae:0.030543	validation_1-mae:0.192713
[2200]	validation_0-mae:0.028579	validation_1-mae:0.192478
[2400]	validation_0-mae:0.026927	validation_1-mae:0.1923
[2600]	validation_0-mae:0.025561	validation_1-mae:0.192129
[2800]	validation_0-mae:0.02444	validation_1-mae:0.191974
[3000]	validation_0-mae:0.023455	validation_1-mae:0.191853
[3200]	validation_0-mae:0.022596	validation_1-mae:0.191736
[3400]	

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.95, gamma=0, learning_rate=0.01,
       max_delta_step=0, max_depth=16, min_child_weight=9, missing=None,
       n_estimators=5000, n_jobs=1, nthread=None, objective='reg:linear',
       random_state=0, reg_alpha=0.6, reg_lambda=1, scale_pos_weight=1.0,
       seed=1, silent=True, subsample=0.75)