# Setup

Make sure this notebook works well in both python 2 and 3, import a few common modules, ensure MatplotLib plots figures inline

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os


# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # for retina screens
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

Configure notebook to display all results in cell

In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Revert to the last line of output only
# InteractiveShell.ast_node_interactivity = "last_expr"

In [9]:
class LabelCountEncoder(object):
    def __init__(self):
        self.count_dict = {}
    
    def fit(self, column):
        # This gives you a dictionary with level as the key and counts as the value
        count = column.value_counts().to_dict()
        # We want to rank the key by its value and use the rank as the new value
        self.count_dict = {key[0]: rank+1 for rank, key in enumerate(sorted(count.items(), key=lambda x: x[1]))}
    
    def transform(self, column):
        # If a category only appears in the test set, we will assign the value to zero.
        missing = 0
        return column.apply(lambda x : self.count_dict.get(x, missing))
    
    def fit_transform(self, column):
        self.fit(column)
        return self.transform(column)

# Load Data

In [3]:
import pandas as pd

houses_train = pd.read_csv('../Data/features_houses_train.csv')
houses_test = pd.read_csv('../Data/features_houses_test.csv')

#houses_train = pd.read_csv('../Data/encoded_houses_train.csv')
#houses_test = pd.read_csv('../Data/encoded_houses_test.csv')

In [4]:
print("houses_train dimensions: {}".format(houses_train.shape))
print("houses_test dimensions: {}".format(houses_test.shape))

houses_train dimensions: (1460, 84)
houses_test dimensions: (1459, 84)


In [5]:
pd.set_option("display.max_columns", 400)
houses_train.head(3)

Unnamed: 0.1,Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Alley,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,X1stFlrSF,X2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,X3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,SaleType,SaleCondition,Garage.interaction,new.old,Room.size,full.YrSold,QuarterSold,TotalBath,BathToBed,AvgHouseLivArea.ratio,SalePrice
0,1,60,RL,65.0,8450,,Reg,Lvl,Inside,Gtl,CollgCr,Norm,1Fam,2Story,4,6,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196,4,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,5,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,4,8,Typ,0,,Attchd,2003,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,WD,Normal,2.TA,2.163265,214,200802,1,3.5,1.157598,0.932072,208500.0
1,2,20,RL,80.0,9600,,Reg,Lvl,FR2,Gtl,Veenker,Feedr,1Fam,1Story,5,3,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0,3,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,5,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,3,6,Typ,1,TA,Attchd,1976,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,WD,Normal,2.TA,-2.583333,210,200705,2,2.5,0.728713,0.742753,181500.0
2,3,60,RL,68.0,11250,,IR1,Lvl,Inside,Gtl,CollgCr,Norm,1Fam,2Story,4,6,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162,4,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,5,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,4,6,Typ,1,TA,Attchd,2001,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,WD,Normal,2.TA,0.163265,298,200809,3,3.5,1.157598,0.973497,223500.0


In [6]:
houses_train.fillna(0, inplace = True)
houses_test.fillna(0, inplace = True)

In [13]:
#houses_train.isnull().sum()

In [26]:
#houses_train.info()
#houses_test.info()

Delete first column

In [7]:
houses_train.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')
houses_test.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')

Label count encode data frame:

In [10]:
for c in houses_train.columns:
    if houses_train[c].dtype == 'object':
        lce = LabelCountEncoder()
        houses_train[c] = lce.fit_transform(houses_train[c])

# Run xgboost

### Create private training & test set

In [11]:
from sklearn.model_selection import train_test_split

seed = 10
test_ratio = 0.2

X = houses_train.loc[:, houses_train.columns != "SalePrice"].values # convert to np.array
y = houses_train.loc[:, houses_train.columns == "SalePrice"].values

# Take log of SalePrice
y = np.log(y + 1).ravel() # convert to 1D array for model fit (xxx, )


In [12]:
X_pr_train, X_pr_test, y_pr_train, y_pr_test = train_test_split(X, y, test_size=test_ratio, random_state=seed)

In [13]:
print(len(X_pr_train), "train +", len(X_pr_test), "test")

1168 train + 292 test


### Fit Model

In [14]:
from xgboost import XGBRegressor

xgb_clf = XGBRegressor(max_depth=3, 
                        learning_rate=0.05, 
                        n_estimators=1000, # Number of boosted trees to fit
                        silent=False, # print messages while running 
                        objective='reg:linear', 
                        booster='gbtree', # Specify which booster to use: gbtree, gblinear or dart
                        #for dart see http://xgboost.readthedocs.io/en/latest/tutorials/dart.html 
                        n_jobs=-1, # Number of parallel threads used to run xgboost. (replaces nthread)
                        gamma=0,  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
                        min_child_weight=1, # Minimum sum of instance weight(hessian) needed in a child
                        max_delta_step=0, # Maximum delta step we allow each tree’s weight estimation to be
                        subsample=1, # Subsample ratio of the training instance
                        colsample_bytree=1, # Subsample ratio of columns when constructing each tree
                        colsample_bylevel=1, # Subsample ratio of columns for each split, in each level
                        reg_alpha=0, # L1 regularization term on weights
                        reg_lambda=1, # L2 regularization term on weights
                        scale_pos_weight=1, # Balancing of positive and negative weights
                        base_score=0.5, # The initial prediction score of all instances, global bias
                        random_state=743, 
                        missing=None) # Value in the data which needs to be present as a missing value. If None, defaults to np.nan





TypeError: __init__() got an unexpected keyword argument 'booster'

In [847]:
xgb_clf.fit(X_pr_train, y_pr_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.05, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=-1, nthread=None, objective='reg:linear', random_state=743,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=False, subsample=1)

In [848]:
# make predictions for test data

y_pr_pred = xgb_clf.predict(X_pr_test)

Evaluate predictions

In [849]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_pr_test, y_pr_pred)
rmse = np.sqrt(mse)
rmse


0.12373572496003182

In [850]:
y_pr_test[1:5]
y_pr_pred[1:5]

array([ 12.1428719 ,  11.8277435 ,  12.01067193,  12.64109979])

array([ 12.22154427,  11.80571461,  11.84539032,  12.69584465], dtype=float32)

Save model to file

In [851]:
from sklearn.externals import joblib # More memory efficient than pickle for large numpy arrays

joblib.dump(xgb_clf, './Models/xgboost_model.pkl') 

['./Models/xgboost_model.pkl']

To load a model:

In [852]:
# xgb_clf_loaded = joblib.load('./Models/xgboost_model.pkl') 

### Model Tuning

In [853]:
from sklearn.model_selection import GridSearchCV

xgb_params = {'max_depth': 3,
              'learning_rate': 0.1, 
              'n_estimators': 100, 
              'objective': 'reg:linear'}

# param_grid = {'max_depth': [3], 
#               'learning_rate': [0.05], 
#               'n_estimators': [2000], # Number of boosted trees to fit
#               'objective': ['reg:linear'], 
#               'booster': ['gbtree'], # Specify which booster to use: gbtree, gblinear or dart
#               'n_jobs': [-1], # Number of parallel threads used to run xgboost. (replaces nthread)
#               'gamma': [0],  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
#               'min_child_weight': [1], # Minimum sum of instance weight(hessian) needed in a child
#               'max_delta_step': [0], # Maximum delta step we allow each tree’s weight estimation to be
#               'subsample': [1], # Subsample ratio of the training instance
#               'colsample_bytree': [0.5], # Subsample ratio of columns when constructing each tree
#               'colsample_bylevel': [0.4], # Subsample ratio of columns for each split, in each level
#               'reg_alpha': [0], # L1 regularization term on weights
#               'reg_lambda': [1], # L2 regularization term on weights
#               'scale_pos_weight': [1], # Balancing of positive and negative weights
#               'base_score': [0.5], # The initial prediction score of all instances, global bias
#               'silent': [True],
#               'random_state': [10]}

param_grid = {'max_depth': [3], 
              'learning_rate': [0.05, 0.07, 0.09], 
              'n_estimators': [1500, 1800, 2000], # Number of boosted trees to fit
              'objective': ['reg:linear'], 
              'booster': ['gbtree'], # Specify which booster to use: gbtree, gblinear or dart
              'n_jobs': [-1], # Number of parallel threads used to run xgboost. (replaces nthread)
              'gamma': [0, 0.5],  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
              'min_child_weight': [1], # Minimum sum of instance weight(hessian) needed in a child
              'max_delta_step': [0], # Maximum delta step we allow each tree’s weight estimation to be
              'subsample': [1], # Subsample ratio of the training instance
              'colsample_bytree': [0.5, 0.6, 0.7], # Subsample ratio of columns when constructing each tree
              'colsample_bylevel': [0.2, 0.3, 0.4], # Subsample ratio of columns for each split, in each level
              'reg_alpha': [0], # L1 regularization term on weights
              'reg_lambda': [1], # L2 regularization term on weights
              'scale_pos_weight': [1], # Balancing of positive and negative weights
              'base_score': [0.5], # The initial prediction score of all instances, global bias
              'silent': [True],
              'random_state': [10]}

optimized_xgb_clf = GridSearchCV(XGBRegressor(**xgb_params), # scikit-learn estimator interface 
                                 param_grid = param_grid, # Dictionary with parameters names (string) as keys
                                 scoring="neg_mean_squared_error", # controls what metric they apply to the estimators evaluated
                                 n_jobs=-1, # If True, the data is assumed to be identically distributed across the folds,
                                 iid=True, 
                                 refit=True, # Refit an estimator using the best found parameters (best_estimator_)
                                 cv=3, #integer, to specify the number of folds in a (Stratified)KFold. None -> default 3-fold cross validation
                                 verbose=10, # the higher, the more messages
                                 pre_dispatch="2*n_jobs", # number of jobs that get dispatched during parallel execution
                                 error_score="raise", 
                                 return_train_score=False) #If False, the cv_results_ attribute will not include training scores

Inspect the grid

In [None]:
optimized_xgb_clf

GridSearchCV(cv=3, error_score='raise',
       estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_depth': [3], 'learning_rate': [0.05, 0.07, 0.09], 'n_estimators': [1500, 1800, 2000], 'objective': ['reg:linear'], 'booster': ['gbtree'], 'n_jobs': [-1], 'gamma': [0, 0.5], 'min_child_weight': [1], 'max_delta_step': [0], 'subsample': [1], 'colsample_bytree': [0.5, 0.6, 0.7], 'colsample_bylevel': [0.2, 0.3, 0.4], 'reg_alpha': [0], 'reg_lambda': [1], 'scale_pos_weight': [1], 'base_score': [0.5], 'silent': [True], 'random_state': [10]},
       pre_dispatch='2*n_jobs', refit=True, return_tr

Run grid tuning

In [None]:
optimized_xgb_clf.fit(X_pr_train, y_pr_train)

Fitting 3 folds for each of 162 candidates, totalling 486 fits
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, boos

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    7.9s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015882879480013438, total=   4.6s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.01426418179182491, total=   4.5s
[

[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   12.1s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015397253074609015, total=   4.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014896910615445118, total=   3.5s


[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:   20.5s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014487917429806195, total=   5.0s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014882573504946824, total=   4.9s


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:   25.5s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02612714322820539, total=   3.7s
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.016601781227944486, total=   5.1s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   37.3s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02612714322820539, total=   4.8s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02075288379834782, total=   4

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   45.4s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.023272999934123635, total=   4.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.021072572662089138, total=  

[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:   58.0s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.01987726869683416, total=   4.8s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015968985168971123, total=   4.0s

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:  1.2min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014346245178755703, total=   4.3s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014673124230664615, total=   4.4s


[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:  1.4min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015125185992708332, total=   4.8s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.0169127740362413, total=   5.2s


[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 


[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:  1.7min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02507966059810059, total=   4.0s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020988968856872178, total=   

[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020189125563916617, total=   5.6s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, 

[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:  2.0min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02603826327905968, total=   5.5s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020189125563916617, total=   5.

[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013752267206574515, total=   6.1s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014344940450357663, total=   6.5s


[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:  2.4min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.016369492049837786, total=   7.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013777016883146849, total=   7.3s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014650454728789055, total=   7.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015600611415723933, total=   7.7

[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:  2.8min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.019779085786995873, total=   4.9s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.024816630466459585, total=  

[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.025384513686572227, total=   6.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, 

[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  3.2min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020649016330320024, total=   4.6s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.2, colsample_bytree=0.7, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.01997056853198773, total=   

[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013497981762664436, total=   7.7s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.01538927451485356, total=   8.3s
[

[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:  3.8min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014146340857155916, total=   6.5s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015523815002504862, total=   6.9s


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.015612516617147743, total=   7.2s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013912039448150141, total=   7.4

[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  4.3min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.025257406062620932, total=   7.5s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.02091060848861221, total=   

[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020544278738009736, total=   6.8s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, 

[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed:  5.0min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.025314913072055453, total=   6.9s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.5, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020770288041086566, total=   6

[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.014335971563446882, total=   5.9s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013991828521211653, total=   5.8s


[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed:  5.6min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.01607022490137085, total=   5.9s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.013486314897466848, total=   5.9s
[

[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020768675540582395, total=   7.0s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.05, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.019890639713141055, total=  

[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed:  6.2min


[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.020099730604727317, total=   7.2s
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.09, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV]  base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0.5, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=2000, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1, score=-0.026082645420525526, total=  

The best combination of parameters is:

In [None]:
optimized_xgb_clf.best_params_

In [None]:
## The best score is
optimized_xgb_clf.best_score_ # that's the training score so not meaningful

In [None]:
# optimized_xgb_clf.cv_results_

In [None]:
# make predictions for test data

y_pr_pred = optimized_xgb_clf.predict(X_pr_test)

Evaluate predictions

In [None]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_pr_test, y_pr_pred)
rmse = np.sqrt(mse)
rmse


In [None]:
y_pr_test[1:5]
y_pr_pred[1:5]