In [11]:
import pandas as pd
import numpy as np
import xgboost as xgb
import catboost as cb
import lightgbm as lgbm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, TimeSeriesSplit
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import math
import scipy.stats as st
from skopt import BayesSearchCV
from skopt.space import Integer

In [2]:
print(cb.__version__)
print(lgbm.__version__)
print(xgb.__version__)

0.13.1
2.2.3
0.81


In [2]:
train_rossman = pd.read_csv('/Users/cmcnamara/Downloads/Kaggle Rossman/train.csv')
store_rossman = pd.read_csv('/Users/cmcnamara/Downloads/Kaggle Rossman/store.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
train_rossman = pd.merge(train_rossman, store_rossman, on='Store', how='left').fillna(0)

In [4]:
train_rossman['StateHoliday'] = train_rossman['StateHoliday'].astype('category').cat.codes
train_rossman['StoreType'] = train_rossman['StoreType'].astype('category').cat.codes
train_rossman['Assortment'] = train_rossman['Assortment'].astype('category').cat.codes
train_rossman['Promo2SinceWeek'] = train_rossman['Promo2SinceWeek'].astype(int)
train_rossman['Promo2SinceYear'] = train_rossman['Promo2SinceYear'].astype(int)
train_rossman['CompeititonDistance'] = train_rossman['CompetitionDistance'].astype(int)
train_rossman['CompetitionOpenSinceMonth'] = train_rossman['CompetitionOpenSinceMonth'].astype(int)
train_rossman['CompetitionOpenSinceYear'] = train_rossman["CompetitionOpenSinceYear"].astype(int)
train_rossman['PromoInterval'] = train_rossman['PromoInterval'].astype('category').cat.codes
train_rossman['Date'] = pd.to_datetime(train_rossman['Date'])

In [5]:
test_rossman = train_rossman.iloc[np.where(train_rossman.Date.dt.year == 2015)[0], :]
train_rossman = train_rossman.iloc[np.where(train_rossman.Date.dt.year == 2014)[0], :]

In [6]:
train_rossman = train_rossman.iloc[::-1]
test_rossman = test_rossman.iloc[::-1]

Note that there are a few problems when comparing the different algorithms (xgboost, lightgbm, catboost).

* The first is that they take the input data in differently. While  xgboost and lightgbm have wrappers to handle pandas dataframes directly, catboost does not. Therefore, I will use their native data wrapper for each algorithm.
* The second is that they do not grow the trees the same.

Nonetheless, I will try to ensure that each algorithm is as close to one another as possible for maximum accuracy."

In [7]:
train_y = train_rossman['Sales']
train_x = train_rossman.drop(['Sales', 'Date'], axis=1)
params = {"max_depth": [5,10, 15],
         "learning_rate": [0.01, 0.05, 0.1],
         "n_estimators": [100, 250]}
params_cb = {'depth': [5, 10, 15],
            'learning_rate': [0.01, 0.05, 0.1],
            'iterations': [100, 250]}
test_y = test_rossman['Sales']
test_x = test_rossman.drop(['Sales', 'Date'], axis=1)
n_splits = 3
max_train_size = len(train_x) // (n_splits+1)

In [9]:
train_x.shape

(373855, 17)

In [10]:
test_x.shape

(236380, 17)

# Grid Search

### xgboost

In [11]:
%%time
model_xg = xgb.XGBRegressor()
grid_search_xg = GridSearchCV(model_xg, param_grid=params, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), verbose=1, n_jobs=1)
grid_search_xg.fit(train_x, train_y)

Fitting 3 folds for each of 18 candidates, totalling 54 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed: 42.3min finished


CPU times: user 46min 36s, sys: 8.97 s, total: 46min 45s
Wall time: 46min 57s


### lightgbm

In [12]:
%%time
model_lg = lgbm.LGBMRegressor()
grid_search_lg = GridSearchCV(model_lg, param_grid=params, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), verbose=1, n_jobs=1)
grid_search_lg.fit(train_x, train_y)

Fitting 3 folds for each of 18 candidates, totalling 54 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed:  1.9min finished


CPU times: user 6min 37s, sys: 5.92 s, total: 6min 43s
Wall time: 1min 55s


### catboost

In [13]:
%%time
model_cb = cb.CatBoostRegressor(verbose=False)
grid_search_cb = GridSearchCV(model_cb, param_grid=params_cb, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), verbose=1, n_jobs=1)
grid_search_cb.fit(train_x, train_y)

Fitting 3 folds for each of 18 candidates, totalling 54 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed: 38.0min finished


CPU times: user 57min 19s, sys: 41min 58s, total: 1h 39min 17s
Wall time: 38min 4s


## Predictions

In [14]:
predict_y_xg = grid_search_xg.predict(test_x)
predict_y_lg = grid_search_lg.predict(test_x)
predict_y_cb = grid_search_cb.predict(test_x)

print('XGBoost R2 Score: {}'.format(r2_score(test_y, predict_y_xg)))
print('LightGBM R2 Score: {}'.format(r2_score(test_y, predict_y_lg)))
print('CatBoost R2 Score: {}\n'.format(r2_score(test_y, predict_y_cb)))

print('XGBoost RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_xg))))
print('LightGBM RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_lg))))
print('CatBoost RMSE Score: {}\n'.format(math.sqrt(mean_squared_error(test_y, predict_y_cb))))

print('XGBoost MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_xg)))
print('LightGBM MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_lg)))
print('CatBoost MAE Score: {}\n'.format(mean_absolute_error(test_y, predict_y_cb)))

XGBoost R2 Score: 0.9844574591682844
LightGBM R2 Score: 0.9768122982657019
CatBoost R2 Score: 0.38984165613214683

XGBoost RMSE Score: 478.7553058503369
LightGBM RMSE Score: 584.7650988998363
CatBoost RMSE Score: 2999.6726883567626

XGBoost MAE Score: 304.70115830091817
LightGBM MAE Score: 397.43611843205383
CatBoost MAE Score: 2275.531732568766



# Randomized Search

In [15]:
params = {'max_depth': st.randint(3, 16),
          'learning_rate': st.uniform(0.01, 0.4),
          'n_estimators': st.randint(100, 500)
         }

params_cb = {'depth': st.randint(3,16),
            'learning_rate': st.uniform(0.01, 0.4),
            'iterations': st.randint(100, 500)
            }

### xgboost

In [16]:
%%time
model_xg = xgb.XGBRegressor()
rand_search_xg = RandomizedSearchCV(model_xg, params, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=1)
rand_search_xg.fit(train_x, train_y)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed: 18.1min finished


CPU times: user 20min 49s, sys: 1.45 s, total: 20min 50s
Wall time: 20min 51s


### lightgbm

In [17]:
%%time
model_lg = lgbm.LGBMRegressor()
rand_search_lg = RandomizedSearchCV(model_lg, params, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=1)
rand_search_lg.fit(train_x, train_y)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   34.6s finished


CPU times: user 2min 37s, sys: 1.15 s, total: 2min 38s
Wall time: 40.4 s


### catboost

In [18]:
%%time
model_cb = cb.CatBoostRegressor(verbose=False)
rand_search_cb = RandomizedSearchCV(model_cb, params_cb, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=1)
rand_search_cb.fit(train_x, train_y)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:  9.7min finished


CPU times: user 17min 52s, sys: 9min 35s, total: 27min 27s
Wall time: 9min 59s


In [19]:
predict_y_xg = rand_search_xg.predict(test_x)
predict_y_lg = rand_search_lg.predict(test_x)
predict_y_cb = rand_search_cb.predict(test_x)

print('XGBoost R2 Score: {}'.format(r2_score(test_y, predict_y_xg)))
print('LightGBM R2 Score: {}'.format(r2_score(test_y, predict_y_lg)))
print('CatBoost R2 Score: {}\n'.format(r2_score(test_y, predict_y_cb)))

print('XGBoost RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_xg))))
print('LightGBM RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_lg))))
print('CatBoost RMSE Score: {}\n'.format(math.sqrt(mean_squared_error(test_y, predict_y_cb))))

print('XGBoost MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_xg)))
print('LightGBM MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_lg)))
print('CatBoost MAE Score: {}\n'.format(mean_absolute_error(test_y, predict_y_cb)))

XGBoost R2 Score: 0.9816412715286165
LightGBM R2 Score: 0.982828346510317
CatBoost R2 Score: 0.9798560758457027

XGBoost RMSE Score: 520.324039162105
LightGBM RMSE Score: 503.2208763464184
CatBoost RMSE Score: 545.0352993850663

XGBoost MAE Score: 352.0900901997669
LightGBM MAE Score: 334.43611951993023
CatBoost MAE Score: 365.1795048309185



# Bayesian Randomized Search

In [19]:
params = {'max_depth': Integer(3, 16),
          'learning_rate': (0.01, 0.4, 'uniform'),
          'n_estimators': Integer(100, 5000)
         }

params_cb = {'depth': Integer(3, 16),
            'learning_rate': (0.01, 0.4, 'uniform'),
            'iterations': Integer(100, 5000)
            }

## xgboost

In [15]:
%%time
model_xg = xgb.XGBRegressor()
bayes_search_xg = BayesSearchCV(model_xg, params, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=1)
bayes_search_xg.fit(train_x, train_y)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.7min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 24.8min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  7.0min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  2.4min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed: 18.3min finished


CPU times: user 1h 4min, sys: 23.2 s, total: 1h 4min 23s
Wall time: 1h 6min 11s


## lightgbm

In [16]:
%%time
model_lg = lgbm.LGBMRegressor()
bayes_search_lg = BayesSearchCV(model_lg, params, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=1)
bayes_search_lg.fit(train_x, train_y)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   38.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   36.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   57.7s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    3.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   13.0s finished


CPU times: user 10min 41s, sys: 5.88 s, total: 10min 47s
Wall time: 3min 6s


## catboost

In [21]:
%%time
model_cb = cb.CatBoostRegressor(verbose=1000)
bayes_search_cb = BayesSearchCV(model_cb, params_cb, n_jobs=1, cv=TimeSeriesSplit(n_splits=n_splits, max_train_size=max_train_size), scoring='neg_mean_squared_error', n_iter=5, verbose=0)
bayes_search_cb.fit(train_x, train_y)

0:	learn: 4856.1540562	total: 19.6ms	remaining: 34.2s
1000:	learn: 296.4766185	total: 14.5s	remaining: 10.8s
1744:	learn: 278.7339886	total: 26.8s	remaining: 0us
0:	learn: 5113.6497597	total: 33.3ms	remaining: 58s
1000:	learn: 340.6133635	total: 15.9s	remaining: 11.8s
1744:	learn: 320.5290109	total: 29.1s	remaining: 0us
0:	learn: 5037.4275472	total: 21.2ms	remaining: 36.9s
1000:	learn: 316.6158176	total: 15.5s	remaining: 11.5s
1744:	learn: 293.3222780	total: 26.7s	remaining: 0us
0:	learn: 6092.4622595	total: 110ms	remaining: 7m 27s
1000:	learn: 263.4372295	total: 1m 42s	remaining: 5m 11s
2000:	learn: 238.0136548	total: 3m 24s	remaining: 3m 30s
3000:	learn: 226.6386273	total: 5m 6s	remaining: 1m 47s
4000:	learn: 219.7331136	total: 6m 52s	remaining: 5.97s
4058:	learn: 219.2853617	total: 6m 58s	remaining: 0us
0:	learn: 6409.8125723	total: 102ms	remaining: 6m 53s
1000:	learn: 303.6218031	total: 1m 40s	remaining: 5m 7s
2000:	learn: 277.3465567	total: 3m 26s	remaining: 3m 32s
3000:	learn: 26

## Predictions

In [22]:
predict_y_xg = bayes_search_xg.predict(test_x)
predict_y_lg = bayes_search_lg.predict(test_x)
predict_y_cb = bayes_search_cb.predict(test_x)

print('XGBoost R2 Score: {}'.format(r2_score(test_y, predict_y_xg)))
print('LightGBM R2 Score: {}'.format(r2_score(test_y, predict_y_lg)))
print('CatBoost R2 Score: {}\n'.format(r2_score(test_y, predict_y_cb)))

print('XGBoost RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_xg))))
print('LightGBM RMSE Score: {}'.format(math.sqrt(mean_squared_error(test_y, predict_y_lg))))
print('CatBoost RMSE Score: {}\n'.format(math.sqrt(mean_squared_error(test_y, predict_y_cb))))

print('XGBoost MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_xg)))
print('LightGBM MAE Score: {}'.format(mean_absolute_error(test_y, predict_y_lg)))
print('CatBoost MAE Score: {}\n'.format(mean_absolute_error(test_y, predict_y_cb)))

XGBoost R2 Score: 0.9848034829809743
LightGBM R2 Score: 0.9850149060972804
CatBoost R2 Score: 0.9837376678507641

XGBoost RMSE Score: 473.3960414434413
LightGBM RMSE Score: 470.0914214999989
CatBoost RMSE Score: 489.71567483065314

XGBoost MAE Score: 308.78272181417975
LightGBM MAE Score: 297.4239127999638
CatBoost MAE Score: 316.41304963760973



# Early Stopping

### xgboost

In [9]:
train_xg = xgb.DMatrix(train_x, train_y)
params_xg = {"objective": 'reg:linear',
             'eval_metric': 'rmse',
             'silent': 1
}
validation_xg = xgb.DMatrix(test_x, test_y)
watchlist  = [(train_xg,'train'),(validation_xg,'eval')]

In [10]:
%%time
model_xg = xgb.train(params_xg, train_xg, num_boost_round=9999, evals=watchlist, early_stopping_rounds=10, verbose_eval=10)

[0]	train-rmse:4987.38	eval-rmse:5045.6
Multiple eval metrics have been passed: 'eval-rmse' will be used for early stopping.

Will train until eval-rmse hasn't improved in 10 rounds.
[10]	train-rmse:930.629	eval-rmse:989.363
[20]	train-rmse:803.924	eval-rmse:848.51
[30]	train-rmse:723.601	eval-rmse:769.458
[40]	train-rmse:662.337	eval-rmse:708.054
[50]	train-rmse:611.971	eval-rmse:661.209
[60]	train-rmse:576.525	eval-rmse:629.574
[70]	train-rmse:555.81	eval-rmse:610.509
[80]	train-rmse:535.494	eval-rmse:590.706
[90]	train-rmse:522.09	eval-rmse:579.964
[100]	train-rmse:506.527	eval-rmse:565.162
[110]	train-rmse:494.181	eval-rmse:555.372
[120]	train-rmse:482.999	eval-rmse:545.271
[130]	train-rmse:474.449	eval-rmse:538.903
[140]	train-rmse:467.323	eval-rmse:532.778
[150]	train-rmse:460.324	eval-rmse:528.181
[160]	train-rmse:454.31	eval-rmse:523.906
[170]	train-rmse:445.942	eval-rmse:517.821
[180]	train-rmse:440.297	eval-rmse:514.189
[190]	train-rmse:436.293	eval-rmse:512.824
[200]	train-r

### lightgbm

In [11]:
train_lg = lgbm.Dataset(train_x, train_y)
params_lg = {'task': 'train',
            'objective': 'regression',
            'metric': 'rmse'}
validation_lg = lgbm.Dataset(test_x, test_y, reference=train_lg)

In [12]:
%%time
model_lg = lgbm.train(params_lg, train_lg, valid_sets=validation_lg, num_boost_round=9999, early_stopping_rounds=10, verbose_eval=10)

Training until validation scores don't improve for 10 rounds.
[10]	valid_0's rmse: 1772.68
[20]	valid_0's rmse: 1181.19
[30]	valid_0's rmse: 1007.85
[40]	valid_0's rmse: 934.314
[50]	valid_0's rmse: 884.104
[60]	valid_0's rmse: 841.211
[70]	valid_0's rmse: 806.982
[80]	valid_0's rmse: 782.903
[90]	valid_0's rmse: 762.026
[100]	valid_0's rmse: 743.001
[110]	valid_0's rmse: 724.934
[120]	valid_0's rmse: 708.632
[130]	valid_0's rmse: 696.25
[140]	valid_0's rmse: 682.187
[150]	valid_0's rmse: 670.446
[160]	valid_0's rmse: 657.355
[170]	valid_0's rmse: 648.667
[180]	valid_0's rmse: 639.145
[190]	valid_0's rmse: 629.553
[200]	valid_0's rmse: 620.95
[210]	valid_0's rmse: 611.825
[220]	valid_0's rmse: 605.192
[230]	valid_0's rmse: 599.403
[240]	valid_0's rmse: 592.047
[250]	valid_0's rmse: 586.043
[260]	valid_0's rmse: 581.254
[270]	valid_0's rmse: 576.751
[280]	valid_0's rmse: 572.125
[290]	valid_0's rmse: 567.635
[300]	valid_0's rmse: 565.404
[310]	valid_0's rmse: 560.348
[320]	valid_0's rms

### catboost

In [15]:
train_cb = cb.Pool(train_x, train_y)
params_cb = {'eval_metric': 'RMSE',
            'od_type': 'Iter',
             'iterations': 9999,
            'od_wait': 10,
            'use_best_model': True,
            'verbose': 10,
            'metric_period': 10}
validation_cb = cb.Pool(test_x, test_y)

In [16]:
%%time
model_cb = cb.train(train_cb, params_cb, eval_set=validation_cb)



0:	learn: 6810.0764866	test: 6827.7410514	best: 6827.7410514 (0)	total: 114ms	remaining: 19m 1s
10:	learn: 5130.5640687	test: 5182.6285243	best: 5182.6285243 (10)	total: 707ms	remaining: 10m 41s
20:	learn: 3907.5385344	test: 3981.1025695	best: 3981.1025695 (20)	total: 1.17s	remaining: 9m 17s
30:	learn: 3029.3545621	test: 3116.8937823	best: 3116.8937823 (30)	total: 1.66s	remaining: 8m 54s
40:	learn: 2402.0262843	test: 2497.8124817	best: 2497.8124817 (40)	total: 2.13s	remaining: 8m 37s
50:	learn: 1962.7732155	test: 2060.3065656	best: 2060.3065656 (50)	total: 2.58s	remaining: 8m 24s
60:	learn: 1660.5291298	test: 1755.3568194	best: 1755.3568194 (60)	total: 3.05s	remaining: 8m 17s
70:	learn: 1454.1014148	test: 1543.1298546	best: 1543.1298546 (70)	total: 3.49s	remaining: 8m 8s
80:	learn: 1315.9770891	test: 1398.0833191	best: 1398.0833191 (80)	total: 3.94s	remaining: 8m 2s
90:	learn: 1222.8549363	test: 1297.4086883	best: 1297.4086883 (90)	total: 4.41s	remaining: 8m
100:	learn: 1159.7248967	te

850:	learn: 740.7660110	test: 777.9346945	best: 777.9346945 (850)	total: 38.7s	remaining: 6m 56s
860:	learn: 738.6183092	test: 775.8176598	best: 775.8176598 (860)	total: 39.2s	remaining: 6m 55s
870:	learn: 736.7788329	test: 774.0944749	best: 774.0944749 (870)	total: 39.6s	remaining: 6m 55s
880:	learn: 734.8540466	test: 772.2045955	best: 772.2045955 (880)	total: 40.1s	remaining: 6m 54s
890:	learn: 732.8393197	test: 770.3438329	best: 770.3438329 (890)	total: 40.5s	remaining: 6m 54s
900:	learn: 730.6713824	test: 768.2028371	best: 768.2028371 (900)	total: 40.9s	remaining: 6m 53s
910:	learn: 729.0563085	test: 766.6125133	best: 766.6125133 (910)	total: 41.3s	remaining: 6m 52s
920:	learn: 726.7160361	test: 764.2564049	best: 764.2564049 (920)	total: 41.8s	remaining: 6m 51s
930:	learn: 724.1621749	test: 761.6468888	best: 761.6468888 (930)	total: 42.2s	remaining: 6m 51s
940:	learn: 722.2785406	test: 759.8527476	best: 759.8527476 (940)	total: 42.6s	remaining: 6m 50s
950:	learn: 720.0228898	test: 

1690:	learn: 616.2020850	test: 656.2139882	best: 656.2139882 (1690)	total: 1m 15s	remaining: 6m 9s
1700:	learn: 615.2843906	test: 655.4189608	best: 655.4189608 (1700)	total: 1m 15s	remaining: 6m 8s
1710:	learn: 614.3311504	test: 654.4698773	best: 654.4698773 (1710)	total: 1m 16s	remaining: 6m 8s
1720:	learn: 613.4051844	test: 653.5750681	best: 653.5750681 (1720)	total: 1m 16s	remaining: 6m 7s
1730:	learn: 612.2335605	test: 652.4235314	best: 652.4235314 (1730)	total: 1m 16s	remaining: 6m 7s
1740:	learn: 611.1313882	test: 651.3684875	best: 651.3684875 (1740)	total: 1m 17s	remaining: 6m 6s
1750:	learn: 610.1908848	test: 650.4760648	best: 650.4760648 (1750)	total: 1m 17s	remaining: 6m 6s
1760:	learn: 609.2160929	test: 649.5559794	best: 649.5559794 (1760)	total: 1m 18s	remaining: 6m 5s
1770:	learn: 608.2504111	test: 648.6648282	best: 648.6648282 (1770)	total: 1m 18s	remaining: 6m 5s
1780:	learn: 607.3404561	test: 647.7683994	best: 647.7683994 (1780)	total: 1m 19s	remaining: 6m 4s
1790:	lear

2520:	learn: 556.9649674	test: 601.2693795	best: 601.2693795 (2520)	total: 1m 51s	remaining: 5m 30s
2530:	learn: 556.3713731	test: 600.8052104	best: 600.7536378 (2529)	total: 1m 51s	remaining: 5m 29s
2540:	learn: 555.7551101	test: 600.2048944	best: 600.2048944 (2540)	total: 1m 52s	remaining: 5m 29s
2550:	learn: 555.1525471	test: 599.7169835	best: 599.7169835 (2550)	total: 1m 52s	remaining: 5m 28s
2560:	learn: 554.6309406	test: 599.2003669	best: 599.2003669 (2560)	total: 1m 53s	remaining: 5m 28s
2570:	learn: 554.1847188	test: 598.7824282	best: 598.7824282 (2570)	total: 1m 53s	remaining: 5m 27s
2580:	learn: 553.6592305	test: 598.3203165	best: 598.3203165 (2580)	total: 1m 53s	remaining: 5m 27s
2590:	learn: 553.0442297	test: 597.7482237	best: 597.7482237 (2590)	total: 1m 54s	remaining: 5m 26s
2600:	learn: 552.4583271	test: 597.2112042	best: 597.2112042 (2600)	total: 1m 54s	remaining: 5m 26s
2610:	learn: 551.8814973	test: 596.7088644	best: 596.7088644 (2610)	total: 1m 55s	remaining: 5m 25s


3350:	learn: 521.2911589	test: 569.2840323	best: 569.2840323 (3350)	total: 2m 30s	remaining: 4m 57s
3360:	learn: 520.9195650	test: 568.9925100	best: 568.9925100 (3360)	total: 2m 30s	remaining: 4m 57s
3370:	learn: 520.5173117	test: 568.6369852	best: 568.6369852 (3370)	total: 2m 31s	remaining: 4m 56s
3380:	learn: 520.2257734	test: 568.4031015	best: 568.3954215 (3379)	total: 2m 31s	remaining: 4m 56s
3390:	learn: 519.9947270	test: 568.2599432	best: 568.2599432 (3390)	total: 2m 31s	remaining: 4m 55s
3400:	learn: 519.5993013	test: 567.8634573	best: 567.8634573 (3400)	total: 2m 32s	remaining: 4m 55s
3410:	learn: 519.2046783	test: 567.5157266	best: 567.5157266 (3410)	total: 2m 32s	remaining: 4m 54s
3420:	learn: 518.8768537	test: 567.2407337	best: 567.2407337 (3420)	total: 2m 33s	remaining: 4m 54s
3430:	learn: 518.4730431	test: 566.8630780	best: 566.8630780 (3430)	total: 2m 33s	remaining: 4m 53s
3440:	learn: 518.0788238	test: 566.5059565	best: 566.5059565 (3440)	total: 2m 33s	remaining: 4m 53s


4180:	learn: 498.3140207	test: 549.7791281	best: 549.7791281 (4180)	total: 3m 6s	remaining: 4m 19s
4190:	learn: 498.1021361	test: 549.6063922	best: 549.6063922 (4190)	total: 3m 6s	remaining: 4m 18s
4200:	learn: 497.9175497	test: 549.4816520	best: 549.4816520 (4200)	total: 3m 7s	remaining: 4m 18s
4210:	learn: 497.7015267	test: 549.3156597	best: 549.3156597 (4210)	total: 3m 7s	remaining: 4m 17s
4220:	learn: 497.5414262	test: 549.2136517	best: 549.1783167 (4219)	total: 3m 8s	remaining: 4m 17s
4230:	learn: 497.3506200	test: 549.0682625	best: 549.0682625 (4230)	total: 3m 8s	remaining: 4m 16s
4240:	learn: 497.1378017	test: 548.9401376	best: 548.9293970 (4237)	total: 3m 8s	remaining: 4m 16s
4250:	learn: 496.8212677	test: 548.5994714	best: 548.5994714 (4250)	total: 3m 9s	remaining: 4m 16s
4260:	learn: 496.6366690	test: 548.4769409	best: 548.4769409 (4260)	total: 3m 9s	remaining: 4m 15s
4270:	learn: 496.4220691	test: 548.3011489	best: 548.3011489 (4270)	total: 3m 10s	remaining: 4m 15s
4280:	lea

5010:	learn: 482.1136700	test: 536.6534802	best: 536.6471242 (5009)	total: 3m 46s	remaining: 3m 45s
5020:	learn: 481.9673178	test: 536.5610884	best: 536.5610884 (5020)	total: 3m 46s	remaining: 3m 44s
5030:	learn: 481.8237954	test: 536.4470451	best: 536.4470451 (5030)	total: 3m 47s	remaining: 3m 44s
5040:	learn: 481.6422659	test: 536.3206836	best: 536.3206836 (5040)	total: 3m 48s	remaining: 3m 44s
5050:	learn: 481.4522243	test: 536.1465962	best: 536.1465962 (5050)	total: 3m 48s	remaining: 3m 44s
5060:	learn: 481.2497153	test: 535.9633623	best: 535.9617663 (5059)	total: 3m 49s	remaining: 3m 43s
5070:	learn: 481.0730182	test: 535.8032141	best: 535.8029272 (5069)	total: 3m 49s	remaining: 3m 43s
5080:	learn: 480.9543676	test: 535.7401012	best: 535.7401012 (5080)	total: 3m 50s	remaining: 3m 42s
5090:	learn: 480.8011971	test: 535.6124733	best: 535.6120396 (5089)	total: 3m 50s	remaining: 3m 42s
5100:	learn: 480.6788476	test: 535.5194128	best: 535.5155508 (5098)	total: 3m 51s	remaining: 3m 42s


5840:	learn: 469.9298874	test: 527.0207476	best: 527.0207476 (5840)	total: 4m 26s	remaining: 3m 9s
5850:	learn: 469.8246877	test: 526.9362889	best: 526.9362889 (5850)	total: 4m 26s	remaining: 3m 9s
5860:	learn: 469.6950737	test: 526.8278137	best: 526.8278137 (5860)	total: 4m 27s	remaining: 3m 8s
5870:	learn: 469.5680927	test: 526.7227223	best: 526.7227223 (5870)	total: 4m 27s	remaining: 3m 8s
5880:	learn: 469.4266500	test: 526.6276952	best: 526.6276952 (5880)	total: 4m 28s	remaining: 3m 7s
5890:	learn: 469.3335457	test: 526.5594135	best: 526.5594135 (5890)	total: 4m 28s	remaining: 3m 7s
5900:	learn: 469.2323056	test: 526.4946909	best: 526.4946909 (5900)	total: 4m 29s	remaining: 3m 6s
5910:	learn: 469.1210314	test: 526.4024895	best: 526.4004732 (5905)	total: 4m 29s	remaining: 3m 6s
5920:	learn: 468.9980709	test: 526.3279806	best: 526.3252494 (5919)	total: 4m 29s	remaining: 3m 5s
5930:	learn: 468.8426111	test: 526.2125715	best: 526.2125715 (5930)	total: 4m 30s	remaining: 3m 5s
5940:	lear