# Santander Customer Satisfaction

## Планы

* Форум
* Видимо в данных есть какие-то аномальные точки.

## Решение

### Общее начало

In [1]:
%matplotlib inline

import os
import copy
import random
import pickle
from importlib import reload

import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
import pylab
import hyperopt

from sklearn.grid_search import GridSearchCV
from sklearn.metrics import precision_recall_fscore_support
from sklearn.cross_validation import StratifiedKFold, cross_val_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import get_scorer

import xgboost as xgb

import ml_training.common as common
import ml_training.vwutils as vwutils
import sklensemble.ensemblers.averagers as averagers

In [None]:
reload(common)

In [None]:
reload(vwutils)

In [None]:
reload(averagers)

In [2]:
def find_constant_columns(df):
    return [column for column in df.columns if len(df[column].unique()) < 2]

In [3]:
def find_equal_columns(df):
    equal_columns = []
    for col_num, column1 in enumerate(df.columns[: -1]):
        for column2 in df.columns[col_num+1:]:
            if not (df[column1] - df[column2]).any():
                equal_columns.append(column2)
    return list(set(equal_columns))

In [4]:
def make_feature_engineering(df):
    df.loc[df["var3"] == df["var3"].min(), "var3"] = df["var3"].mode()
    
    df["var38mc"] = np.isclose(df["var38"], df["var38"].mode())
    df["logvar38"] = df.loc[~df["var38mc"], "var38"].map(np.log)
    df["logvar38"].fillna(0, inplace=True)
    df.drop(["var38"], axis=1, inplace=True)
    
    df["var15less27"] = np.where(df["var15"] < 27, 1, 0)
    df["saldo_var30bigger10000"] = np.where(df["saldo_var30"] > 10000, 1, 0)

In [5]:
RANDOM_STATE = 777
random.seed(RANDOM_STATE)

In [6]:
train_df = pd.read_csv(os.path.join("input", "train.csv"))
train_y = train_df["TARGET"]
train_df.drop(["ID", "TARGET"], inplace=True, axis=1)
train_df["n0"] = train_df.apply(lambda x: sum(x == 0), axis=1)

In [7]:
constant_columns = find_constant_columns(train_df)
equal_columns = find_equal_columns(train_df)
drop_columns = list(set(constant_columns).union(equal_columns))

In [8]:
train_x = train_df.drop(drop_columns, axis=1)

In [9]:
make_feature_engineering(train_x)

In [None]:
#scaler = RobustScaler()
#scaled_train_x = scaler.fit_transform(train_x)
#pca = PCA(2)
#pca.fit(scaled_train_x)
#train_pca_df = pd.DataFrame([pca.transform(scaled_train_x)], columns=["pca1", "pca2"])
#train_x = pd.concat([train_x, train_pca_df], axis=1)

In [None]:
#if_mask = common.get_independent_features_mask(train_x.values, tol=5e-7, is_plot=False) # 5e-7
#uncorr_mask = common.find_corr_features_mask(train_x.values, trashhold=0.995) # 0.995
#res_mask = if_mask & uncorr_mask
#if_mask.sum(), uncorr_mask.sum(), res_mask.sum()

In [None]:
#train_x_if = train_x.loc[:, if_mask]
#train_x_uc = train_x.loc[:, uncorr_mask]

In [10]:
cv = StratifiedKFold(train_y, 5, shuffle=True, random_state=RANDOM_STATE)

### Создание классификаторов

In [16]:
xgbc = xgb.XGBClassifier(n_estimators=560, max_depth=5, seed=1234, colsample_bytree=0.7, learning_rate=0.02,
                         subsample=0.68)
xgbc2 = xgb.XGBClassifier(n_estimators=560, max_depth=5, seed=1, colsample_bytree=0.7, learning_rate=0.02,
                         subsample=0.68)
xgbc3 = xgb.XGBClassifier(n_estimators=560, max_depth=5, seed=2, colsample_bytree=0.7, learning_rate=0.02,
                         subsample=0.68)
hyperopt_params = {'max_depth': 6, 'reg_lambda': 0.35114262798003526, 'reg_alpha': 0.5677890342900263,
                   'colsample_bytree': 0.6092874683307445, 'min_child_weight': 4.282293898377818, 'n_estimators': 596,
                   'learning_rate': 0.012044571373036569, 'subsample': 0.7570493634313957, "seed": 777}
xgbc_h =  xgb.XGBClassifier(**hyperopt_params)

In [19]:
r_params = {"n_estimators": 560, "max_depth": 5, "seed": 1234, "colsample_bytree": 0.701, "learning_rate": 0.0202048,
            "subsample": 0.6815}
xgbc_r = xgb.XGBClassifier(**r_params)

xgbc_r_list = []
for i_seed in range(1, 11):
    i_seed_params = copy.deepcopy(r_params)
    i_seed_params["seed"] = i_seed
    xgbc_r_list.append(xgb.XGBClassifier(**i_seed_params))
    
xgbc_h_list = []
for i_seed in range(770, 780):
    i_seed_params = copy.deepcopy(hyperopt_params)
    i_seed_params["seed"] = i_seed
    xgbc_h_list.append(xgb.XGBClassifier(**i_seed_params))    

In [20]:
is_weighted = [False] * len(xgbc_h_list)
x_mask = [None] * len(xgbc_h_list)
bc = averagers.BlendingClassifier(xgbc_h_list, is_weighted, x_mask)

### Grid search

In [None]:
params = {"n_estimators": [570], "max_depth": [5], "seed": [1234], "learning_rate": [0.02], "subsample": [0.68],
          "colsample_bytree": [0.7]}
gs = GridSearchCV(xgbc, params, scoring="roc_auc", cv=cv, n_jobs=common.NCPU, refit=False, verbose=10)
gs.fit(train_x, train_y)

### Веса

In [21]:
w0 = 1
w1 = train_y.value_counts()[0] / train_y.value_counts()[1] * 0.5
weights = np.array(list((map(lambda x: w1 if x else w0, train_y))))

### Обучение классификаторов

In [None]:
xgbc.fit(train_x.values, train_y.values)

In [23]:
bc.fit(train_x.values, train_y.values, weights)

XGBClassifier(base_score=0.5, colsample_bylevel=1,
       colsample_bytree=0.6092874683307445, gamma=0,
       learning_rate=0.012044571373036569, max_delta_step=0, max_depth=6,
       min_child_weight=4.282293898377818, missing=None, n_estimators=596,
       nthread=-1, objective='binary:logistic',
       reg_alpha=0.5677890342900263, reg_lambda=0.35114262798003526,
       scale_pos_weight=1, seed=770, silent=True,
       subsample=0.7570493634313957) is fitting
XGBClassifier(base_score=0.5, colsample_bylevel=1,
       colsample_bytree=0.6092874683307445, gamma=0,
       learning_rate=0.012044571373036569, max_delta_step=0, max_depth=6,
       min_child_weight=4.282293898377818, missing=None, n_estimators=596,
       nthread=-1, objective='binary:logistic',
       reg_alpha=0.5677890342900263, reg_lambda=0.35114262798003526,
       scale_pos_weight=1, seed=771, silent=True,
       subsample=0.7570493634313957) is fitting
XGBClassifier(base_score=0.5, colsample_bylevel=1,
       colsam

In [None]:
xgbc_r.fit(train_x.values, train_y.values, eval_metric="auc")

In [22]:
xgbc_h.fit(train_x.values, train_y.values, eval_metric="auc")

XGBClassifier(base_score=0.5, colsample_bylevel=1,
       colsample_bytree=0.6092874683307445, gamma=0,
       learning_rate=0.012044571373036569, max_delta_step=0, max_depth=6,
       min_child_weight=4.282293898377818, missing=None, n_estimators=596,
       nthread=-1, objective='binary:logistic',
       reg_alpha=0.5677890342900263, reg_lambda=0.35114262798003526,
       scale_pos_weight=1, seed=777, silent=True,
       subsample=0.7570493634313957)

### Выходная оценка

In [24]:
test_df = pd.read_csv(os.path.join("input", "test.csv"))
test_id = test_df["ID"].values
test_df.drop(["ID"], inplace=True, axis=1)

test_df["n0"] = test_df.apply(lambda x: sum(x == 0), axis=1)
test_x = test_df.drop(drop_columns, axis=1)

In [25]:
make_feature_engineering(test_x)

In [None]:
#scaled_test_x = scaler.transform(test_x)
#test_pca_df = pd.DataFrame(pca.transform(scaled_test_x), columns=["pca1", "pca2", "pca3"])
#test_x = pd.concat([test_x, test_pca_df], axis=1)

In [28]:
#test_y = xgbc_r.predict_proba(test_x.values)[:, 1]
#test_y = xgbc_h.predict_proba(test_x.values)[:, 1]
test_y = bc.predict_proba(test_x.values)[:, 1]

In [29]:
common.save_output(np.array([test_id, test_y]).T, ["ID", "TARGET"])

### Тестирование классификаторов

In [None]:
cvs = cross_val_score(copy.deepcopy(xgbc_r), train_x, train_y, scoring="roc_auc", cv=cv, n_jobs=common.NCPU, verbose=1)
cvs, cvs.mean()

In [None]:
# as in r without intercept
# array([ 0.84362984,  0.83608526,  0.84830765,  0.84150639,  0.83651392]), 0.84120861257549406
# with 3 pca components
# array([ 0.84474928,  0.83518445,  0.84703559,  0.84057184,  0.83586845]), 0.84068191998743524
# 10 xgbc with different seeds
# array([ 0.84443436,  0.83580992,  0.84851709,  0.84200755,  0.83720884]), 0.84159554920533641
# with 2 pca components
# array([ 0.84360169,  0.83538863,  0.84814913,  0.8421763 ,  0.8365236 ]), 0.84116787178076746
# with var38mc and logvar38, without var38, cleaned var3
# array([ 0.84466562,  0.83450353,  0.84818809,  0.84334212,  0.83541293]), 0.84122245852502497
# after hyperopt with var38mc and logvar38, without var38, cleaned var3
# array([ 0.84644761,  0.83720953,  0.84990076,  0.84543031,  0.83589067]), 0.84297577420381453
# 10 xgbc with different seeds after hyperopt with var38mc and logvar38, without var38, cleaned var3
# array([ 0.84608496,  0.8374632 ,  0.85016673,  0.84540045,  0.83552255]), 0.84292757970569743

In [None]:
cvs = common.cross_val_score_with_weights(copy.deepcopy(bc), train_x.values, train_y.values, weights, "roc_auc", cv)
cvs, cvs.mean()

In [None]:
precision_recall_fscore_support(train_y.values, xgbc_r.predict(train_x.values))

## Hyperopt

In [None]:
def hyperopt_train_test(params):
    clf = xgb.XGBClassifier(**params)
    return cross_val_score(clf, train_x, train_y, scoring="roc_auc", cv=cv, n_jobs=common.NCPU, verbose=1).mean()

In [11]:
space = {
    "n_estimators": hyperopt.hp.choice("n_estimators", range(500, 800)),
    "max_depth": hyperopt.hp.choice("max_depth", range(4, 8)),
    "colsample_bytree": hyperopt.hp.uniform("colsample_bytree", 0.5, 0.9),
    "learning_rate": hyperopt.hp.uniform("learning_rate", 0.005, 0.02),
    "subsample": hyperopt.hp.uniform("subsample", 0.5, 0.9),
    "min_child_weight": hyperopt.hp.uniform("min_child_weight", 1.0, 5.0),
    "reg_lambda": hyperopt.hp.uniform("reg_lambda", 0.0, 0.5),
    "reg_alpha": hyperopt.hp.uniform("reg_alpha", 0.0, 2.0),
    "seed": hyperopt.hp.choice("seed", [RANDOM_STATE]),
}

In [None]:
count = 0
best = 0
best_params = {}
def f(params):
    global best, count, best_params
    acc = hyperopt_train_test(params)
    if acc > best:
        best = acc
        best_params = params
        print("new best - {0}, using - {1}".format(acc, params))
    
    print("iters - {0}, best_acc - {1}".format(count, best))
    count += 1        
    return {"loss": -acc, "status": hyperopt.STATUS_OK}

In [None]:
trials = hyperopt.Trials()
best = hyperopt.fmin(f, space, algo=hyperopt.tpe.suggest, max_evals=300, trials=trials)

In [13]:
hyperopt_tester = common.HyperoptTester(xgb.XGBClassifier, space, RANDOM_STATE, nf_test=4, nf_val=4)
scores = hyperopt_tester.optimize(train_x.values, train_y.values, "roc_auc", max_evals=70)
print(scores.mean(axis=0))

[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.6min finished


new best score - 0.8360396194046539, best params - {'max_depth': 7, 'learning_rate': 0.011574132704340354, 'subsample': 0.5755807866044884, 'min_child_weight': 4.749215075110342, 'seed': 777, 'n_estimators': 678, 'reg_lambda': 0.4268126752950345, 'reg_alpha': 1.3812943072811286, 'colsample_bytree': 0.7261376809265392}, num test step - 0
iters - 0, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.1min finished


iters - 1, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 2, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


new best score - 0.8364378643344751, best params - {'max_depth': 6, 'learning_rate': 0.01288166003440782, 'subsample': 0.7222045040342008, 'min_child_weight': 3.761177159466719, 'seed': 777, 'n_estimators': 658, 'reg_lambda': 0.13432763968174627, 'reg_alpha': 1.3759968022923363, 'colsample_bytree': 0.5494264012426486}, num test step - 0
iters - 3, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 4, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 5, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 6, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


new best score - 0.8368413429268474, best params - {'max_depth': 5, 'learning_rate': 0.009638123010078036, 'subsample': 0.5794456166084812, 'min_child_weight': 3.58480043022372, 'seed': 777, 'n_estimators': 683, 'reg_lambda': 0.04252872230074223, 'reg_alpha': 1.616853421666508, 'colsample_bytree': 0.5706159740037747}, num test step - 0
iters - 7, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 8, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.1min finished


iters - 9, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 10, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 11, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 12, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.8min finished


iters - 13, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.2min finished


iters - 14, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 15, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.0min finished


iters - 16, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 17, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


new best score - 0.8370130454695277, best params - {'max_depth': 4, 'learning_rate': 0.013902086727288685, 'subsample': 0.5448767857299118, 'min_child_weight': 4.0850851003808195, 'seed': 777, 'n_estimators': 720, 'reg_lambda': 0.032194212371362274, 'reg_alpha': 1.5251982486266888, 'colsample_bytree': 0.5925530763021022}, num test step - 0
iters - 18, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 19, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 20, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 21, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 22, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 23, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.6min finished


iters - 24, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 25, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.6min finished


iters - 26, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 27, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 28, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 29, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 30, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 31, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 32, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 33, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 34, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 35, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 36, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 37, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.6min finished


iters - 38, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.9min finished


iters - 39, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


new best score - 0.8371087387369525, best params - {'max_depth': 5, 'learning_rate': 0.012650059589737517, 'subsample': 0.7431177453600848, 'min_child_weight': 3.741774269451039, 'seed': 777, 'n_estimators': 732, 'reg_lambda': 0.1329123788509015, 'reg_alpha': 1.4690235677671637, 'colsample_bytree': 0.5050211284425199}, num test step - 0
iters - 40, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 41, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 42, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 43, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 44, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 45, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 46, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 47, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 48, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 49, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 50, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 51, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 52, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 53, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.2min finished


iters - 54, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 55, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 56, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.7min finished


iters - 57, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 58, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 59, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 60, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.5min finished


iters - 61, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 62, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  5.7min finished


iters - 63, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 64, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 65, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.7min finished


iters - 66, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 67, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 68, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.6min finished


iters - 69, num test step - 0


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


new best score - 0.840813319802743, best params - {'max_depth': 5, 'learning_rate': 0.01725289690225834, 'subsample': 0.6993154146339454, 'min_child_weight': 2.3155952040720575, 'seed': 777, 'n_estimators': 746, 'reg_lambda': 0.3724771627402421, 'reg_alpha': 0.16786982587471688, 'colsample_bytree': 0.7935036056780905}, num test step - 1
iters - 0, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.1min finished


new best score - 0.8412987975385684, best params - {'max_depth': 7, 'learning_rate': 0.016164415455703195, 'subsample': 0.6381511739999378, 'min_child_weight': 3.632162356147065, 'seed': 777, 'n_estimators': 549, 'reg_lambda': 0.4265106118591352, 'reg_alpha': 0.43467649800806196, 'colsample_bytree': 0.8595379718736728}, num test step - 1
iters - 1, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 2, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


new best score - 0.8413996946622504, best params - {'max_depth': 7, 'learning_rate': 0.016965242468514528, 'subsample': 0.682502884985975, 'min_child_weight': 2.60948519547425, 'seed': 777, 'n_estimators': 524, 'reg_lambda': 0.07746007604093463, 'reg_alpha': 1.8028430318247535, 'colsample_bytree': 0.635308627606946}, num test step - 1
iters - 3, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 4, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 5, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.8min finished


iters - 6, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 7, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 8, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.8min finished


iters - 9, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


new best score - 0.8430615482975893, best params - {'max_depth': 4, 'learning_rate': 0.01647813123515619, 'subsample': 0.6476473963608562, 'min_child_weight': 3.1410697758279693, 'seed': 777, 'n_estimators': 654, 'reg_lambda': 0.46598951169037844, 'reg_alpha': 0.21805970678036846, 'colsample_bytree': 0.6444286511457458}, num test step - 1
iters - 10, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  5.0min finished


iters - 11, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 12, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 13, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.4min finished


iters - 14, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 15, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.2min finished


iters - 16, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 17, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  5.5min finished


iters - 18, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 19, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.7min finished


iters - 20, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 21, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 22, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 23, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.9min finished


new best score - 0.8432700116260058, best params - {'max_depth': 4, 'learning_rate': 0.014186235616965272, 'subsample': 0.5412030107961404, 'min_child_weight': 2.0933197643311763, 'seed': 777, 'n_estimators': 670, 'reg_lambda': 0.11320549921753578, 'reg_alpha': 1.514505062573532, 'colsample_bytree': 0.5093921569196201}, num test step - 1
iters - 24, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.6min finished


iters - 25, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 26, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 27, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


new best score - 0.8433692163984036, best params - {'max_depth': 4, 'learning_rate': 0.014518968413957969, 'subsample': 0.6072429551440787, 'min_child_weight': 2.263798866781787, 'seed': 777, 'n_estimators': 697, 'reg_lambda': 0.1136823338350139, 'reg_alpha': 1.4978557475856624, 'colsample_bytree': 0.5932267012439}, num test step - 1
iters - 28, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 29, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 30, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 31, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 32, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 33, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 34, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 35, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 36, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 37, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 38, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 39, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 40, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 41, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 42, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 43, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 44, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 45, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 46, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 47, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 48, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 49, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 50, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 51, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 52, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.3min finished


iters - 53, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 54, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 55, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 56, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 57, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 58, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 59, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 60, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 61, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 62, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 63, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 64, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 65, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 66, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 67, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.9min finished


iters - 68, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


new best score - 0.8434109011937967, best params - {'max_depth': 4, 'learning_rate': 0.01649371305609185, 'subsample': 0.5407648588315866, 'min_child_weight': 2.189676250940678, 'seed': 777, 'n_estimators': 709, 'reg_lambda': 0.2354040920746765, 'reg_alpha': 1.4309034952307693, 'colsample_bytree': 0.556591583690579}, num test step - 1
iters - 69, num test step - 1


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


new best score - 0.8382191460363535, best params - {'max_depth': 5, 'learning_rate': 0.0068715014900060795, 'subsample': 0.5584835072768228, 'min_child_weight': 3.3525039031270008, 'seed': 777, 'n_estimators': 731, 'reg_lambda': 0.07946731388530781, 'reg_alpha': 1.0498797864019314, 'colsample_bytree': 0.5554281054457171}, num test step - 2
iters - 0, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


new best score - 0.8386023339283423, best params - {'max_depth': 7, 'learning_rate': 0.010785025832895697, 'subsample': 0.8808979135012005, 'min_child_weight': 2.961069461632921, 'seed': 777, 'n_estimators': 504, 'reg_lambda': 0.3977929686261736, 'reg_alpha': 0.3502490181291953, 'colsample_bytree': 0.6058408358015286}, num test step - 2
iters - 1, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 2, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


new best score - 0.8391448043348052, best params - {'max_depth': 5, 'learning_rate': 0.009523045751112565, 'subsample': 0.5628814543934046, 'min_child_weight': 2.2529265664971523, 'seed': 777, 'n_estimators': 689, 'reg_lambda': 0.3654689885860357, 'reg_alpha': 0.9246102948130446, 'colsample_bytree': 0.5771811492605985}, num test step - 2
iters - 3, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 4, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 5, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 6, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.5min finished


iters - 7, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 8, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 9, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 10, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 11, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


new best score - 0.8398023069713991, best params - {'max_depth': 6, 'learning_rate': 0.012582369708571097, 'subsample': 0.5689499144079537, 'min_child_weight': 2.9539372508755553, 'seed': 777, 'n_estimators': 674, 'reg_lambda': 0.23663207788255036, 'reg_alpha': 0.8681226375705928, 'colsample_bytree': 0.5361250927338393}, num test step - 2
iters - 12, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 13, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.2min finished


iters - 14, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 15, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 16, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 17, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 18, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.5min finished


new best score - 0.8399020728691848, best params - {'max_depth': 6, 'learning_rate': 0.011589571241712775, 'subsample': 0.5492193930637729, 'min_child_weight': 3.6417917374340076, 'seed': 777, 'n_estimators': 707, 'reg_lambda': 0.4834843119863684, 'reg_alpha': 0.12939188998434825, 'colsample_bytree': 0.6433519636312212}, num test step - 2
iters - 19, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 20, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 21, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.5min finished


iters - 22, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 23, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 24, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.0min finished


iters - 25, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 26, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


new best score - 0.8399440421447955, best params - {'max_depth': 6, 'learning_rate': 0.010519093668305907, 'subsample': 0.7076003243878034, 'min_child_weight': 3.2640707852118247, 'seed': 777, 'n_estimators': 596, 'reg_lambda': 0.1594433526244875, 'reg_alpha': 0.13683405548286556, 'colsample_bytree': 0.500357223405647}, num test step - 2
iters - 27, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 28, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 29, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  5.5min finished


iters - 30, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 31, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 32, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 33, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 34, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 35, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 36, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 37, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 38, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 39, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 40, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 41, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 42, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.5min finished


iters - 43, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 44, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.8min finished


iters - 45, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 46, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.0min finished


iters - 47, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


new best score - 0.8400295624282601, best params - {'max_depth': 6, 'learning_rate': 0.012044571373036569, 'subsample': 0.7570493634313957, 'min_child_weight': 4.282293898377818, 'seed': 777, 'n_estimators': 596, 'reg_lambda': 0.35114262798003526, 'reg_alpha': 0.5677890342900263, 'colsample_bytree': 0.6092874683307445}, num test step - 2
iters - 48, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 49, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 50, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 51, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 52, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 53, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.7min finished


iters - 54, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 55, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 56, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 57, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 58, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 59, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 60, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 61, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 62, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.9min finished


iters - 63, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.0min finished


iters - 64, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 65, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 66, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 67, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 68, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 69, num test step - 2


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


new best score - 0.8410408426682885, best params - {'max_depth': 4, 'learning_rate': 0.013653498271319623, 'subsample': 0.6262017827624806, 'min_child_weight': 3.7322026241254003, 'seed': 777, 'n_estimators': 593, 'reg_lambda': 0.14462620238972945, 'reg_alpha': 1.3118272435512368, 'colsample_bytree': 0.6298816069952946}, num test step - 3
iters - 0, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.6min finished


iters - 1, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.4min finished


iters - 2, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.8min finished


iters - 3, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 4, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 5, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 6, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 7, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.9min finished


iters - 8, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.9min finished


iters - 9, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 10, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 11, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.6min finished


iters - 12, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 13, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.2min finished


iters - 14, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 15, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 16, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


new best score - 0.8412137290186064, best params - {'max_depth': 4, 'learning_rate': 0.01531626774986308, 'subsample': 0.5049758333415414, 'min_child_weight': 2.0420456664601896, 'seed': 777, 'n_estimators': 565, 'reg_lambda': 0.07544469869967885, 'reg_alpha': 0.39470630114226224, 'colsample_bytree': 0.820630308209681}, num test step - 3
iters - 17, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 18, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 19, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 20, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 21, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 22, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 23, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 24, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 25, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 26, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 27, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 28, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


new best score - 0.841271046689369, best params - {'max_depth': 4, 'learning_rate': 0.011888657445538277, 'subsample': 0.5086334084014258, 'min_child_weight': 4.4679754229460045, 'seed': 777, 'n_estimators': 578, 'reg_lambda': 0.07130110886284557, 'reg_alpha': 0.0333791331737609, 'colsample_bytree': 0.6114766614290508}, num test step - 3
iters - 29, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 30, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 31, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 32, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.6min finished


iters - 33, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  4.7min finished


iters - 34, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.4min finished


iters - 35, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.5min finished


iters - 36, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


iters - 37, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 38, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.2min finished


iters - 39, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  5.1min finished


iters - 40, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


new best score - 0.8413093116019446, best params - {'max_depth': 6, 'learning_rate': 0.007451350773420137, 'subsample': 0.518237938975129, 'min_child_weight': 4.53021872982859, 'seed': 777, 'n_estimators': 753, 'reg_lambda': 0.1637430738437339, 'reg_alpha': 0.8149965871908098, 'colsample_bytree': 0.5230494846014601}, num test step - 3
iters - 41, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 42, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.9min finished


iters - 43, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 44, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 45, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 46, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.8min finished


iters - 47, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 48, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 49, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.9min finished


iters - 50, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.0min finished


iters - 51, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.2min finished


iters - 52, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 53, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.4min finished


iters - 54, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 55, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.1min finished


iters - 56, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


new best score - 0.8413835129563081, best params - {'max_depth': 4, 'learning_rate': 0.010570738576343558, 'subsample': 0.5134371278506907, 'min_child_weight': 3.1630118163748886, 'seed': 777, 'n_estimators': 719, 'reg_lambda': 0.19737642863506585, 'reg_alpha': 0.8153756099600352, 'colsample_bytree': 0.5900969133949286}, num test step - 3
iters - 57, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 58, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 59, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.1min finished


iters - 60, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  3.3min finished


iters - 61, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.0min finished


iters - 62, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.2min finished


iters - 63, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 64, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  1.8min finished


iters - 65, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.6min finished


new best score - 0.8416441459829263, best params - {'max_depth': 4, 'learning_rate': 0.011745896589207955, 'subsample': 0.5030363918848865, 'min_child_weight': 3.8066313974080463, 'seed': 777, 'n_estimators': 767, 'reg_lambda': 0.19552558654513397, 'reg_alpha': 1.0807840450623298, 'colsample_bytree': 0.6234153363921896}, num test step - 3
iters - 66, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 67, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.3min finished


iters - 68, num test step - 3


[Parallel(n_jobs=6)]: Done   4 out of   4 | elapsed:  2.7min finished


iters - 69, num test step - 3
num_cv_step - 0, num_test_step - 0
num_cv_step - 0, num_test_step - 1
num_cv_step - 0, num_test_step - 2
num_cv_step - 0, num_test_step - 3
num_cv_step - 1, num_test_step - 0
num_cv_step - 1, num_test_step - 1
num_cv_step - 1, num_test_step - 2
num_cv_step - 1, num_test_step - 3
num_cv_step - 2, num_test_step - 0
num_cv_step - 2, num_test_step - 1
num_cv_step - 2, num_test_step - 2
num_cv_step - 2, num_test_step - 3
num_cv_step - 3, num_test_step - 0
num_cv_step - 3, num_test_step - 1
num_cv_step - 3, num_test_step - 2
num_cv_step - 3, num_test_step - 3
[ 0.84203681  0.84139616  0.84219358  0.84155005]


In [15]:
hyperopt_tester.opt_params

{0: [0.83710873873695246,
  {'colsample_bytree': 0.5050211284425199,
   'learning_rate': 0.012650059589737517,
   'max_depth': 5,
   'min_child_weight': 3.741774269451039,
   'n_estimators': 732,
   'reg_alpha': 1.4690235677671637,
   'reg_lambda': 0.1329123788509015,
   'seed': 777,
   'subsample': 0.7431177453600848}],
 1: [0.84341090119379669,
  {'colsample_bytree': 0.556591583690579,
   'learning_rate': 0.01649371305609185,
   'max_depth': 4,
   'min_child_weight': 2.189676250940678,
   'n_estimators': 709,
   'reg_alpha': 1.4309034952307693,
   'reg_lambda': 0.2354040920746765,
   'seed': 777,
   'subsample': 0.5407648588315866}],
 2: [0.8400295624282601,
  {'colsample_bytree': 0.6092874683307445,
   'learning_rate': 0.012044571373036569,
   'max_depth': 6,
   'min_child_weight': 4.282293898377818,
   'n_estimators': 596,
   'reg_alpha': 0.5677890342900263,
   'reg_lambda': 0.35114262798003526,
   'seed': 777,
   'subsample': 0.7570493634313957}],
 3: [0.84164414598292625,
  {'col

In [14]:
scores

array([[ 0.85091488,  0.85082877,  0.85162338,  0.85131976],
       [ 0.83545495,  0.83385204,  0.83574345,  0.83424468],
       [ 0.84364924,  0.84346941,  0.84340737,  0.84322808],
       [ 0.83812818,  0.83743444,  0.83800011,  0.83740767]])

### VW

In [None]:
input_train_file = os.path.join("temp", "scs.input_train")
vwutils.make_vw_input(train_x, input_train_file, y=train_y)

In [None]:
input_test_file = os.path.join("temp", "scs.input_test")
vwutils.make_vw_input(test_x, input_test_file)

### Тестирование кода

In [None]:
x = train_x.iloc[: 1000].values
y = train_y.iloc[: 1000].values
w = weights[: 1000]
x_t = test_x.iloc[: 1000].values

In [None]:
imp_fetures = common.get_most_important_features(xgbc_r, train_x.columns.values)
imp_fetures.get("logvar38is0"), imp_fetures.get("var15less27"), imp_fetures.get("saldo_var30bigger10000")

In [None]:
imp_fetures

## Feature engineering

In [None]:
df = copy.deepcopy(train_x)

In [None]:
df["y"] = train_y.values