# Library

In [2]:
import warnings
warnings.filterwarnings("ignore")


import numpy as np
import pandas as pd
import datetime, random, math
from catboost import CatBoostClassifier
import lightgbm as lgb
from time import time
from tqdm import tqdm
from collections import Counter
from scipy import stats
import gc, pickle
import ast

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold, GroupKFold, GridSearchCV, train_test_split, TimeSeriesSplit
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, log_loss
from sklearn.linear_model import Ridge,Lasso, BayesianRidge
from sklearn.svm import LinearSVR
from sklearn.preprocessing import minmax_scale
from sklearn.cluster import KMeans
import optuna
%matplotlib inline

# Preprocessing

In [3]:
def create_is_sell_data(sell_prices_df, calendar_df, train_df):
    train_df.index = train_df['id']
    sell_prices_df['id'] = sell_prices_df['item_id'].astype('str')+'_'+sell_prices_df['store_id']+'_evaluation'
    sell_prices_data = sell_prices_df[sell_prices_df.wm_yr_wk.isin(calendar_df.wm_yr_wk.unique())]
    sell_prices_data.reset_index(drop=True, inplace=True)
    tmp = sell_prices_data.groupby(['id'])[['wm_yr_wk', 'sell_price']].apply(
        lambda x: x.set_index('wm_yr_wk')['sell_price'].to_dict()
    ).to_dict()
    d = calendar_df.d
    wm_yr_wk = calendar_df.wm_yr_wk
    price_data = {}
    for col in tqdm(train_df.id.unique()):
        price_data[col] = wm_yr_wk.map(tmp[col])
    price_data = pd.DataFrame(price_data)
    price_data.index = d
    is_sell = price_data.notnull().astype(float).T
    price_data = price_data.fillna(0)
    
    is_sell.index=train_df.id
    train_df.index=train_df.id
    is_sell = pd.concat([
        train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']], is_sell
    ], axis=1)
    price_data = pd.concat([
        train_df[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']], price_data.T  
    ], axis=1)
    
    return price_data, is_sell

def sort_d_cols(d_cols):
    d_cols = [int(d.replace('d_','')) for d in d_cols]
    d_cols = sorted(d_cols)
    d_cols = [f'd_{d}' for d in d_cols]
    return d_cols


path='/Users/abcdm/Downloads/m5-forecasting-accuracy/'
train = pd.read_csv(path+'sales_train_evaluation.csv')
calendar = pd.read_csv(path+'calendar.csv')
price = pd.read_csv(path+'sell_prices.csv')
price_data, is_sell = create_is_sell_data(price, calendar, train)
d_cols = [f'd_{i+1}' for i in range(1969)]
train = train.reindex(
    columns=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']+d_cols
)
train = train.set_index('id', drop=False)
train = pd.concat([
    train[['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']],
    train[d_cols]*price_data[d_cols]
], axis=1)


def sale_by_group(ID, data, train, trn_d_cols, group='state_id'):
    for _id in train[group].unique():
        f = train[train[group]==_id].groupby(ID)[trn_d_cols].sum(min_count=1).T
        f = f.stack(dropna=False).reset_index().rename(columns={0:f'TARGET_{_id}', 'level_0':'d'})
        data = pd.merge(data, f, on=['d', ID])
    return data

def cnt_by_group(ID, data, price_data, trn_d_cols, group='state_id'):
    for _id in price_data[group].unique():
        f = price_data[price_data[group]==_id].replace(0, np.nan).groupby(ID)[trn_d_cols].count().T
        f = f.stack(dropna=False).reset_index().rename(columns={0:f'cnt_{_id}', 'level_0':'d'})
        data = pd.merge(data, f, on=['d', ID])
    return data


def fe(ID,data, log=False):
    target_cols = [col for col in data.columns if 'TARGET' in col]
    for target_col in target_cols:
        if log:
            data[target_col] = np.log1p(data[target_col])
        for win in [7,28]:
            agg = {'mean'}
            data_2 = data.groupby(ID)[target_col].apply(
                lambda x: x.rolling(win, min_periods=1).agg(agg)
            )
            for col in data_2.columns:
                data[f'roll{win}_{col}_{target_col}'] = data_2[col]
        #data[f'roll28_mean_{target_col}_lag28'] = data.groupby(ID)[f'roll28_mean_{target_col}'].shift(28)
        data[f'roll28_mean_{target_col}_lag56'] = data.groupby(ID)[f'roll28_mean_{target_col}'].shift(56)
        data[f'roll28_mean_{target_col}_lag84'] = data.groupby(ID)[f'roll28_mean_{target_col}'].shift(84)
        
        for i in range(0,10,2):
                data[f'shift{7*(i+1)}_{target_col}'] = data.groupby(ID)[target_col].shift(7*i)
                
        data[f'shift_diff_{target_col}']=0
        for i in range(4):
                data[f'shift_diff_{target_col}'] += data.groupby(ID)[target_col].diff(7*i)/4

    data_2 = data.groupby(ID)['TARGET'].apply(
                lambda x: x.rolling(4, min_periods=1).mean().rolling(21, min_periods=1).agg({'std'})
            )
    for col in data_2.columns:
                data[f'smth_roll28_{col}_TARGET'] = data_2[col]
    
    del_f = [col for col in target_cols if col!='TARGET']
    data.drop(columns=del_f, inplace=True)
    
    return data


def select_near_event(x, event_name):
    z = ''
    for y in x:
        if y in event_name:
            z+=y+'_'
    if len(z)==0:
        return np.nan
    else:
        return z
    
    
calendar['qaurter'] = pd.to_datetime(calendar['date']).dt.day.apply(lambda x: x//7)

event_name = ['SuperBowl', 'ValentinesDay', 'PresidentsDay', 'LentStart', 'LentWeek2', 'StPatricksDay', 'Purim End', 
              'OrthodoxEaster', 'Pesach End', 'Cinco De Mayo', "Mother's day", 'MemorialDay', 'NBAFinalsStart', 'NBAFinalsEnd',
              "Father's day", 'IndependenceDay', 'Ramadan starts', 'Eid al-Fitr', 'LaborDay', 'ColumbusDay', 'Halloween', 
              'EidAlAdha', 'VeteransDay', 'Thanksgiving', 'Christmas', 'Chanukah End', 'NewYear', 'OrthodoxChristmas', 
              'MartinLutherKingDay', 'Easter']
event_type = ['Sporting', 'Cultural', 'National', 'Religious']
event_names = {'event_name_1':event_name, 'event_type_1':event_type}
for event, event_name in event_names.items():
    calendar[f'new_{event}']=''
    for i in range(-1,-8,-1):
        calendar[f'new_{event}'] += calendar[event].shift(i).astype(str)+'|'
    calendar[f'new_{event}'] = calendar[f'new_{event}'].apply(lambda x: x.split('|'))
    calendar[f'new_{event}'] = calendar[f'new_{event}'].apply(lambda x: select_near_event(x, event_name))
    
    
def make_data_dept_cat_id(ID, trn_d_cols, train, price_data, calendar, log=True):
    data = train.groupby(ID)[trn_d_cols].sum(min_count=1).T
    data = data.stack(dropna=False).reset_index().rename(columns={0:'TARGET', 'level_0':'d'})
    f = price_data.replace(0,np.nan).groupby(ID)[
        trn_d_cols
    ].count().stack(dropna=False).reset_index().rename(columns={0:'cnt', 'level_1':'d', 'level_0':'d'})
    data = pd.merge(data, f, on=['d', ID])
    data = sale_by_group(ID, data, train, trn_d_cols)

    data = cnt_by_group(ID, data, price_data, trn_d_cols)
    data = fe(ID, data, log=log)

    for key, value in calendar.set_index('d')[['qaurter','wday','month','year','new_event_name_1', 'new_event_type_1','event_name_1','event_type_1','snap_CA','snap_WI','snap_TX']].items():
        data[key] = data.d.map(value)
        if data[key].dtypes==object:
            data[key] = pd.factorize(data[key])[0]

        if key in ['event_name_1','event_type_1','snap_CA','snap_WI','snap_TX']:
            for i in [ -1,1]:
                data[f'{key}_{i}'] = data.groupby(ID)[key].shift(i)

    data[f'f_{ID}'] = pd.factorize(data[ID])[0]
    data = data[data.d.isin(trn_d_cols[28:])]
    return data

def make_data_all_id(ID, trn_d_cols, train, price_data, calendar):
    data = train.groupby(ID)[trn_d_cols].sum(min_count=1).T
    data = data.stack(dropna=False).reset_index().rename(columns={0:'TARGET', 'level_0':'d'})
    f = price_data.replace(0,np.nan).groupby(ID)[
        trn_d_cols
    ].count().stack(dropna=False).reset_index().rename(columns={0:'cnt', 'level_1':'d', 'level_0':'d'})
    data = pd.merge(data, f, on=['d', ID])
    data = sale_by_group(ID, data, train, trn_d_cols, group='state_id')

    data = cnt_by_group(ID, data, price_data, trn_d_cols)
    data = fe(ID, data, log=False)

    for key, value in calendar.set_index('d')[['qaurter','wday','month','year','new_event_name_1', 'new_event_type_1','event_name_1','event_type_1','snap_CA','snap_WI','snap_TX']].items():
        data[key] = data.d.map(value)
        if data[key].dtypes==object:
            data[key] = pd.factorize(data[key])[0]

        if key in ['event_name_1','event_type_1','snap_CA','snap_WI','snap_TX']:
            for i in [ -1,1]:
                data[f'{key}_{i}'] = data.groupby(ID)[key].shift(i)

    data[f'f_{ID}'] = pd.factorize(data[ID])[0]
    data = data[data.d.isin(trn_d_cols[28:])]
    return data
def make_data_state_id_store_id(ID, trn_d_cols, train, price_data, calendar, log=True):
    
    data = train.groupby(ID)[trn_d_cols].sum(min_count=1).T
    data = data.stack(dropna=False).reset_index().rename(columns={0:'TARGET', 'level_0':'d'})
    f = price_data.replace(0,np.nan).groupby(ID)[
        trn_d_cols
    ].count().stack(dropna=False).reset_index().rename(columns={0:'cnt', 'level_1':'d', 'level_0':'d'})
    data = pd.merge(data, f, on=['d', ID])
    
    group='cat_id'
    for _id in train[group].unique():
        f = train[train[group]==_id].groupby(ID)[trn_d_cols].sum(min_count=1).T
        f = f.stack(dropna=False).reset_index().rename(columns={0:f'TARGET_{_id}', 'level_0':'d'})
        data = pd.merge(data, f, on=['d', ID])
    
    for _id in price_data[group].unique():
        f = price_data[price_data[group]==_id].replace(0, np.nan).groupby(ID)[trn_d_cols].count().T
        f = f.stack(dropna=False).reset_index().rename(columns={0:f'cnt_{_id}', 'level_0':'d'})
        data = pd.merge(data, f, on=['d', ID])
    
    
    data = fe(ID, data, log=log)

    for key, value in calendar.set_index('d')[['qaurter','wday','month','year','new_event_name_1', 'new_event_type_1','event_name_1','event_type_1']].items():
        data[key] = data.d.map(value)
        if data[key].dtypes==object:
            data[key] = pd.factorize(data[key])[0]

        if key in ['event_name_1','event_type_1']:
            for i in [ -1,1]:
                data[f'{key}_{i}'] = data.groupby(ID)[key].shift(i)
                
    data['snap'] = 0
    for key, value in calendar.set_index('d')[['snap_CA','snap_WI','snap_TX']].items():
        state = key.replace('snap_', '')
        data.loc[data[ID].str.contains(state),'snap'] = data.loc[data[ID].str.contains(state),'d'].map(value)
    for i in [ -1,1]:
        data[f'snap_{i}'] = data.groupby(ID)['snap'].shift(i)
    
    data[f'f_{ID}'] = pd.factorize(data[ID])[0]
    data = data[data.d.isin(trn_d_cols[28:])]
    return data

def make_data_2_id(ID, trn_d_cols, train, price_data, calendar, log=True):
    
    data = train.groupby(ID)[trn_d_cols].sum(min_count=1).T
    data = data.stack(dropna=False).reset_index().rename(columns={0:'TARGET', 'level_0':'d'})
    f = price_data.replace(0,np.nan).groupby(ID)[
        trn_d_cols
    ].count().stack(dropna=False).reset_index().rename(columns={0:'cnt', 'level_1':'d', 'level_0':'d'})
    data = pd.merge(data, f, on=['d', ID])
    
    data = fe(ID, data, log=log)

    for key, value in calendar.set_index('d')[['qaurter','wday','month','year','new_event_name_1', 'new_event_type_1','event_name_1','event_type_1']].items():
        data[key] = data.d.map(value)
        if data[key].dtypes==object:
            data[key] = pd.factorize(data[key])[0]

        if key in ['event_name_1','event_type_1']:
            for i in [ -1,1]:
                data[f'{key}_{i}'] = data.groupby(ID)[key].shift(i)
                
    data['snap'] = 0
    for key, value in calendar.set_index('d')[['snap_CA','snap_WI','snap_TX']].items():
        state = key.replace('snap_', '')
        data.loc[data[ID].str.contains(state),'snap'] = data.loc[data[ID].str.contains(state),'d'].map(value)
    for i in [ -1,1]:
        data[f'snap_{i}'] = data.groupby(ID)['snap'].shift(i)
    
    data[f'f_{ID}'] = pd.factorize(data[ID])[0]
    data = data[data.d.isin(trn_d_cols[28:])]
    return data

100%|██████████| 30490/30490 [00:11<00:00, 2770.17it/s]


# lgb model utils

In [3]:
class Optimize_Optuna():
    def __init__(self, data, ID):
        days = data.d.unique().tolist()
        days = sort_d_cols(days)
        self.trn_days = days[:-28]
        val_days = days[-28:]
        
        self.X = data[data.d.isin(self.trn_days)]
        self.ID = ID
        
        self.shift_cols = [col for col in data.columns if 'shift' in col]
        self.roll_cols = [col for col in data.columns if 'roll' in col]
        cat_cols = [col for col in data.columns if (not 'shift' in col) and (not 'roll' in col)]
        self.cat_cols = [col for col in cat_cols if not col in [ID, 'd', 'TARGET']]
        self.features=self.cat_cols+self.shift_cols+self.roll_cols

    def objective(self, trial):
        PARAMS = {
            'boosting_type': 'gbdt',
            'objective': 'rmse',
            #'tweedie_variance_power': trial.suggest_uniform('tweedie_variance_power', 1.1, 1.9),
            'metric': 'rmse',
            'subsample': trial.suggest_uniform('subsample', 0.4, 0.9),
            'subsample_freq': trial.suggest_uniform('subsample_freq', 0.4, 0.9),
            'learning_rate': trial.suggest_loguniform('learning_rate', 0.03, 0.5),
            'num_leaves': trial.suggest_int('num_leaves', 4, 2**8),
            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
            'max_bin': 100,
            'n_estimators': 1400,
            'boost_from_average': False,
            'verbose': -1,

            'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
            'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),

            'random_state':2020
        }
        score = 0

        data = self.X.copy()
        for i in range(28):
            
            if i%7==0:
                data[self.shift_cols] = data.groupby(self.ID)[self.shift_cols].shift(7)
            data[self.roll_cols] = data.groupby(self.ID)[self.roll_cols].shift(1)
            
            split = data[data.d.isin(self.trn_days[-500:])]['TARGET']
            split = split.mean()-3.*split.std()
            X = data[data.TARGET>split]
            X.reset_index(drop=True, inplace=True)
            k = StratifiedKFold(n_splits=8, random_state=2020, shuffle=True)
            
            y = (200*minmax_scale(X['TARGET'])).astype(int)
            t_score=0
            for trn, val in k.split(X,y=y, groups=y):
                train_set = lgb.Dataset(X.loc[trn,self.features], X.loc[trn,'TARGET'])
                val_set = lgb.Dataset(X.loc[val,self.features], X.loc[val,'TARGET'])

                model = lgb.train(train_set=train_set, valid_sets=[train_set, val_set], params=PARAMS, num_boost_round=3000, 
                                  early_stopping_rounds=100, verbose_eval=10000)
                
                preds = model.predict(X.loc[val,self.features])
                
                #preds = np.e**(preds)-1
                #true_y = np.e**(X.loc[val,'TARGET'])-1
                
                true_y = X.loc[val,'TARGET']
                t_score+=np.sqrt(mean_squared_error(preds, true_y))/8
            score+=t_score/28
        return score

In [4]:
def all_flow(study, n_trials):
    d_cols = [f'd_{i+1}' for i in range(1969)]
    d_cols = d_cols[:-28]
    trn_d_cols = d_cols[-630:]
    #ID='dept_idXstore_id'
    ID='all_id'

    train['dept_idXstore_id'] = train['dept_id'].astype(str)+'X'+train['store_id'].astype(str)
    price_data['dept_idXstore_id'] = train['dept_id'].astype(str)+'X'+train['store_id'].astype(str)
    train['all_id'] = 'all_id'
    price_data['all_id'] = 'all_id'
    
    data = make_data_all_id(ID, trn_d_cols, train, price_data, calendar)
    
    #data = make_data_dept_cat_id(ID=ID, trn_d_cols=trn_d_cols, train=train,
     #                                price_data=price_data, calendar=calendar, log=True)

    optimize_optuna = Optimize_Optuna(data, ID)

    study.optimize(optimize_optuna.objective, n_trials=n_trials)
    return study

In [5]:
%%time
#with open(f'../input/fork-of-fork-of-optuna-of-make-data-create-m5-all/study_24times.pickle', 'rb') as f:
 #   study = pickle.load(f)

study = optuna.create_study(direction='minimize')
n_trials=120
study = all_flow(study, n_trials)

Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[45]	training's rmse: 8256.45	valid_1's rmse: 9237.47
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	training's rmse: 6396.75	valid_1's rmse: 7443.26
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[261]	training's rmse: 5519.99	valid_1's rmse: 9069.06
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[852]	training's rmse: 3273.09	valid_1's rmse: 8234.49
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[107]	training's rmse: 6941.95	valid_1's rmse: 7358.96
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	training's rmse: 7039.6	valid_1's rmse: 9854.25
Training until validation scores don't improve for 100 rounds
Early stopping, best iteratio

[I 2020-06-20 17:42:55,255] Finished trial#0 with value: 8610.295793518226 with parameters: {'subsample': 0.7205823350381939, 'subsample_freq': 0.7775324877370502, 'learning_rate': 0.1053618409823162, 'num_leaves': 161, 'feature_fraction': 0.8120820774440378, 'bagging_freq': 3, 'min_child_samples': 77, 'lambda_l1': 6.221405742074153e-08, 'lambda_l2': 6.380707811495632e-07}. Best is trial#0 with value: 8610.295793518226.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[14]	training's rmse: 7291.09	valid_1's rmse: 9361.33
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[54]	training's rmse: 4906.92	valid_1's rmse: 7461.89
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[63]	training's rmse: 4487.65	valid_1's rmse: 8769.8
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[239]	training's rmse: 1513.32	valid_1's rmse: 8531.63
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[19]	training's rmse: 7206.16	valid_1's rmse: 7555.93
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[21]	training's rmse: 6624.13	valid_1's rmse: 9343.12
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration i

[I 2020-06-20 17:43:30,430] Finished trial#1 with value: 8397.646476408609 with parameters: {'subsample': 0.753309532376164, 'subsample_freq': 0.6138565514296205, 'learning_rate': 0.2799120714112573, 'num_leaves': 128, 'feature_fraction': 0.6959794274505047, 'bagging_freq': 7, 'min_child_samples': 46, 'lambda_l1': 1.158289628018569e-06, 'lambda_l2': 1.0199631150072201}. Best is trial#1 with value: 8397.646476408609.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[8]	training's rmse: 6516.39	valid_1's rmse: 8755.18
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[7]	training's rmse: 7784.46	valid_1's rmse: 8662.28
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[45]	training's rmse: 3577.44	valid_1's rmse: 9984.53
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[23]	training's rmse: 4627.22	valid_1's rmse: 9678.51
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[10]	training's rmse: 6986.39	valid_1's rmse: 9011.91
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[37]	training's rmse: 3869.83	valid_1's rmse: 9566.87
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:

[I 2020-06-20 17:43:58,224] Finished trial#2 with value: 9434.10351274371 with parameters: {'subsample': 0.43950233175729564, 'subsample_freq': 0.6485902566108803, 'learning_rate': 0.46861960718498324, 'num_leaves': 47, 'feature_fraction': 0.5553921470693628, 'bagging_freq': 1, 'min_child_samples': 15, 'lambda_l1': 0.058930924945932335, 'lambda_l2': 0.10215089510164246}. Best is trial#1 with value: 8397.646476408609.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[14]	training's rmse: 7475.21	valid_1's rmse: 9130.94
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 119301	valid_1's rmse: 119538
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[264]	training's rmse: 719.379	valid_1's rmse: 8894.3
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[4]	training's rmse: 118879	valid_1's rmse: 122425
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 119197	valid_1's rmse: 120272
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[195]	training's rmse: 1192.61	valid_1's rmse: 9105.05
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[48]	

[I 2020-06-20 17:44:21,951] Finished trial#3 with value: 71552.97483059306 with parameters: {'subsample': 0.5981706260919764, 'subsample_freq': 0.8160518160275867, 'learning_rate': 0.31001179037863336, 'num_leaves': 95, 'feature_fraction': 0.5181218189377766, 'bagging_freq': 6, 'min_child_samples': 24, 'lambda_l1': 1.9374590777191007e-06, 'lambda_l2': 4.302285636262053}. Best is trial#1 with value: 8397.646476408609.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[114]	training's rmse: 5827.21	valid_1's rmse: 9389.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[51]	training's rmse: 7385.25	valid_1's rmse: 7373.44
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[239]	training's rmse: 4352.46	valid_1's rmse: 8642.18
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[357]	training's rmse: 3681.11	valid_1's rmse: 8719.35
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[32]	training's rmse: 7815.72	valid_1's rmse: 7317.17
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[136]	training's rmse: 5491.97	valid_1's rmse: 9191.69
Training until validation scores don't improve for 100 rounds
Early stopping, best iterati

[I 2020-06-20 17:44:50,684] Finished trial#4 with value: 8502.939616370295 with parameters: {'subsample': 0.7763867638255053, 'subsample_freq': 0.6302596400765328, 'learning_rate': 0.18495814454521536, 'num_leaves': 32, 'feature_fraction': 0.5568587206241078, 'bagging_freq': 2, 'min_child_samples': 77, 'lambda_l1': 2.481721709161123e-07, 'lambda_l2': 0.004197788155262032}. Best is trial#1 with value: 8397.646476408609.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[12]	training's rmse: 6083.83	valid_1's rmse: 9137.98
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[16]	training's rmse: 5347.72	valid_1's rmse: 6763.09
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[33]	training's rmse: 3791.94	valid_1's rmse: 7947.99
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[88]	training's rmse: 1591.79	valid_1's rmse: 9722.88
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[10]	training's rmse: 6782.95	valid_1's rmse: 7539.94
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[10]	training's rmse: 6692.68	valid_1's rmse: 9772.71
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration i

[I 2020-06-20 17:45:24,342] Finished trial#5 with value: 8701.398252389985 with parameters: {'subsample': 0.726171934748312, 'subsample_freq': 0.8917998706732696, 'learning_rate': 0.4910338731292202, 'num_leaves': 176, 'feature_fraction': 0.6619555051121888, 'bagging_freq': 4, 'min_child_samples': 32, 'lambda_l1': 4.402340694394218e-06, 'lambda_l2': 1.2827558176728963e-06}. Best is trial#1 with value: 8397.646476408609.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[56]	training's rmse: 5871.03	valid_1's rmse: 9085.69
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[155]	training's rmse: 3953.65	valid_1's rmse: 7007.95
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[64]	training's rmse: 5834.42	valid_1's rmse: 8308.82
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[95]	training's rmse: 5231.93	valid_1's rmse: 8444.42
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[43]	training's rmse: 6525.24	valid_1's rmse: 7230.56
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[216]	training's rmse: 3077.68	valid_1's rmse: 8722.46
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration

[I 2020-06-20 17:54:25,841] Finished trial#13 with value: 8074.0045315208845 with parameters: {'subsample': 0.8794501928238844, 'subsample_freq': 0.5161496576288074, 'learning_rate': 0.03358191804368562, 'num_leaves': 255, 'feature_fraction': 0.9992751850725369, 'bagging_freq': 6, 'min_child_samples': 56, 'lambda_l1': 0.00019817384173690842, 'lambda_l2': 2.2025938056498965e-08}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[151]	training's rmse: 6915.24	valid_1's rmse: 8928.27
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[252]	training's rmse: 6322.4	valid_1's rmse: 7159.3
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[306]	training's rmse: 5799.21	valid_1's rmse: 8472.9
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[881]	training's rmse: 3984.96	valid_1's rmse: 8570.11
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[84]	training's rmse: 7925.79	valid_1's rmse: 7242.09
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[219]	training's rmse: 6324.84	valid_1's rmse: 9232.55
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration

[I 2020-06-20 17:55:16,834] Finished trial#14 with value: 8493.939671946004 with parameters: {'subsample': 0.8939647490500229, 'subsample_freq': 0.5373844696672408, 'learning_rate': 0.06762758973359069, 'num_leaves': 224, 'feature_fraction': 0.9881508819831698, 'bagging_freq': 7, 'min_child_samples': 94, 'lambda_l1': 5.14853346603018e-05, 'lambda_l2': 1.1739469544165382e-08}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[124]	training's rmse: 6587.24	valid_1's rmse: 9006.58
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[334]	training's rmse: 4911.55	valid_1's rmse: 6678.59
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[678]	training's rmse: 3387.55	valid_1's rmse: 8455.32
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 1902.49	valid_1's rmse: 7820.7
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[113]	training's rmse: 7019.92	valid_1's rmse: 7160.54
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[365]	training's rmse: 4595.41	valid_1's rmse: 8816.21
Training until validation scores don't improve for 100 rounds
Early stoppin

[I 2020-06-20 17:56:22,805] Finished trial#15 with value: 8307.810681377827 with parameters: {'subsample': 0.8442152759545846, 'subsample_freq': 0.5409752462798192, 'learning_rate': 0.05155770785781222, 'num_leaves': 254, 'feature_fraction': 0.9868068875352215, 'bagging_freq': 6, 'min_child_samples': 64, 'lambda_l1': 5.709024660170504e-05, 'lambda_l2': 2.2275451809549844e-08}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[44]	training's rmse: 6217.5	valid_1's rmse: 9264.33
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[79]	training's rmse: 5223.02	valid_1's rmse: 7085.26
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[144]	training's rmse: 3635.23	valid_1's rmse: 8368.59
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[310]	training's rmse: 2001.9	valid_1's rmse: 8352.77
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[100]	training's rmse: 4545.3	valid_1's rmse: 7105.96
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[77]	training's rmse: 5177.43	valid_1's rmse: 8222.35
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration i

[I 2020-06-20 17:57:19,387] Finished trial#16 with value: 8078.037084594404 with parameters: {'subsample': 0.8170604480612884, 'subsample_freq': 0.4056390313850926, 'learning_rate': 0.0990923484224844, 'num_leaves': 217, 'feature_fraction': 0.8107671778793456, 'bagging_freq': 5, 'min_child_samples': 40, 'lambda_l1': 0.011208166936119575, 'lambda_l2': 1.1829456840514728e-07}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[150]	training's rmse: 6313.99	valid_1's rmse: 9201.37
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[251]	training's rmse: 5762.99	valid_1's rmse: 7100.33
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[408]	training's rmse: 4626.43	valid_1's rmse: 8281.2
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[708]	training's rmse: 3561.58	valid_1's rmse: 8209.09
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[250]	training's rmse: 5639.49	valid_1's rmse: 7111.39
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[341]	training's rmse: 4910.54	valid_1's rmse: 8547.13
Training until validation scores don't improve for 100 rounds
Early stopping, best iterat

[I 2020-06-20 17:58:27,034] Finished trial#17 with value: 8140.735268151435 with parameters: {'subsample': 0.8988370919919635, 'subsample_freq': 0.7183561986204798, 'learning_rate': 0.041613427466922336, 'num_leaves': 113, 'feature_fraction': 0.7781269862937757, 'bagging_freq': 7, 'min_child_samples': 64, 'lambda_l1': 6.695373951611163e-05, 'lambda_l2': 8.586935959280985e-05}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[57]	training's rmse: 4558.02	valid_1's rmse: 8862.09
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[226]	training's rmse: 1245.86	valid_1's rmse: 6613.97
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[295]	training's rmse: 855.447	valid_1's rmse: 7964.76
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[321]	training's rmse: 909.31	valid_1's rmse: 8179.48
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[113]	training's rmse: 2818.17	valid_1's rmse: 7093.37
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[444]	training's rmse: 883.869	valid_1's rmse: 8039.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iterati

[I 2020-06-20 18:00:30,162] Finished trial#18 with value: 7990.12360222598 with parameters: {'subsample': 0.6615876035276251, 'subsample_freq': 0.5322784207817218, 'learning_rate': 0.07655788302080727, 'num_leaves': 256, 'feature_fraction': 0.9995429994820406, 'bagging_freq': 6, 'min_child_samples': 16, 'lambda_l1': 0.003960204325284381, 'lambda_l2': 3.8108817119324393e-06}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[56]	training's rmse: 3464.68	valid_1's rmse: 9587.64
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[65]	training's rmse: 3404.16	valid_1's rmse: 7270.82
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[310]	training's rmse: 381.502	valid_1's rmse: 8142.6
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[543]	training's rmse: 305.705	valid_1's rmse: 7950.51
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[67]	training's rmse: 2936.11	valid_1's rmse: 7798.56
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[405]	training's rmse: 698.487	valid_1's rmse: 8378
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is

[I 2020-06-20 18:03:23,959] Finished trial#19 with value: 8080.28833960663 with parameters: {'subsample': 0.6452824990968548, 'subsample_freq': 0.5759045911305818, 'learning_rate': 0.07979622597421514, 'num_leaves': 80, 'feature_fraction': 0.9113988482544026, 'bagging_freq': 5, 'min_child_samples': 10, 'lambda_l1': 0.009343158226099768, 'lambda_l2': 4.141375772789519e-06}. Best is trial#12 with value: 7982.843439525784.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[62]	training's rmse: 4410.9	valid_1's rmse: 9202.27
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[77]	training's rmse: 4025.67	valid_1's rmse: 6585.01
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[108]	training's rmse: 3104.96	valid_1's rmse: 8210.91
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[410]	training's rmse: 682.684	valid_1's rmse: 8262.01
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[72]	training's rmse: 4039.09	valid_1's rmse: 7426.91
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[249]	training's rmse: 1516.07	valid_1's rmse: 7872.5
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration 

[I 2020-06-20 18:05:19,743] Finished trial#20 with value: 7921.938521698207 with parameters: {'subsample': 0.6553115583972013, 'subsample_freq': 0.45214940516892566, 'learning_rate': 0.07877993445849085, 'num_leaves': 202, 'feature_fraction': 0.9979402405232145, 'bagging_freq': 3, 'min_child_samples': 18, 'lambda_l1': 0.008800471184787197, 'lambda_l2': 0.0005716256646760471}. Best is trial#20 with value: 7921.938521698207.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[59]	training's rmse: 5117.98	valid_1's rmse: 9075.14
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[118]	training's rmse: 3439.87	valid_1's rmse: 6572.72
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[102]	training's rmse: 3781.32	valid_1's rmse: 7946.27
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[701]	training's rmse: 358.187	valid_1's rmse: 7816.95
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[88]	training's rmse: 4042.84	valid_1's rmse: 7159.69
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[300]	training's rmse: 1632.6	valid_1's rmse: 8082.62
Training until validation scores don't improve for 100 rounds
Early stopping, best iteratio

[I 2020-06-20 18:06:51,621] Finished trial#21 with value: 7925.872375889527 with parameters: {'subsample': 0.6710275048927423, 'subsample_freq': 0.45259245935623166, 'learning_rate': 0.07565162107629089, 'num_leaves': 198, 'feature_fraction': 0.9923168005063592, 'bagging_freq': 3, 'min_child_samples': 22, 'lambda_l1': 0.004538774894933904, 'lambda_l2': 0.0003856015917118042}. Best is trial#20 with value: 7921.938521698207.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[101]	training's rmse: 5250.97	valid_1's rmse: 9001.86
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[117]	training's rmse: 5141.62	valid_1's rmse: 7032.94
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[122]	training's rmse: 5006.8	valid_1's rmse: 8198.03
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[414]	training's rmse: 2184.7	valid_1's rmse: 8065.07
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[122]	training's rmse: 5029.63	valid_1's rmse: 7110.47
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 460.303	valid_1's rmse: 7708.02
Training until validation scores don't improve for 100 rounds
Early stopping

[I 2020-06-20 18:08:23,480] Finished trial#22 with value: 8011.974466427924 with parameters: {'subsample': 0.6628288927376448, 'subsample_freq': 0.44185883907477536, 'learning_rate': 0.049396063380916476, 'num_leaves': 201, 'feature_fraction': 0.9497687361019715, 'bagging_freq': 3, 'min_child_samples': 27, 'lambda_l1': 0.09680284159019208, 'lambda_l2': 0.0004923916851513085}. Best is trial#20 with value: 7921.938521698207.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[57]	training's rmse: 4753.57	valid_1's rmse: 9356.64
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[79]	training's rmse: 4003.39	valid_1's rmse: 6751.47
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[133]	training's rmse: 2793.6	valid_1's rmse: 8040
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[365]	training's rmse: 833.061	valid_1's rmse: 8383.82
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[99]	training's rmse: 3404.46	valid_1's rmse: 7541.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[49]	training's rmse: 5286.04	valid_1's rmse: 8416.33
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:

[I 2020-06-20 18:19:59,322] Finished trial#27 with value: 7924.923332018814 with parameters: {'subsample': 0.6912632166246685, 'subsample_freq': 0.7035241011616958, 'learning_rate': 0.05925506804843565, 'num_leaves': 204, 'feature_fraction': 0.7637477911146193, 'bagging_freq': 1, 'min_child_samples': 28, 'lambda_l1': 1.0398670147517131e-05, 'lambda_l2': 0.0014867979646002339}. Best is trial#26 with value: 7917.899964323089.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[118]	training's rmse: 5154.06	valid_1's rmse: 8914.39
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[155]	training's rmse: 4792.26	valid_1's rmse: 7184.28
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[351]	training's rmse: 2733.76	valid_1's rmse: 7935.23
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[594]	training's rmse: 1673.22	valid_1's rmse: 7677.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[127]	training's rmse: 5209.55	valid_1's rmse: 6913.41
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 533.56	valid_1's rmse: 7809.85
Training until validation scores don't improve for 100 rounds
Early stoppin

[I 2020-06-20 18:27:41,660] Finished trial#30 with value: 8001.648518611948 with parameters: {'subsample': 0.6071764881152282, 'subsample_freq': 0.7537100047396093, 'learning_rate': 0.09822632124649375, 'num_leaves': 157, 'feature_fraction': 0.7324697304834354, 'bagging_freq': 2, 'min_child_samples': 11, 'lambda_l1': 1.8569968689926265e-08, 'lambda_l2': 7.441397638433546e-05}. Best is trial#29 with value: 7844.753799435294.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[79]	training's rmse: 3805.43	valid_1's rmse: 9263.97
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[96]	training's rmse: 3455.71	valid_1's rmse: 6991.18
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 64.7146	valid_1's rmse: 8003.63
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[483]	training's rmse: 513.93	valid_1's rmse: 7889.87
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[96]	training's rmse: 3403.4	valid_1's rmse: 6971.38
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1025]	training's rmse: 317.532	valid_1's rmse: 7596.28
Training until validation scores don't improve for 100 rounds
Early stopping, 

[I 2020-06-20 18:35:56,924] Finished trial#32 with value: 7929.654049215197 with parameters: {'subsample': 0.7069955703396397, 'subsample_freq': 0.685032236323675, 'learning_rate': 0.06101999500726414, 'num_leaves': 139, 'feature_fraction': 0.6747188351908918, 'bagging_freq': 1, 'min_child_samples': 9, 'lambda_l1': 1.5985359108725975e-08, 'lambda_l2': 0.01914346740645249}. Best is trial#31 with value: 7828.675521416073.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[111]	training's rmse: 4080.83	valid_1's rmse: 9132.17
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[138]	training's rmse: 3616.34	valid_1's rmse: 7075.92
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 126.479	valid_1's rmse: 7741.15
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1049]	training's rmse: 229.149	valid_1's rmse: 7871.12
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[148]	training's rmse: 3341.43	valid_1's rmse: 7188.06
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[135]	training's rmse: 3643.01	valid_1's rmse: 8201.53
Training until validation scores don't improve for 100 rounds
Early stopp

[I 2020-06-20 18:52:50,914] Finished trial#35 with value: 8020.725278330342 with parameters: {'subsample': 0.7561864414421362, 'subsample_freq': 0.7738697412850355, 'learning_rate': 0.038298817198982434, 'num_leaves': 124, 'feature_fraction': 0.6141249290723014, 'bagging_freq': 1, 'min_child_samples': 5, 'lambda_l1': 5.731476131882632e-08, 'lambda_l2': 0.10015691170741281}. Best is trial#31 with value: 7828.675521416073.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 119348	valid_1's rmse: 119215
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 119301	valid_1's rmse: 119538
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[2]	training's rmse: 119200	valid_1's rmse: 120236
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 118879	valid_1's rmse: 122425
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's rmse: 119197	valid_1's rmse: 120272
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[13]	training's rmse: 119435	valid_1's rmse: 118599
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1]	training's

[I 2020-06-20 18:53:04,074] Finished trial#36 with value: 119311.10861107758 with parameters: {'subsample': 0.7709049559015898, 'subsample_freq': 0.6630900743090127, 'learning_rate': 0.04979308730732951, 'num_leaves': 163, 'feature_fraction': 0.49119786002857824, 'bagging_freq': 2, 'min_child_samples': 14, 'lambda_l1': 7.905824800002957e-07, 'lambda_l2': 8.713939323735282}. Best is trial#31 with value: 7828.675521416073.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[147]	training's rmse: 4762.87	valid_1's rmse: 8832.63
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[171]	training's rmse: 4524.72	valid_1's rmse: 7455.4
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[338]	training's rmse: 2494.44	valid_1's rmse: 8067.38
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1106]	training's rmse: 440.961	valid_1's rmse: 7823.71
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[190]	training's rmse: 4137.94	valid_1's rmse: 7019.82
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[253]	training's rmse: 3287.68	valid_1's rmse: 8173.14
Training until validation scores don't improve for 100 rounds
Early stopping, best itera

[I 2020-06-20 18:55:54,268] Finished trial#37 with value: 7792.227381771547 with parameters: {'subsample': 0.8019589722792279, 'subsample_freq': 0.7448460968273639, 'learning_rate': 0.0336430904869489, 'num_leaves': 171, 'feature_fraction': 0.6346327332425397, 'bagging_freq': 1, 'min_child_samples': 22, 'lambda_l1': 4.291103291116985e-08, 'lambda_l2': 0.4575674467599291}. Best is trial#37 with value: 7792.227381771547.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[142]	training's rmse: 4711.56	valid_1's rmse: 8692.48
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[173]	training's rmse: 4316.71	valid_1's rmse: 7261.7
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[254]	training's rmse: 3171.62	valid_1's rmse: 8316.73
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[294]	training's rmse: 2849.66	valid_1's rmse: 8262.19
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	training's rmse: 4514.26	valid_1's rmse: 6777.27
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[252]	training's rmse: 3227.72	valid_1's rmse: 8197.78
Training until validation scores don't improve for 100 rounds
Early stopping, best iterat

[I 2020-06-20 18:58:14,666] Finished trial#38 with value: 7824.813996750644 with parameters: {'subsample': 0.804883549301928, 'subsample_freq': 0.7470764233286812, 'learning_rate': 0.03726603373967164, 'num_leaves': 145, 'feature_fraction': 0.7156500865566298, 'bagging_freq': 2, 'min_child_samples': 23, 'lambda_l1': 3.483178298808278e-07, 'lambda_l2': 0.7386280920505031}. Best is trial#37 with value: 7792.227381771547.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[144]	training's rmse: 6742.37	valid_1's rmse: 8860.65
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[546]	training's rmse: 3732.98	valid_1's rmse: 7125.67
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[446]	training's rmse: 4176.8	valid_1's rmse: 8547.97
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 1603.79	valid_1's rmse: 7667.95
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[175]	training's rmse: 6519.44	valid_1's rmse: 6970.96
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[299]	training's rmse: 5059.54	valid_1's rmse: 8528.29
Training until validation scores don't improve for 100 rounds
Early stoppin

[I 2020-06-20 19:52:13,648] Finished trial#59 with value: 7855.698956607295 with parameters: {'subsample': 0.8113110390578081, 'subsample_freq': 0.726265259253253, 'learning_rate': 0.04117742230800165, 'num_leaves': 191, 'feature_fraction': 0.6859762193971962, 'bagging_freq': 4, 'min_child_samples': 18, 'lambda_l1': 1.8146335446236048e-06, 'lambda_l2': 1.3448701963167353}. Best is trial#56 with value: 7754.49915661229.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[100]	training's rmse: 2531.89	valid_1's rmse: 8863.75
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[111]	training's rmse: 2207.18	valid_1's rmse: 7449.65
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[469]	training's rmse: 203.706	valid_1's rmse: 7800.1
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[306]	training's rmse: 384.354	valid_1's rmse: 8029.44
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[115]	training's rmse: 2014.15	valid_1's rmse: 7596.59
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[264]	training's rmse: 503.796	valid_1's rmse: 8190.25
Training until validation scores don't improve for 100 rounds
Early stopping, best iterat

[I 2020-06-20 19:58:24,093] Finished trial#60 with value: 7931.849554613704 with parameters: {'subsample': 0.8535750923200838, 'subsample_freq': 0.8017990611760816, 'learning_rate': 0.04618250122578634, 'num_leaves': 151, 'feature_fraction': 0.5935508218928804, 'bagging_freq': 3, 'min_child_samples': 8, 'lambda_l1': 8.798655591026934e-08, 'lambda_l2': 0.1557391389102486}. Best is trial#56 with value: 7754.49915661229.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	training's rmse: 3378.28	valid_1's rmse: 8939.31
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[176]	training's rmse: 3347.2	valid_1's rmse: 7086.32
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 135.655	valid_1's rmse: 7825.71
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1000]	training's rmse: 305.446	valid_1's rmse: 7907.29
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	training's rmse: 3545.94	valid_1's rmse: 6981.05
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[280]	training's rmse: 2187.55	valid_1's rmse: 7725.07
Training until validation scores don't improve for 100 rounds
Early stoppi

[I 2020-06-20 20:02:12,443] Finished trial#61 with value: 7747.608468368883 with parameters: {'subsample': 0.8724201995695599, 'subsample_freq': 0.8245230152083978, 'learning_rate': 0.033734749233626946, 'num_leaves': 150, 'feature_fraction': 0.7113126115441982, 'bagging_freq': 2, 'min_child_samples': 19, 'lambda_l1': 3.29892699552751e-08, 'lambda_l2': 0.007986517170172382}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[16]	training's rmse: 4465.74	valid_1's rmse: 9500.54
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[13]	training's rmse: 5308.19	valid_1's rmse: 7282.34
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[34]	training's rmse: 2846.46	valid_1's rmse: 7883.47
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[62]	training's rmse: 1444.3	valid_1's rmse: 8055.14
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[13]	training's rmse: 5330.18	valid_1's rmse: 7884.79
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[157]	training's rmse: 262.103	valid_1's rmse: 9440.26
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration i

[I 2020-06-20 20:02:58,297] Finished trial#62 with value: 8304.897488435949 with parameters: {'subsample': 0.8984483672735051, 'subsample_freq': 0.8260679137849783, 'learning_rate': 0.37708195294150954, 'num_leaves': 166, 'feature_fraction': 0.7188177026740904, 'bagging_freq': 2, 'min_child_samples': 30, 'lambda_l1': 2.2092038931164452e-07, 'lambda_l2': 0.04175300104442812}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[160]	training's rmse: 3881.72	valid_1's rmse: 8830.28
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[222]	training's rmse: 3099.75	valid_1's rmse: 7131.44
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[260]	training's rmse: 2648.89	valid_1's rmse: 8015.04
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1224]	training's rmse: 275.467	valid_1's rmse: 7964.69
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[165]	training's rmse: 3951.6	valid_1's rmse: 7095.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[393]	training's rmse: 1710.99	valid_1's rmse: 8191.59
Training until validation scores don't improve for 100 rounds
Early stopping, best itera

[I 2020-06-20 20:06:39,538] Finished trial#63 with value: 7790.419629586135 with parameters: {'subsample': 0.8644415642444514, 'subsample_freq': 0.7915674425059394, 'learning_rate': 0.030016421268433065, 'num_leaves': 152, 'feature_fraction': 0.6899557948084947, 'bagging_freq': 3, 'min_child_samples': 19, 'lambda_l1': 2.8715443085663365e-08, 'lambda_l2': 0.0874128168203327}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[89]	training's rmse: 4055.45	valid_1's rmse: 8999.94
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[90]	training's rmse: 4280.64	valid_1's rmse: 7118.57
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[169]	training's rmse: 2420.63	valid_1's rmse: 8098.03
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[532]	training's rmse: 498.504	valid_1's rmse: 7537.47
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[129]	training's rmse: 3133.28	valid_1's rmse: 7084.93
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[313]	training's rmse: 1172.19	valid_1's rmse: 8041.81
Training until validation scores don't improve for 100 rounds
Early stopping, best iterati

[I 2020-06-20 21:07:45,617] Finished trial#80 with value: 7903.572989951041 with parameters: {'subsample': 0.852345299127247, 'subsample_freq': 0.8227838677279203, 'learning_rate': 0.03796666644464156, 'num_leaves': 167, 'feature_fraction': 0.6259464230739396, 'bagging_freq': 4, 'min_child_samples': 11, 'lambda_l1': 6.132816616326964e-08, 'lambda_l2': 0.005342865764276236}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[162]	training's rmse: 5067.53	valid_1's rmse: 8794.92
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[240]	training's rmse: 3605.16	valid_1's rmse: 7858.39
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[580]	training's rmse: 1166.35	valid_1's rmse: 8281.38
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[1400]	training's rmse: 218.237	valid_1's rmse: 7722.42
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[192]	training's rmse: 4503.85	valid_1's rmse: 6576.91
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[404]	training's rmse: 1896.14	valid_1's rmse: 8147.43
Training until validation scores don't improve for 100 rounds
Early stoppi

[I 2020-06-20 21:28:14,289] Finished trial#84 with value: 8143.9851503391255 with parameters: {'subsample': 0.8995422381430098, 'subsample_freq': 0.8128905241968885, 'learning_rate': 0.033886234955565045, 'num_leaves': 122, 'feature_fraction': 0.6417729069306088, 'bagging_freq': 4, 'min_child_samples': 6, 'lambda_l1': 1.6385791991221796e-07, 'lambda_l2': 0.000853738433900505}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[104]	training's rmse: 4183.36	valid_1's rmse: 8710
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[135]	training's rmse: 3673.4	valid_1's rmse: 6923.22
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[177]	training's rmse: 2863.99	valid_1's rmse: 7965.39
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[434]	training's rmse: 1030.25	valid_1's rmse: 7823.67
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[120]	training's rmse: 3817.64	valid_1's rmse: 6735.1
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[537]	training's rmse: 711.082	valid_1's rmse: 7538.69
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration 

[I 2020-06-20 21:30:47,861] Finished trial#85 with value: 7779.8160246806765 with parameters: {'subsample': 0.8237003743566558, 'subsample_freq': 0.7117845079084757, 'learning_rate': 0.04691556081719107, 'num_leaves': 187, 'feature_fraction': 0.7399595016510659, 'bagging_freq': 4, 'min_child_samples': 21, 'lambda_l1': 5.894502995454709e-08, 'lambda_l2': 0.028576123453676012}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[110]	training's rmse: 4589.62	valid_1's rmse: 8885.12
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[138]	training's rmse: 4212.66	valid_1's rmse: 7085.6
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[339]	training's rmse: 1906.71	valid_1's rmse: 7685.53
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[529]	training's rmse: 1114.96	valid_1's rmse: 7726.25
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[139]	training's rmse: 4027.66	valid_1's rmse: 7013.33
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[260]	training's rmse: 2671.6	valid_1's rmse: 8029.74
Training until validation scores don't improve for 100 rounds
Early stopping, best iterati

[I 2020-06-20 21:32:50,802] Finished trial#86 with value: 7810.201569669486 with parameters: {'subsample': 0.8216731572816709, 'subsample_freq': 0.6984850863534191, 'learning_rate': 0.04726789194360615, 'num_leaves': 15, 'feature_fraction': 0.7426581925176399, 'bagging_freq': 5, 'min_child_samples': 26, 'lambda_l1': 2.791154595059684e-05, 'lambda_l2': 0.0147519836888878}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[116]	training's rmse: 4658.04	valid_1's rmse: 8897.88
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[235]	training's rmse: 2914.95	valid_1's rmse: 6722.61
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[263]	training's rmse: 2527.33	valid_1's rmse: 7738.38
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[748]	training's rmse: 657.785	valid_1's rmse: 7697.75
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[164]	training's rmse: 3678.97	valid_1's rmse: 6927.83
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[265]	training's rmse: 2638.95	valid_1's rmse: 7696.37
Training until validation scores don't improve for 100 rounds
Early stopping, best itera

[I 2020-06-20 21:35:17,700] Finished trial#87 with value: 7767.017921509412 with parameters: {'subsample': 0.8373452086919387, 'subsample_freq': 0.6492890018614154, 'learning_rate': 0.03773615570876872, 'num_leaves': 209, 'feature_fraction': 0.6644474047140736, 'bagging_freq': 5, 'min_child_samples': 22, 'lambda_l1': 5.372163275072926e-08, 'lambda_l2': 0.00020210342086870284}. Best is trial#61 with value: 7747.608468368883.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[175]	training's rmse: 4331.56	valid_1's rmse: 8911.05
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[225]	training's rmse: 3943.17	valid_1's rmse: 7109.59
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[261]	training's rmse: 3408.29	valid_1's rmse: 7882.88
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[1235]	training's rmse: 530.998	valid_1's rmse: 7474.59
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[151]	training's rmse: 4861.24	valid_1's rmse: 6870.89
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[295]	training's rmse: 3240.83	valid_1's rmse: 8205.87
Training until validation scores don't improve for 100 rounds
Early stopping, best iter

[I 2020-06-20 22:46:43,403] Finished trial#117 with value: 7820.478436214834 with parameters: {'subsample': 0.8982417530733399, 'subsample_freq': 0.6196396196401561, 'learning_rate': 0.04436259403932327, 'num_leaves': 221, 'feature_fraction': 0.8548337511196978, 'bagging_freq': 6, 'min_child_samples': 17, 'lambda_l1': 5.766384713913302e-06, 'lambda_l2': 0.0010994900671454676}. Best is trial#116 with value: 7712.522877229348.


Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[102]	training's rmse: 4320.43	valid_1's rmse: 9165.34
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[100]	training's rmse: 4638.13	valid_1's rmse: 6932.46
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[277]	training's rmse: 1959.71	valid_1's rmse: 7815.25
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[507]	training's rmse: 906.941	valid_1's rmse: 7898.03
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[116]	training's rmse: 4069.17	valid_1's rmse: 7155.36
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[88]	training's rmse: 5100.05	valid_1's rmse: 8104.86
Training until validation scores don't improve for 100 rounds
Early stopping, best iterat

In [6]:
print(f"""
Number of finished trials :     {len(study.trials)}
Best value                :  {study.best_value :.3f}
""")

print('Best trial:', study.best_trial.params)


Number of finished trials :     120
Best value                :  7712.523

Best trial: {'subsample': 0.8980424161870934, 'subsample_freq': 0.6061652675361959, 'learning_rate': 0.04371615591331765, 'num_leaves': 213, 'feature_fraction': 0.7997065596025078, 'bagging_freq': 5, 'min_child_samples': 24, 'lambda_l1': 2.693536203464105e-06, 'lambda_l2': 0.0021954225706444447}


In [7]:
a = len(study.trials)
with open(f'study_{a}times.pickle', 'wb') as f:
    pickle.dump(study, f)