In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
import catboost as cb
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, ShuffleSplit,StratifiedKFold,TimeSeriesSplit,KFold,GroupKFold,train_test_split,GroupShuffleSplit,StratifiedShuffleSplit
from sklearn.metrics import roc_auc_score,mean_squared_error,mean_absolute_error,log_loss,confusion_matrix
import sqlite3
import xgboost as xgb
import datetime
from sklearn.linear_model import LogisticRegression
from scipy.stats import pearsonr
import gc
from sklearn.model_selection import TimeSeriesSplit
#from bayes_opt import BayesianOptimization
import re
from string import punctuation
from scipy.spatial import Voronoi
from scipy.spatial import ConvexHull
from scipy.spatial import Delaunay
from tqdm.notebook import tqdm
from numba import jit
from collections import Counter
import json
import joblib
import multiprocessing
import time
from scipy.sparse import csr_matrix
import gc

In [15]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int32', 'int64', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [16]:
sample_submission = pd.read_csv('../data/sample_submission.csv')
sell_prices = pd.read_csv('../data/sell_prices.csv')
sales_train = pd.read_csv('../data/sales_train_validation.csv')
calendar = pd.read_csv('../data/calendar.csv')

In [17]:
sales_train_long_format = pd.melt(sales_train,id_vars=['id','item_id','dept_id','cat_id','store_id','state_id'],var_name = 'day_num',value_name='sale')

In [18]:
def transform_day_to_num(str1):
    return int(str1[2:])
sales_train_long_format['day_num'] = sales_train_long_format['day_num'].map(transform_day_to_num)
calendar['date'] = pd.to_datetime(calendar['date'])
calendar['day_num'] = calendar['d'].map(transform_day_to_num)
map_day_date = calendar[['date','day_num']].set_index('day_num')['date']
sales_train_long_format['date'] = sales_train_long_format['day_num'].map(map_day_date)
list1 = ['wm_yr_wk','event_name_1', 'event_type_1', 'event_name_2', 'event_type_2','snap_CA', 'snap_TX', 'snap_WI','day_num']
sales_train_long_format = sales_train_long_format.merge(calendar[list1],on='day_num',how='left')

In [19]:
sales_train_long_format = sales_train_long_format.merge(sell_prices,how='left',on = ['store_id','item_id','wm_yr_wk'])
#sales_train_long_format = reduce_mem_usage(sales_train_long_format)

### metric
#### fast metrirc

In [20]:
data_for_weight = sales_train_long_format[sales_train_long_format.day_num.between(1886,1913)].copy()
data_for_weight['sale_used'] = data_for_weight['sale'] * data_for_weight['sell_price']
sales = pd.read_csv('../data/sales_train_validation.csv')

In [21]:
dummies_list = [sales.state_id, sales.store_id, 
                sales.cat_id, sales.dept_id, 
                sales.state_id + sales.cat_id, sales.state_id + sales.dept_id,
                sales.store_id + sales.cat_id, sales.store_id + sales.dept_id, 
                sales.item_id, sales.state_id + sales.item_id, sales.id]


## First element Level_0 aggregation 'all_sales':
dummies_df_list =[pd.DataFrame(np.ones(sales.shape[0]).astype(np.int8), 
                               index=sales.index, columns=['all']).T]

# List of dummy dataframes:
for i, cats in enumerate(dummies_list):
    dummies_df_list +=[pd.get_dummies(cats, drop_first=False).astype(np.int8).T]
    
# Concat dummy dataframes in one go:
## Level is constructed for free.
roll_mat_df = pd.concat(dummies_df_list, keys=list(range(12)), 
                        names=['level','id']).astype(np.int8, copy=False)

# Save values as sparse matrix & save index for future reference:
roll_index = roll_mat_df.index
roll_mat_csr = csr_matrix(roll_mat_df.values)
roll_mat_csr.shape

(42840, 30490)

In [22]:
# Fucntion to calculate S weights:
def get_s(drop_days=0):
    
    """
    drop_days: int, equals 0 by default, so S is calculated on all data.
               If equals 28, last 28 days want be used in calculating S.
    """
    
    # Rollup sales:
    d_name = ['d_' + str(i+1) for i in range(1913-drop_days)]
    sales_train_val = roll_mat_csr * sales[d_name].values

    # Find sales start index:
    start_no = np.argmax(sales_train_val>0, axis=1)
    
    # Replace days less than min day number with np.nan:
    # Next code line is super slow:
    flag = np.dot(np.diag(1/(start_no+1)) , np.tile(np.arange(1,1914-drop_days),(roll_mat_csr.shape[0],1)))<1
    sales_train_val = np.where(flag, np.nan, sales_train_val)

    # Denominator of RMSSE / RMSSE
    weight1 = np.nansum(np.diff(sales_train_val,axis=1)**2,axis=1)/(1913-start_no - 1)
    
    return weight1

In [23]:
S = get_s(drop_days=0)
S.shape

(42840,)

In [26]:
def get_w(sale_usd):
    """
    """
    # Calculate the total sales in USD for each item id:
    total_sales_usd = sale_usd.groupby(['id'],sort=False)['sale_used'].apply(np.sum).values
    
    # Roll up total sales by ids to higher levels:
    weight2 = roll_mat_csr * total_sales_usd
    
    return weight2/np.sum(weight2)

In [27]:
W = get_w(data_for_weight[['id','sale_used']])
W.shape

(42840,)

In [29]:
SW = W/np.sqrt(S)

In [30]:
# Function to do quick rollups:
def rollup(v):
    '''
    v - np.array of size (30490 rows, n day columns)
    v_rolledup - array of size (n, 42840)
    '''
    return (v.T*roll_mat_csr.T).T


# Function to calculate WRMSSE:
def wrmsse(preds, y_true, score_only=False, s = S, w = W, sw=SW):
    '''
    preds - Predictions: pd.DataFrame of size (30490 rows, N day columns)
    y_true - True values: pd.DataFrame of size (30490 rows, N day columns)
    sequence_length - np.array of size (42840,)
    sales_weight - sales weights based on last 28 days: np.array (42840,)
    '''
    
    if score_only:
        return np.sum(
                np.sqrt(
                    np.mean(
                        np.square(rollup(preds.values-y_true.values))
                            ,axis=1)) * sw)
    else: 
        score_matrix = (np.square(rollup(preds.values-y_true.values)) * np.square(w)[:, None]) / s[:, None]
        score = np.sum(np.sqrt(np.mean(score_matrix,axis=1)))
        return score, score_matrix

In [81]:
class WRMSSE_Evalator_super_version():
    def __init__(self,sw,roll_mat_csr):
        self.sw = sw
        self.roll_mat_csr = roll_mat_csr
    def rollup(self,v):
        '''
        v - np.array of size (30490 rows, n day columns) start with h.... not food
        v_rolledup - array of size (n, 42840)
        '''
        return (v.T*roll_mat_csr.T).T
    def feval(self,y_true,y_pred):
        """
        y_true,y_pred: np.ndarray
        """
        return 'WRMSSE',np.sum(
                np.sqrt(
                    np.mean(
                        np.square(self.rollup(y_pred.reshape(30490,-1,order = 'F')-y_true.reshape(30490,-1,order = 'F')))
                            ,axis=1)) * self.sw),False

In [82]:
evaluator_super = WRMSSE_Evalator_super_version(sw = SW,roll_mat_csr=roll_mat_csr)

#### my metric

In [44]:
class WRMSSE_Evaluator():
    def __init__(self,std_df:pd.DataFrame):
        """
        std_sd:sale_train 格式 
        """
        self.std_df = std_df.copy()
        self.std_df['all_id'] = 0
        self.id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id','all_id']
        
#         self.train_df = train_df.copy()
#         self.valid_df = valid_df.copy()
#         self.train_df['all_id'] = 0
#         self.valid_df['all_id'] = 0
#         self.train_df = self.train_df[self.id_cols]
#         self.valid_df = self.valid_df[self.id_cols]
        self.group_ids = [
            'all_id',
            'state_id',
            'store_id',
            'cat_id',
            'dept_id',
            ['state_id', 'cat_id'],
            ['state_id', 'dept_id'],
            ['store_id', 'cat_id'],
            ['store_id', 'dept_id'],
            'item_id',
            ['item_id', 'state_id'],
            ['item_id', 'store_id']
        ]

    def rmsse(self, pred: pd.DataFrame, lv: int):
        
        #start = time.time()
        score = (np.square(pred)).mean(axis=1)
        #end = time.time()
        scale_y = getattr(self, f'lv{lv}_scale')
        #print('RMSSE内部',end-start)
        return np.sqrt(score / scale_y)

    def feval(self, y_true,y_pred):

        #start = time.time()
        diff = ((np.array(y_true) - np.array(y_pred))).reshape((-1,30490)).T
        col_eval = [f'c_{_}' for _ in range(diff.shape[1])]
        valid_diff = pd.DataFrame(diff, columns=col_eval)
        valid_diff = pd.concat([self.std_df[self.id_cols], valid_diff], axis=1, sort=False)
        #end = time.time()
        #print(end-start)
        
        all_scores = 0
        self.score_list = []
        for i, group_id in enumerate(self.group_ids):
            #start = time.time()
            lv_scores = self.rmsse(valid_diff.groupby(group_id,sort=False)[col_eval].sum(), i + 1)
            #end = time.time()
            #print(f'gorup_{i}','rmsse',end-start)
            
            #start = time.time()
            weight = getattr(self, f'lv{i + 1}_weight')
            lv_scores = weight*lv_scores
            setattr(self,f'lv{i+1}_loss',lv_scores/12)
            all_scores+=lv_scores.sum()/12
            self.score_list.append(lv_scores.sum())
            #end = time.time()
            #print(f'gorup_{i}',end-start)
            
        return 'WRMSSE', all_scores, False

    def get_weight(self,weight_df1:pd.DataFrame):
        """
        输入df需要筛选过加入weight的天数 long形态
        """
        weight_df = weight_df1.copy()
        weight_df['all_id'] = 0
        weight_df['sale'] = weight_df['sale'].fillna(0)
        weight_df['sell_price'] = weight_df['sell_price'].fillna(0)
        weight_df['total_price'] = weight_df['sale'] * weight_df['sell_price']
        for i, group_id in enumerate(tqdm(self.group_ids)):
            lv_weight = weight_df.groupby(group_id,sort=False)['total_price'].sum()
            setattr(self, f'lv{i + 1}_weight', lv_weight / lv_weight.sum())
    def get_scale(self,scale_df1:pd.DataFrame):
        """
        输入sales_train
        """
        scale_df = scale_df1.copy()
        scale_df['all_id'] = 0
        #scale_df['sale'] = scale_df['sale'].fillna(0)
        col = scale_df.columns[scale_df.columns.str.startswith('d_')].tolist()
        for i, group_id in enumerate(tqdm(self.group_ids)):
            temp_scale = scale_df.groupby(group_id,sort=False)[col].sum()
            scale = []
            scale_index = []
            for _, row in temp_scale.iterrows():
                series = row.values[np.argmax(row.values != 0):]
                scale.append(((series[1:] - series[:-1]) ** 2).mean())
                scale_index.append(_)
                
            temp1 = pd.Series(np.array(scale),index=scale_index)
            setattr(self,f'lv{i + 1}_scale',temp1)

In [45]:
sales_train = pd.read_csv('../data/sales_train_validation.csv')
evaluator = WRMSSE_Evaluator(sales_train)
evaluator.get_weight(sales_train_long_format[(sales_train_long_format.day_num<=1913)&(sales_train_long_format.day_num>=1886)])
evaluator.get_scale(sales_train[[_ for _ in sales_train.columns if _ not in ['all_id']]
                              ])

HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))




### featuring adding

In [207]:
train1 = pd.read_pickle('data_part1.pkl')

KeyboardInterrupt: 

In [None]:
train1.shape

In [None]:
for _ in ['CA','TX','WI']:
    train1.loc[train1.state_id==_,'snap'] = list(train1.loc[train1.state_id==_,f'snap_{_}'])

In [49]:
drop_col = ['day_num','date','wm_yr_wk','snap_CA','snap_TX','snap_WI','sale']
cat_col = ['item_id','dept_id','cat_id','store_id','state_id','event_name_1', 'event_type_1', 'event_name_2', 'event_type_2','id']

In [50]:
for cat in cat_col:
    lbl = preprocessing.LabelEncoder()
    train1[cat] = lbl.fit_transform(train1[cat].astype(str))
    print(cat)

item_id
dept_id
cat_id
store_id
state_id
event_name_1
event_type_1
event_name_2
event_type_2


In [51]:
train1.fillna(-999,inplace=True)

In [168]:
X_train = train1[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885)&(train1.day_num>=1885-1-364),'sale']
X_valid = train1[(train1.day_num<=1913)&(train1.day_num>=1886)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913)&(train1.day_num>=1886),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [147]:
X_train.shape

(11159340, 89)

In [198]:
def func_custom_1_2(scalar):
    if scalar ==0:
        return 0
    elif scalar <= 10**(-8):
        return 10**4
    else:
        return scalar **(-1/2)

In [199]:
func1 = np.vectorize(func_custom_1_2)

In [200]:
def custom_obj(y_true, y_pred):
    residual = (y_true - y_pred).astype(np.float128)
    residual = residual.reshape((-1,30490)) 
    d = residual.shape[0]    
    residual_42840 = residual * roll_mat_csr.T
    mean_square_42840 = np.square(residual_42840).sum(axis = 0)
    temp_1 = func1(mean_square_42840)
    part1 = ((1/np.sqrt(S)) * temp_1)/np.sqrt(d)
    part_all = np.multiply(part1,residual_42840) 
    grad = -(part_all * roll_mat_csr).reshape((-1))
    hess_part1 = np.multiply(-(1/np.sqrt(S))/np.sqrt(d)*temp_1**3,np.square(residual_42840))
    hess_part2 = (1/np.sqrt(S))/np.sqrt(d)*temp_1
    hess = ((hess_part1 + hess_part2)*roll_mat_csr).reshape((-1))
    return grad,hess

In [151]:
def rmse(y_true, y_pred):
    return 'RMSE', np.sqrt(np.mean(np.power((y_pred) - (y_true), 2))), False

In [171]:
def func_custom_1_2(scalar):
    if scalar <= 10**(-8):
        return 10**4
    else:
        return scalar **(-1/2)

In [179]:
### add clip 10**-12
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.4077	valid_0's RMSE: 2.46099
[20]	valid_0's WRMSSE: 1.16522	valid_0's RMSE: 2.03513
[30]	valid_0's WRMSSE: 0.702458	valid_0's RMSE: 1.9301
[40]	valid_0's WRMSSE: 0.567804	valid_0's RMSE: 1.90517
[50]	valid_0's WRMSSE: 0.527994	valid_0's RMSE: 1.89673
[60]	valid_0's WRMSSE: 0.513228	valid_0's RMSE: 1.8927
[70]	valid_0's WRMSSE: 0.506294	valid_0's RMSE: 1.89032
[80]	valid_0's WRMSSE: 0.500432	valid_0's RMSE: 1.88839
[90]	valid_0's WRMSSE: 0.498318	valid_0's RMSE: 1.88653
[100]	valid_0's WRMSSE: 0.495208	valid_0's RMSE: 1.88513
[110]	valid_0's WRMSSE: 0.493699	valid_0's RMSE: 1.8838
[120]	valid_0's WRMSSE: 0.491817	valid_0's RMSE: 1.88245
[130]	valid_0's WRMSSE: 0.490924	valid_0's RMSE: 1.88117
[140]	valid_0's WRMSSE: 0.490298	valid_0's RMSE: 1.88071
[150]	valid_0's WRMSSE: 0.489538	valid_0's RMSE: 1.87967
[160]	valid_0's WRMSSE: 0.487933	valid_0's RMSE: 1.87934
[170]	valid_0's WRMSSE: 0.487691	valid_0's

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc70852b1e0>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [173]:
### add clip 10**-8 cancel 0
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.4077	valid_0's RMSE: 2.46099
[20]	valid_0's WRMSSE: 1.16522	valid_0's RMSE: 2.03513
[30]	valid_0's WRMSSE: 0.702458	valid_0's RMSE: 1.9301
[40]	valid_0's WRMSSE: 0.567804	valid_0's RMSE: 1.90517
[50]	valid_0's WRMSSE: 0.527994	valid_0's RMSE: 1.89673
[60]	valid_0's WRMSSE: 0.513228	valid_0's RMSE: 1.8927
[70]	valid_0's WRMSSE: 0.506294	valid_0's RMSE: 1.89032
[80]	valid_0's WRMSSE: 0.500432	valid_0's RMSE: 1.88839
[90]	valid_0's WRMSSE: 0.498318	valid_0's RMSE: 1.88653
[100]	valid_0's WRMSSE: 0.495208	valid_0's RMSE: 1.88513
[110]	valid_0's WRMSSE: 0.493837	valid_0's RMSE: 1.88365
[120]	valid_0's WRMSSE: 0.492381	valid_0's RMSE: 1.88241
[130]	valid_0's WRMSSE: 0.490501	valid_0's RMSE: 1.88121
[140]	valid_0's WRMSSE: 0.489744	valid_0's RMSE: 1.88056
[150]	valid_0's WRMSSE: 0.48937	valid_0's RMSE: 1.87985
[160]	valid_0's WRMSSE: 0.488638	valid_0's RMSE: 1.87954
[170]	valid_0's WRMSSE: 0.486779	valid_0's

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ff620>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [178]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [170]:
### add clip 10**-8
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.4077	valid_0's RMSE: 2.46099
[20]	valid_0's WRMSSE: 1.16522	valid_0's RMSE: 2.03513
[30]	valid_0's WRMSSE: 0.702458	valid_0's RMSE: 1.9301
[40]	valid_0's WRMSSE: 0.567804	valid_0's RMSE: 1.90517
[50]	valid_0's WRMSSE: 0.527994	valid_0's RMSE: 1.89673
[60]	valid_0's WRMSSE: 0.513228	valid_0's RMSE: 1.8927
[70]	valid_0's WRMSSE: 0.506294	valid_0's RMSE: 1.89032
[80]	valid_0's WRMSSE: 0.500432	valid_0's RMSE: 1.88839
[90]	valid_0's WRMSSE: 0.498318	valid_0's RMSE: 1.88653
[100]	valid_0's WRMSSE: 0.495208	valid_0's RMSE: 1.88513
[110]	valid_0's WRMSSE: 0.493837	valid_0's RMSE: 1.88365
[120]	valid_0's WRMSSE: 0.492381	valid_0's RMSE: 1.88241
[130]	valid_0's WRMSSE: 0.490501	valid_0's RMSE: 1.88121
[140]	valid_0's WRMSSE: 0.489744	valid_0's RMSE: 1.88056
[150]	valid_0's WRMSSE: 0.48937	valid_0's RMSE: 1.87985
[160]	valid_0's WRMSSE: 0.488638	valid_0's RMSE: 1.87954
[170]	valid_0's WRMSSE: 0.486779	valid_0's

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ff620>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [153]:
### add clip
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.4077	valid_0's RMSE: 2.46099
[20]	valid_0's WRMSSE: 1.16522	valid_0's RMSE: 2.03513
[30]	valid_0's WRMSSE: 0.702458	valid_0's RMSE: 1.9301
[40]	valid_0's WRMSSE: 0.567804	valid_0's RMSE: 1.90517
[50]	valid_0's WRMSSE: 0.527994	valid_0's RMSE: 1.89673
[60]	valid_0's WRMSSE: 0.513228	valid_0's RMSE: 1.8927
[70]	valid_0's WRMSSE: 0.506294	valid_0's RMSE: 1.89032
[80]	valid_0's WRMSSE: 0.500432	valid_0's RMSE: 1.88839
[90]	valid_0's WRMSSE: 0.498208	valid_0's RMSE: 1.88649
[100]	valid_0's WRMSSE: 0.494136	valid_0's RMSE: 1.88532
[110]	valid_0's WRMSSE: 0.492417	valid_0's RMSE: 1.88429
[120]	valid_0's WRMSSE: 0.490829	valid_0's RMSE: 1.88324
[130]	valid_0's WRMSSE: 0.489961	valid_0's RMSE: 1.8817
[140]	valid_0's WRMSSE: 0.489274	valid_0's RMSE: 1.88092
[150]	valid_0's WRMSSE: 0.488726	valid_0's RMSE: 1.87991
[160]	valid_0's WRMSSE: 0.48827	valid_0's RMSE: 1.87927
[170]	valid_0's WRMSSE: 0.48789	valid_0's R

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffd90>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [84]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.4077	valid_0's RMSE: 2.46099
[20]	valid_0's WRMSSE: 1.16522	valid_0's RMSE: 2.03513
[30]	valid_0's WRMSSE: 0.702458	valid_0's RMSE: 1.9301
[40]	valid_0's WRMSSE: 0.567804	valid_0's RMSE: 1.90517
[50]	valid_0's WRMSSE: 0.527994	valid_0's RMSE: 1.89673
[60]	valid_0's WRMSSE: 0.513228	valid_0's RMSE: 1.8927
[70]	valid_0's WRMSSE: 0.506294	valid_0's RMSE: 1.89032
[80]	valid_0's WRMSSE: 0.500432	valid_0's RMSE: 1.88839
[90]	valid_0's WRMSSE: 0.498318	valid_0's RMSE: 1.88653
[100]	valid_0's WRMSSE: 0.495208	valid_0's RMSE: 1.88513
[110]	valid_0's WRMSSE: 0.493699	valid_0's RMSE: 1.8838
[120]	valid_0's WRMSSE: 0.491817	valid_0's RMSE: 1.88245
[130]	valid_0's WRMSSE: 0.490924	valid_0's RMSE: 1.88117
[140]	valid_0's WRMSSE: 0.490298	valid_0's RMSE: 1.88071
[150]	valid_0's WRMSSE: 0.489538	valid_0's RMSE: 1.87967
[160]	valid_0's WRMSSE: 0.487867	valid_0's RMSE: 1.87935
[170]	valid_0's WRMSSE: 0.487625	valid_0's

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c91e1378>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [85]:
lgb_re3 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,min_data_in_leaf =  2**8-1)

In [86]:

lgb_re3.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 1.57567	valid_0's RMSE: 2.64149
[20]	valid_0's WRMSSE: 1.14346	valid_0's RMSE: 2.19562
[30]	valid_0's WRMSSE: 0.886479	valid_0's RMSE: 2.00782
[40]	valid_0's WRMSSE: 0.740383	valid_0's RMSE: 1.93324
[50]	valid_0's WRMSSE: 0.654043	valid_0's RMSE: 1.90254
[60]	valid_0's WRMSSE: 0.604324	valid_0's RMSE: 1.89012
[70]	valid_0's WRMSSE: 0.574098	valid_0's RMSE: 1.8841
[80]	valid_0's WRMSSE: 0.552182	valid_0's RMSE: 1.8814
[90]	valid_0's WRMSSE: 0.538379	valid_0's RMSE: 1.87962
[100]	valid_0's WRMSSE: 0.530081	valid_0's RMSE: 1.87841
[110]	valid_0's WRMSSE: 0.523992	valid_0's RMSE: 1.87758
[120]	valid_0's WRMSSE: 0.519015	valid_0's RMSE: 1.87692
[130]	valid_0's WRMSSE: 0.517028	valid_0's RMSE: 1.87637
[140]	valid_0's WRMSSE: 0.514616	valid_0's RMSE: 1.87572
[150]	valid_0's WRMSSE: 0.513511	valid_0's RMSE: 1.87565
[160]	valid_0's WRMSSE: 0.511077	valid_0's RMSE: 1.87527
[170]	valid_0's WRMSSE: 0.510724	valid_0

KeyboardInterrupt: 

In [88]:
lgb_re4 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,min_data_in_leaf =  2**8-1,objective='tweedie')

In [89]:

lgb_re4.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 1.9203	valid_0's RMSE: 3.32246
[20]	valid_0's WRMSSE: 1.63035	valid_0's RMSE: 3.02362
[30]	valid_0's WRMSSE: 1.32648	valid_0's RMSE: 2.69143
[40]	valid_0's WRMSSE: 1.06025	valid_0's RMSE: 2.38488
[50]	valid_0's WRMSSE: 0.857795	valid_0's RMSE: 2.16012
[60]	valid_0's WRMSSE: 0.722147	valid_0's RMSE: 2.02327
[70]	valid_0's WRMSSE: 0.638949	valid_0's RMSE: 1.9542
[80]	valid_0's WRMSSE: 0.586294	valid_0's RMSE: 1.92093
[90]	valid_0's WRMSSE: 0.55793	valid_0's RMSE: 1.90588
[100]	valid_0's WRMSSE: 0.537583	valid_0's RMSE: 1.89684
[110]	valid_0's WRMSSE: 0.526311	valid_0's RMSE: 1.89228
[120]	valid_0's WRMSSE: 0.514664	valid_0's RMSE: 1.8893
[130]	valid_0's WRMSSE: 0.509033	valid_0's RMSE: 1.88668
[140]	valid_0's WRMSSE: 0.503479	valid_0's RMSE: 1.88426
[150]	valid_0's WRMSSE: 0.501958	valid_0's RMSE: 1.88298
[160]	valid_0's WRMSSE: 0.499965	valid_0's RMSE: 1.88175
[170]	valid_0's WRMSSE: 0.499191	valid_0's R

KeyboardInterrupt: 

In [154]:
X_train = train1[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25'),'sale']
X_valid = train1[(train1.day_num<=1913)&(train1.day_num>=1886)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913)&(train1.day_num>=1886),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [155]:
X_train.shape

(11128850, 89)

In [156]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [157]:
### add clip

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.39835	valid_0's RMSE: 2.45346
[20]	valid_0's WRMSSE: 1.15425	valid_0's RMSE: 2.03038
[30]	valid_0's WRMSSE: 0.695587	valid_0's RMSE: 1.92839
[40]	valid_0's WRMSSE: 0.564787	valid_0's RMSE: 1.90534
[50]	valid_0's WRMSSE: 0.527349	valid_0's RMSE: 1.89771
[60]	valid_0's WRMSSE: 0.513985	valid_0's RMSE: 1.89377
[70]	valid_0's WRMSSE: 0.506717	valid_0's RMSE: 1.89133
[80]	valid_0's WRMSSE: 0.502145	valid_0's RMSE: 1.88957
[90]	valid_0's WRMSSE: 0.499353	valid_0's RMSE: 1.88782
[100]	valid_0's WRMSSE: 0.496294	valid_0's RMSE: 1.88607
[110]	valid_0's WRMSSE: 0.494545	valid_0's RMSE: 1.88404
[120]	valid_0's WRMSSE: 0.492418	valid_0's RMSE: 1.8827
[130]	valid_0's WRMSSE: 0.491035	valid_0's RMSE: 1.88198
[140]	valid_0's WRMSSE: 0.490311	valid_0's RMSE: 1.88081
[150]	valid_0's WRMSSE: 0.489509	valid_0's RMSE: 1.87978
[160]	valid_0's WRMSSE: 0.488654	valid_0's RMSE: 1.87911
[170]	valid_0's WRMSSE: 0.487559	valid_

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffd90>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [97]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.39835	valid_0's RMSE: 2.45346
[20]	valid_0's WRMSSE: 1.15425	valid_0's RMSE: 2.03038
[30]	valid_0's WRMSSE: 0.695587	valid_0's RMSE: 1.92839
[40]	valid_0's WRMSSE: 0.564787	valid_0's RMSE: 1.90534
[50]	valid_0's WRMSSE: 0.527349	valid_0's RMSE: 1.89771
[60]	valid_0's WRMSSE: 0.513985	valid_0's RMSE: 1.89377
[70]	valid_0's WRMSSE: 0.506717	valid_0's RMSE: 1.89133
[80]	valid_0's WRMSSE: 0.502195	valid_0's RMSE: 1.88954
[90]	valid_0's WRMSSE: 0.499773	valid_0's RMSE: 1.88761
[100]	valid_0's WRMSSE: 0.496592	valid_0's RMSE: 1.88565
[110]	valid_0's WRMSSE: 0.495132	valid_0's RMSE: 1.88353
[120]	valid_0's WRMSSE: 0.493028	valid_0's RMSE: 1.88238
[130]	valid_0's WRMSSE: 0.490376	valid_0's RMSE: 1.88164
[140]	valid_0's WRMSSE: 0.489492	valid_0's RMSE: 1.88085
[150]	valid_0's WRMSSE: 0.488385	valid_0's RMSE: 1.87974
[160]	valid_0's WRMSSE: 0.487822	valid_0's RMSE: 1.87888
[170]	valid_0's WRMSSE: 0.487414	valid

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c91e1378>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [98]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,86402.379553
rolling_sale_14_mean,54087.704597
rolling_sale_7_mean,18689.628203
rolling_sale_91_mean,7214.236200
rolling_sale_28_std,6267.006862
rolling_sale_dayofweek_52_mean,3390.096602
dayofweek,2741.749269
rolling_sale_dayofweek_52_quantile75,2732.591454
rolling_sale_14_quantile75,2644.489157
rolling_sale_364_mean,2464.654596


In [135]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1,max_bin = 63)

In [136]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50,categorical_feature=cat_col)

New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


[1]	training's WRMSSE: 4.50754	training's RMSE: 3.62865	valid_1's WRMSSE: 4.88574	valid_1's RMSE: 3.63883
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 4.16561	training's RMSE: 3.45067	valid_1's WRMSSE: 4.5155	valid_1's RMSE: 3.44998
[3]	training's WRMSSE: 3.84857	training's RMSE: 3.28597	valid_1's WRMSSE: 4.17084	valid_1's RMSE: 3.275
[4]	training's WRMSSE: 3.55624	training's RMSE: 3.13779	valid_1's WRMSSE: 3.85207	valid_1's RMSE: 3.11557
[5]	training's WRMSSE: 3.28566	training's RMSE: 3.0042	valid_1's WRMSSE: 3.5581	valid_1's RMSE: 2.97403
[6]	training's WRMSSE: 3.03814	training's RMSE: 2.88619	valid_1's WRMSSE: 3.28833	valid_1's RMSE: 2.8474
[7]	training's WRMSSE: 2.80873	training's RMSE: 2.7801	valid_1's WRMSSE: 3.03856	valid_1's RMSE: 2.73323
[8]	training's WRMSSE: 2.59804	training's RMSE: 2.68539	valid_1's WRMSSE: 2.80856	valid_1's RMSE: 2.63228
[9]	training's WRMSSE: 2.40444	training's RMSE: 2.60038	valid_1's WRMSSE: 2.59652	valid_1's RMSE: 

[77]	training's WRMSSE: 0.539349	training's RMSE: 1.96008	valid_1's WRMSSE: 0.50818	valid_1's RMSE: 1.89234
[78]	training's WRMSSE: 0.538592	training's RMSE: 1.9596	valid_1's WRMSSE: 0.507098	valid_1's RMSE: 1.89189
[79]	training's WRMSSE: 0.538169	training's RMSE: 1.95909	valid_1's WRMSSE: 0.506524	valid_1's RMSE: 1.89149
[80]	training's WRMSSE: 0.537757	training's RMSE: 1.95872	valid_1's WRMSSE: 0.506449	valid_1's RMSE: 1.89142
[81]	training's WRMSSE: 0.537266	training's RMSE: 1.95843	valid_1's WRMSSE: 0.506334	valid_1's RMSE: 1.89133
[82]	training's WRMSSE: 0.536949	training's RMSE: 1.95803	valid_1's WRMSSE: 0.505909	valid_1's RMSE: 1.89126
[83]	training's WRMSSE: 0.536417	training's RMSE: 1.95768	valid_1's WRMSSE: 0.505616	valid_1's RMSE: 1.89108
[84]	training's WRMSSE: 0.535602	training's RMSE: 1.95739	valid_1's WRMSSE: 0.50526	valid_1's RMSE: 1.89095
[85]	training's WRMSSE: 0.535162	training's RMSE: 1.95713	valid_1's WRMSSE: 0.505285	valid_1's RMSE: 1.89083
[86]	training's WRMSSE

[152]	training's WRMSSE: 0.506095	training's RMSE: 1.93545	valid_1's WRMSSE: 0.490663	valid_1's RMSE: 1.88124
[153]	training's WRMSSE: 0.505975	training's RMSE: 1.93516	valid_1's WRMSSE: 0.490715	valid_1's RMSE: 1.88117
[154]	training's WRMSSE: 0.505908	training's RMSE: 1.93499	valid_1's WRMSSE: 0.490749	valid_1's RMSE: 1.8811
[155]	training's WRMSSE: 0.505786	training's RMSE: 1.93485	valid_1's WRMSSE: 0.490733	valid_1's RMSE: 1.88104
[156]	training's WRMSSE: 0.504843	training's RMSE: 1.93462	valid_1's WRMSSE: 0.490348	valid_1's RMSE: 1.88093
[157]	training's WRMSSE: 0.504734	training's RMSE: 1.93435	valid_1's WRMSSE: 0.490296	valid_1's RMSE: 1.88084
[158]	training's WRMSSE: 0.504618	training's RMSE: 1.93405	valid_1's WRMSSE: 0.49022	valid_1's RMSE: 1.88076
[159]	training's WRMSSE: 0.504308	training's RMSE: 1.93382	valid_1's WRMSSE: 0.490108	valid_1's RMSE: 1.8807
[160]	training's WRMSSE: 0.50421	training's RMSE: 1.93363	valid_1's WRMSSE: 0.490131	valid_1's RMSE: 1.88056
[161]	training

  


[216]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[217]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[218]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[219]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[220]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[221]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[222]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[223]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[224]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[225]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[226]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1'

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_bin=63,
       max_depth=-1, metric='None', min_child_samples=20,
       min_child_weight=0.001, min_data_in_leaf=255, min_split_gain=0.0,
       n_estimators=1000, n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffe18>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [144]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=5000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.01 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1,max_bin = 63)

In [140]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50,categorical_feature=cat_col)

New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


[1]	training's WRMSSE: 4.80529	training's RMSE: 3.7891	valid_1's WRMSSE: 5.20822	valid_1's RMSE: 3.80853
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 4.73251	training's RMSE: 3.74985	valid_1's WRMSSE: 5.12926	valid_1's RMSE: 3.76659
[3]	training's WRMSSE: 4.66045	training's RMSE: 3.7108	valid_1's WRMSSE: 5.05099	valid_1's RMSE: 3.72499
[4]	training's WRMSSE: 4.5896	training's RMSE: 3.67249	valid_1's WRMSSE: 4.97407	valid_1's RMSE: 3.68474
[5]	training's WRMSSE: 4.5196	training's RMSE: 3.63486	valid_1's WRMSSE: 4.89821	valid_1's RMSE: 3.64501
[6]	training's WRMSSE: 4.45112	training's RMSE: 3.59846	valid_1's WRMSSE: 4.82398	valid_1's RMSE: 3.60659
[7]	training's WRMSSE: 4.38331	training's RMSE: 3.56233	valid_1's WRMSSE: 4.75048	valid_1's RMSE: 3.56821
[8]	training's WRMSSE: 4.31668	training's RMSE: 3.52697	valid_1's WRMSSE: 4.67819	valid_1's RMSE: 3.53072
[9]	training's WRMSSE: 4.25096	training's RMSE: 3.49207	valid_1's WRMSSE: 4.60695	valid_1's RMS

[78]	training's WRMSSE: 1.53036	training's RMSE: 2.254	valid_1's WRMSSE: 1.63104	valid_1's RMSE: 2.17275
[79]	training's WRMSSE: 1.50984	training's RMSE: 2.24683	valid_1's WRMSSE: 1.60819	valid_1's RMSE: 2.16514
[80]	training's WRMSSE: 1.48972	training's RMSE: 2.23998	valid_1's WRMSSE: 1.58573	valid_1's RMSE: 2.1579
[81]	training's WRMSSE: 1.46989	training's RMSE: 2.23323	valid_1's WRMSSE: 1.56365	valid_1's RMSE: 2.15078
[82]	training's WRMSSE: 1.45055	training's RMSE: 2.22671	valid_1's WRMSSE: 1.542	valid_1's RMSE: 2.14387
[83]	training's WRMSSE: 1.43155	training's RMSE: 2.22037	valid_1's WRMSSE: 1.52088	valid_1's RMSE: 2.1372
[84]	training's WRMSSE: 1.41293	training's RMSE: 2.21405	valid_1's WRMSSE: 1.50012	valid_1's RMSE: 2.13061
[85]	training's WRMSSE: 1.39465	training's RMSE: 2.20805	valid_1's WRMSSE: 1.47961	valid_1's RMSE: 2.12418
[86]	training's WRMSSE: 1.37661	training's RMSE: 2.20223	valid_1's WRMSSE: 1.45946	valid_1's RMSE: 2.11804
[87]	training's WRMSSE: 1.35905	training's 

[154]	training's WRMSSE: 0.730202	training's RMSE: 2.01837	valid_1's WRMSSE: 0.716218	valid_1's RMSE: 1.93308
[155]	training's WRMSSE: 0.726333	training's RMSE: 2.0174	valid_1's WRMSSE: 0.711648	valid_1's RMSE: 1.93225
[156]	training's WRMSSE: 0.722385	training's RMSE: 2.01649	valid_1's WRMSSE: 0.706992	valid_1's RMSE: 1.93143
[157]	training's WRMSSE: 0.718652	training's RMSE: 2.01557	valid_1's WRMSSE: 0.702492	valid_1's RMSE: 1.93061
[158]	training's WRMSSE: 0.714979	training's RMSE: 2.01462	valid_1's WRMSSE: 0.698019	valid_1's RMSE: 1.92978
[159]	training's WRMSSE: 0.711277	training's RMSE: 2.0138	valid_1's WRMSSE: 0.693655	valid_1's RMSE: 1.92902
[160]	training's WRMSSE: 0.70796	training's RMSE: 2.01289	valid_1's WRMSSE: 0.689814	valid_1's RMSE: 1.92826
[161]	training's WRMSSE: 0.704643	training's RMSE: 2.01209	valid_1's WRMSSE: 0.685859	valid_1's RMSE: 1.92758
[162]	training's WRMSSE: 0.701326	training's RMSE: 2.01128	valid_1's WRMSSE: 0.681893	valid_1's RMSE: 1.9269
[163]	training

[229]	training's WRMSSE: 0.592398	training's RMSE: 1.98278	valid_1's WRMSSE: 0.554761	valid_1's RMSE: 1.90473
[230]	training's WRMSSE: 0.591653	training's RMSE: 1.98256	valid_1's WRMSSE: 0.55395	valid_1's RMSE: 1.90459
[231]	training's WRMSSE: 0.591015	training's RMSE: 1.98233	valid_1's WRMSSE: 0.553293	valid_1's RMSE: 1.90445
[232]	training's WRMSSE: 0.590237	training's RMSE: 1.98205	valid_1's WRMSSE: 0.552602	valid_1's RMSE: 1.90427
[233]	training's WRMSSE: 0.589429	training's RMSE: 1.98185	valid_1's WRMSSE: 0.551769	valid_1's RMSE: 1.90412
[234]	training's WRMSSE: 0.588676	training's RMSE: 1.98159	valid_1's WRMSSE: 0.551046	valid_1's RMSE: 1.90392
[235]	training's WRMSSE: 0.587988	training's RMSE: 1.98132	valid_1's WRMSSE: 0.550313	valid_1's RMSE: 1.90371
[236]	training's WRMSSE: 0.587212	training's RMSE: 1.98105	valid_1's WRMSSE: 0.549462	valid_1's RMSE: 1.90351
[237]	training's WRMSSE: 0.586735	training's RMSE: 1.98074	valid_1's WRMSSE: 0.54894	valid_1's RMSE: 1.90331
[238]	traini

[304]	training's WRMSSE: 0.557895	training's RMSE: 1.96829	valid_1's WRMSSE: 0.522815	valid_1's RMSE: 1.89595
[305]	training's WRMSSE: 0.557459	training's RMSE: 1.96815	valid_1's WRMSSE: 0.52259	valid_1's RMSE: 1.89588
[306]	training's WRMSSE: 0.557234	training's RMSE: 1.968	valid_1's WRMSSE: 0.52243	valid_1's RMSE: 1.89581
[307]	training's WRMSSE: 0.556934	training's RMSE: 1.96786	valid_1's WRMSSE: 0.52221	valid_1's RMSE: 1.89576
[308]	training's WRMSSE: 0.55664	training's RMSE: 1.9677	valid_1's WRMSSE: 0.521838	valid_1's RMSE: 1.89563
[309]	training's WRMSSE: 0.556457	training's RMSE: 1.96762	valid_1's WRMSSE: 0.521717	valid_1's RMSE: 1.8956
[310]	training's WRMSSE: 0.556216	training's RMSE: 1.9675	valid_1's WRMSSE: 0.521539	valid_1's RMSE: 1.89557
[311]	training's WRMSSE: 0.556036	training's RMSE: 1.96736	valid_1's WRMSSE: 0.521425	valid_1's RMSE: 1.89549
[312]	training's WRMSSE: 0.555811	training's RMSE: 1.96726	valid_1's WRMSSE: 0.521293	valid_1's RMSE: 1.89546
[313]	training's WR

[379]	training's WRMSSE: 0.540589	training's RMSE: 1.95948	valid_1's WRMSSE: 0.509943	valid_1's RMSE: 1.8914
[380]	training's WRMSSE: 0.540039	training's RMSE: 1.95932	valid_1's WRMSSE: 0.509519	valid_1's RMSE: 1.89131
[381]	training's WRMSSE: 0.53988	training's RMSE: 1.95923	valid_1's WRMSSE: 0.509469	valid_1's RMSE: 1.89126
[382]	training's WRMSSE: 0.539823	training's RMSE: 1.95916	valid_1's WRMSSE: 0.509426	valid_1's RMSE: 1.89123
[383]	training's WRMSSE: 0.539693	training's RMSE: 1.95905	valid_1's WRMSSE: 0.50938	valid_1's RMSE: 1.89117
[384]	training's WRMSSE: 0.539581	training's RMSE: 1.95899	valid_1's WRMSSE: 0.509292	valid_1's RMSE: 1.89115
[385]	training's WRMSSE: 0.539431	training's RMSE: 1.95887	valid_1's WRMSSE: 0.509155	valid_1's RMSE: 1.89108
[386]	training's WRMSSE: 0.539339	training's RMSE: 1.95877	valid_1's WRMSSE: 0.509048	valid_1's RMSE: 1.89102
[387]	training's WRMSSE: 0.539275	training's RMSE: 1.95871	valid_1's WRMSSE: 0.508982	valid_1's RMSE: 1.89099
[388]	trainin

[454]	training's WRMSSE: 0.531161	training's RMSE: 1.95331	valid_1's WRMSSE: 0.503716	valid_1's RMSE: 1.88848
[455]	training's WRMSSE: 0.531075	training's RMSE: 1.95322	valid_1's WRMSSE: 0.503678	valid_1's RMSE: 1.88844
[456]	training's WRMSSE: 0.530992	training's RMSE: 1.95313	valid_1's WRMSSE: 0.503593	valid_1's RMSE: 1.8884
[457]	training's WRMSSE: 0.530927	training's RMSE: 1.95305	valid_1's WRMSSE: 0.503516	valid_1's RMSE: 1.88837
[458]	training's WRMSSE: 0.530786	training's RMSE: 1.95296	valid_1's WRMSSE: 0.503467	valid_1's RMSE: 1.88833
[459]	training's WRMSSE: 0.530679	training's RMSE: 1.95286	valid_1's WRMSSE: 0.50335	valid_1's RMSE: 1.88827
[460]	training's WRMSSE: 0.53064	training's RMSE: 1.9528	valid_1's WRMSSE: 0.503315	valid_1's RMSE: 1.88824
[461]	training's WRMSSE: 0.530553	training's RMSE: 1.95273	valid_1's WRMSSE: 0.503291	valid_1's RMSE: 1.8882
[462]	training's WRMSSE: 0.530333	training's RMSE: 1.95268	valid_1's WRMSSE: 0.5032	valid_1's RMSE: 1.8882
[463]	training's W

[529]	training's WRMSSE: 0.524277	training's RMSE: 1.94777	valid_1's WRMSSE: 0.499836	valid_1's RMSE: 1.88647
[530]	training's WRMSSE: 0.524182	training's RMSE: 1.94769	valid_1's WRMSSE: 0.499715	valid_1's RMSE: 1.8864
[531]	training's WRMSSE: 0.52404	training's RMSE: 1.94763	valid_1's WRMSSE: 0.499548	valid_1's RMSE: 1.88638
[532]	training's WRMSSE: 0.524013	training's RMSE: 1.94757	valid_1's WRMSSE: 0.499533	valid_1's RMSE: 1.88637
[533]	training's WRMSSE: 0.523976	training's RMSE: 1.94752	valid_1's WRMSSE: 0.499509	valid_1's RMSE: 1.88636
[534]	training's WRMSSE: 0.523929	training's RMSE: 1.94745	valid_1's WRMSSE: 0.499468	valid_1's RMSE: 1.88632
[535]	training's WRMSSE: 0.523871	training's RMSE: 1.94739	valid_1's WRMSSE: 0.499432	valid_1's RMSE: 1.88629
[536]	training's WRMSSE: 0.523782	training's RMSE: 1.94734	valid_1's WRMSSE: 0.499406	valid_1's RMSE: 1.88628
[537]	training's WRMSSE: 0.523728	training's RMSE: 1.94729	valid_1's WRMSSE: 0.49938	valid_1's RMSE: 1.88625
[538]	trainin

[604]	training's WRMSSE: 0.518464	training's RMSE: 1.94278	valid_1's WRMSSE: 0.496217	valid_1's RMSE: 1.88449
[605]	training's WRMSSE: 0.518399	training's RMSE: 1.94269	valid_1's WRMSSE: 0.496174	valid_1's RMSE: 1.88442
[606]	training's WRMSSE: 0.51829	training's RMSE: 1.94259	valid_1's WRMSSE: 0.496123	valid_1's RMSE: 1.88437
[607]	training's WRMSSE: 0.518187	training's RMSE: 1.94253	valid_1's WRMSSE: 0.496147	valid_1's RMSE: 1.88434
[608]	training's WRMSSE: 0.518085	training's RMSE: 1.94248	valid_1's WRMSSE: 0.496092	valid_1's RMSE: 1.88429
[609]	training's WRMSSE: 0.518023	training's RMSE: 1.94241	valid_1's WRMSSE: 0.496067	valid_1's RMSE: 1.88428
[610]	training's WRMSSE: 0.517975	training's RMSE: 1.94235	valid_1's WRMSSE: 0.496081	valid_1's RMSE: 1.88425
[611]	training's WRMSSE: 0.517924	training's RMSE: 1.94227	valid_1's WRMSSE: 0.49605	valid_1's RMSE: 1.8842
[612]	training's WRMSSE: 0.51789	training's RMSE: 1.94222	valid_1's WRMSSE: 0.496009	valid_1's RMSE: 1.88417
[613]	training

[679]	training's WRMSSE: 0.512894	training's RMSE: 1.93803	valid_1's WRMSSE: 0.493798	valid_1's RMSE: 1.88266
[680]	training's WRMSSE: 0.512769	training's RMSE: 1.93798	valid_1's WRMSSE: 0.493689	valid_1's RMSE: 1.88264
[681]	training's WRMSSE: 0.51265	training's RMSE: 1.93791	valid_1's WRMSSE: 0.493594	valid_1's RMSE: 1.88261
[682]	training's WRMSSE: 0.512494	training's RMSE: 1.93784	valid_1's WRMSSE: 0.493507	valid_1's RMSE: 1.88256
[683]	training's WRMSSE: 0.512387	training's RMSE: 1.9378	valid_1's WRMSSE: 0.493384	valid_1's RMSE: 1.88254
[684]	training's WRMSSE: 0.512253	training's RMSE: 1.93774	valid_1's WRMSSE: 0.49323	valid_1's RMSE: 1.88247
[685]	training's WRMSSE: 0.512148	training's RMSE: 1.93766	valid_1's WRMSSE: 0.493176	valid_1's RMSE: 1.88247
[686]	training's WRMSSE: 0.512072	training's RMSE: 1.93761	valid_1's WRMSSE: 0.493143	valid_1's RMSE: 1.88247
[687]	training's WRMSSE: 0.511989	training's RMSE: 1.93753	valid_1's WRMSSE: 0.493122	valid_1's RMSE: 1.88245
[688]	trainin

[754]	training's WRMSSE: 0.507789	training's RMSE: 1.93401	valid_1's WRMSSE: 0.491646	valid_1's RMSE: 1.88124
[755]	training's WRMSSE: 0.507702	training's RMSE: 1.93395	valid_1's WRMSSE: 0.491558	valid_1's RMSE: 1.88118
[756]	training's WRMSSE: 0.507687	training's RMSE: 1.93391	valid_1's WRMSSE: 0.49156	valid_1's RMSE: 1.88117
[757]	training's WRMSSE: 0.507664	training's RMSE: 1.93385	valid_1's WRMSSE: 0.491567	valid_1's RMSE: 1.88117
[758]	training's WRMSSE: 0.507613	training's RMSE: 1.9338	valid_1's WRMSSE: 0.491574	valid_1's RMSE: 1.88116
[759]	training's WRMSSE: 0.507595	training's RMSE: 1.93377	valid_1's WRMSSE: 0.491561	valid_1's RMSE: 1.88115
[760]	training's WRMSSE: 0.507534	training's RMSE: 1.93374	valid_1's WRMSSE: 0.491476	valid_1's RMSE: 1.88114
[761]	training's WRMSSE: 0.50747	training's RMSE: 1.93369	valid_1's WRMSSE: 0.491419	valid_1's RMSE: 1.88111
[762]	training's WRMSSE: 0.507436	training's RMSE: 1.93365	valid_1's WRMSSE: 0.491408	valid_1's RMSE: 1.8811
[763]	training

[829]	training's WRMSSE: 0.503883	training's RMSE: 1.93016	valid_1's WRMSSE: 0.48985	valid_1's RMSE: 1.87969
[830]	training's WRMSSE: 0.503849	training's RMSE: 1.93014	valid_1's WRMSSE: 0.489832	valid_1's RMSE: 1.87968
[831]	training's WRMSSE: 0.503833	training's RMSE: 1.9301	valid_1's WRMSSE: 0.489835	valid_1's RMSE: 1.87968
[832]	training's WRMSSE: 0.503735	training's RMSE: 1.93005	valid_1's WRMSSE: 0.489817	valid_1's RMSE: 1.87966
[833]	training's WRMSSE: 0.503715	training's RMSE: 1.93001	valid_1's WRMSSE: 0.489812	valid_1's RMSE: 1.87966
[834]	training's WRMSSE: 0.503664	training's RMSE: 1.92996	valid_1's WRMSSE: 0.489792	valid_1's RMSE: 1.87963
[835]	training's WRMSSE: 0.50365	training's RMSE: 1.9299	valid_1's WRMSSE: 0.489801	valid_1's RMSE: 1.87963
[836]	training's WRMSSE: 0.50358	training's RMSE: 1.92982	valid_1's WRMSSE: 0.489771	valid_1's RMSE: 1.87959
[837]	training's WRMSSE: 0.503562	training's RMSE: 1.92976	valid_1's WRMSSE: 0.489779	valid_1's RMSE: 1.87957
[838]	training'

[904]	training's WRMSSE: 0.49979	training's RMSE: 1.92668	valid_1's WRMSSE: 0.488464	valid_1's RMSE: 1.87849
[905]	training's WRMSSE: 0.499774	training's RMSE: 1.92665	valid_1's WRMSSE: 0.488456	valid_1's RMSE: 1.87848
[906]	training's WRMSSE: 0.499693	training's RMSE: 1.9266	valid_1's WRMSSE: 0.488358	valid_1's RMSE: 1.87843
[907]	training's WRMSSE: 0.499666	training's RMSE: 1.92659	valid_1's WRMSSE: 0.488337	valid_1's RMSE: 1.87843
[908]	training's WRMSSE: 0.499647	training's RMSE: 1.92653	valid_1's WRMSSE: 0.488322	valid_1's RMSE: 1.87841
[909]	training's WRMSSE: 0.499632	training's RMSE: 1.92647	valid_1's WRMSSE: 0.488318	valid_1's RMSE: 1.87839
[910]	training's WRMSSE: 0.499618	training's RMSE: 1.92641	valid_1's WRMSSE: 0.488318	valid_1's RMSE: 1.87834
[911]	training's WRMSSE: 0.499601	training's RMSE: 1.92635	valid_1's WRMSSE: 0.488314	valid_1's RMSE: 1.87831
[912]	training's WRMSSE: 0.499569	training's RMSE: 1.9263	valid_1's WRMSSE: 0.488296	valid_1's RMSE: 1.87828
[913]	trainin

[979]	training's WRMSSE: 0.496156	training's RMSE: 1.92356	valid_1's WRMSSE: 0.486989	valid_1's RMSE: 1.87737
[980]	training's WRMSSE: 0.496126	training's RMSE: 1.92352	valid_1's WRMSSE: 0.486987	valid_1's RMSE: 1.87735
[981]	training's WRMSSE: 0.496111	training's RMSE: 1.92347	valid_1's WRMSSE: 0.486976	valid_1's RMSE: 1.87734
[982]	training's WRMSSE: 0.496082	training's RMSE: 1.92343	valid_1's WRMSSE: 0.48696	valid_1's RMSE: 1.87731
[983]	training's WRMSSE: 0.496052	training's RMSE: 1.9234	valid_1's WRMSSE: 0.486965	valid_1's RMSE: 1.87731
[984]	training's WRMSSE: 0.496034	training's RMSE: 1.92338	valid_1's WRMSSE: 0.486956	valid_1's RMSE: 1.8773
[985]	training's WRMSSE: 0.496009	training's RMSE: 1.92334	valid_1's WRMSSE: 0.486922	valid_1's RMSE: 1.87729
[986]	training's WRMSSE: 0.495991	training's RMSE: 1.92331	valid_1's WRMSSE: 0.486925	valid_1's RMSE: 1.87729
[987]	training's WRMSSE: 0.495934	training's RMSE: 1.92329	valid_1's WRMSSE: 0.486906	valid_1's RMSE: 1.87728
[988]	trainin

[1054]	training's WRMSSE: 0.492433	training's RMSE: 1.92099	valid_1's WRMSSE: 0.486125	valid_1's RMSE: 1.87648
[1055]	training's WRMSSE: 0.492425	training's RMSE: 1.92097	valid_1's WRMSSE: 0.486095	valid_1's RMSE: 1.87648
[1056]	training's WRMSSE: 0.492417	training's RMSE: 1.92093	valid_1's WRMSSE: 0.486104	valid_1's RMSE: 1.87646
[1057]	training's WRMSSE: 0.492401	training's RMSE: 1.92087	valid_1's WRMSSE: 0.486101	valid_1's RMSE: 1.87646
[1058]	training's WRMSSE: 0.492374	training's RMSE: 1.92083	valid_1's WRMSSE: 0.486113	valid_1's RMSE: 1.87645
[1059]	training's WRMSSE: 0.49234	training's RMSE: 1.92081	valid_1's WRMSSE: 0.486111	valid_1's RMSE: 1.87645
[1060]	training's WRMSSE: 0.492293	training's RMSE: 1.92079	valid_1's WRMSSE: 0.486034	valid_1's RMSE: 1.87644
[1061]	training's WRMSSE: 0.492282	training's RMSE: 1.92073	valid_1's WRMSSE: 0.486042	valid_1's RMSE: 1.87642
[1062]	training's WRMSSE: 0.492274	training's RMSE: 1.92071	valid_1's WRMSSE: 0.486042	valid_1's RMSE: 1.8764
[10

[1129]	training's WRMSSE: 0.490564	training's RMSE: 1.91842	valid_1's WRMSSE: 0.485666	valid_1's RMSE: 1.87581
[1130]	training's WRMSSE: 0.490549	training's RMSE: 1.91839	valid_1's WRMSSE: 0.485669	valid_1's RMSE: 1.8758
[1131]	training's WRMSSE: 0.490534	training's RMSE: 1.91838	valid_1's WRMSSE: 0.485664	valid_1's RMSE: 1.8758
[1132]	training's WRMSSE: 0.490511	training's RMSE: 1.91834	valid_1's WRMSSE: 0.485664	valid_1's RMSE: 1.87578
[1133]	training's WRMSSE: 0.490484	training's RMSE: 1.91831	valid_1's WRMSSE: 0.485643	valid_1's RMSE: 1.87579
[1134]	training's WRMSSE: 0.49044	training's RMSE: 1.91823	valid_1's WRMSSE: 0.485654	valid_1's RMSE: 1.87578
[1135]	training's WRMSSE: 0.490406	training's RMSE: 1.91817	valid_1's WRMSSE: 0.485629	valid_1's RMSE: 1.87575
[1136]	training's WRMSSE: 0.490398	training's RMSE: 1.91814	valid_1's WRMSSE: 0.485641	valid_1's RMSE: 1.87573
[1137]	training's WRMSSE: 0.49038	training's RMSE: 1.91813	valid_1's WRMSSE: 0.485639	valid_1's RMSE: 1.87573
[1138

  


[1173]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1174]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1175]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1176]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1177]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1178]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1179]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1180]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1181]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1182]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[1183]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: n

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.01, max_bin=63,
       max_depth=-1, metric='None', min_child_samples=20,
       min_child_weight=0.001, min_data_in_leaf=255, min_split_gain=0.0,
       n_estimators=5000, n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffe18>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [145]:
#### 小于0.00001 直接打成100
lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50,categorical_feature=cat_col)

New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
New categorical_feature is ['cat_id', 'dept_id', 'event_name_1', 'event_name_2', 'event_type_1', 'event_type_2', 'item_id', 'state_id', 'store_id']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


[1]	training's WRMSSE: 4.80529	training's RMSE: 3.7891	valid_1's WRMSSE: 5.20822	valid_1's RMSE: 3.80853
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 4.73251	training's RMSE: 3.74985	valid_1's WRMSSE: 5.12926	valid_1's RMSE: 3.76659
[3]	training's WRMSSE: 4.66045	training's RMSE: 3.7108	valid_1's WRMSSE: 5.05099	valid_1's RMSE: 3.72499
[4]	training's WRMSSE: 4.5896	training's RMSE: 3.67249	valid_1's WRMSSE: 4.97407	valid_1's RMSE: 3.68474
[5]	training's WRMSSE: 4.5196	training's RMSE: 3.63486	valid_1's WRMSSE: 4.89821	valid_1's RMSE: 3.64501
[6]	training's WRMSSE: 4.45112	training's RMSE: 3.59846	valid_1's WRMSSE: 4.82398	valid_1's RMSE: 3.60659
[7]	training's WRMSSE: 4.38331	training's RMSE: 3.56233	valid_1's WRMSSE: 4.75048	valid_1's RMSE: 3.56821
[8]	training's WRMSSE: 4.31668	training's RMSE: 3.52697	valid_1's WRMSSE: 4.67819	valid_1's RMSE: 3.53072
[9]	training's WRMSSE: 4.25096	training's RMSE: 3.49207	valid_1's WRMSSE: 4.60695	valid_1's RMS

[78]	training's WRMSSE: 1.53041	training's RMSE: 2.25422	valid_1's WRMSSE: 1.63095	valid_1's RMSE: 2.17267
[79]	training's WRMSSE: 1.50989	training's RMSE: 2.24705	valid_1's WRMSSE: 1.60809	valid_1's RMSE: 2.16504
[80]	training's WRMSSE: 1.48976	training's RMSE: 2.24019	valid_1's WRMSSE: 1.58566	valid_1's RMSE: 2.15781
[81]	training's WRMSSE: 1.46998	training's RMSE: 2.23348	valid_1's WRMSSE: 1.56361	valid_1's RMSE: 2.15072
[82]	training's WRMSSE: 1.45063	training's RMSE: 2.22696	valid_1's WRMSSE: 1.54194	valid_1's RMSE: 2.14381
[83]	training's WRMSSE: 1.43161	training's RMSE: 2.22061	valid_1's WRMSSE: 1.52076	valid_1's RMSE: 2.13714
[84]	training's WRMSSE: 1.413	training's RMSE: 2.21428	valid_1's WRMSSE: 1.5	valid_1's RMSE: 2.13055
[85]	training's WRMSSE: 1.39469	training's RMSE: 2.20827	valid_1's WRMSSE: 1.47946	valid_1's RMSE: 2.12411
[86]	training's WRMSSE: 1.3766	training's RMSE: 2.20244	valid_1's WRMSSE: 1.45927	valid_1's RMSE: 2.11797
[87]	training's WRMSSE: 1.35905	training's R

[154]	training's WRMSSE: 0.730478	training's RMSE: 2.01827	valid_1's WRMSSE: 0.716163	valid_1's RMSE: 1.93296
[155]	training's WRMSSE: 0.726612	training's RMSE: 2.01731	valid_1's WRMSSE: 0.711566	valid_1's RMSE: 1.93213
[156]	training's WRMSSE: 0.722678	training's RMSE: 2.0164	valid_1's WRMSSE: 0.706908	valid_1's RMSE: 1.93132
[157]	training's WRMSSE: 0.718941	training's RMSE: 2.0155	valid_1's WRMSSE: 0.702374	valid_1's RMSE: 1.9305
[158]	training's WRMSSE: 0.715258	training's RMSE: 2.01461	valid_1's WRMSSE: 0.698009	valid_1's RMSE: 1.92975
[159]	training's WRMSSE: 0.711502	training's RMSE: 2.01375	valid_1's WRMSSE: 0.693561	valid_1's RMSE: 1.92897
[160]	training's WRMSSE: 0.70818	training's RMSE: 2.01283	valid_1's WRMSSE: 0.689728	valid_1's RMSE: 1.92821
[161]	training's WRMSSE: 0.70484	training's RMSE: 2.01205	valid_1's WRMSSE: 0.685665	valid_1's RMSE: 1.92753
[162]	training's WRMSSE: 0.701536	training's RMSE: 2.01124	valid_1's WRMSSE: 0.681734	valid_1's RMSE: 1.92685
[163]	training'

[229]	training's WRMSSE: 0.592356	training's RMSE: 1.98275	valid_1's WRMSSE: 0.554661	valid_1's RMSE: 1.90466
[230]	training's WRMSSE: 0.591617	training's RMSE: 1.98255	valid_1's WRMSSE: 0.55382	valid_1's RMSE: 1.90453
[231]	training's WRMSSE: 0.590991	training's RMSE: 1.98232	valid_1's WRMSSE: 0.553185	valid_1's RMSE: 1.9044
[232]	training's WRMSSE: 0.59021	training's RMSE: 1.98204	valid_1's WRMSSE: 0.552481	valid_1's RMSE: 1.90422
[233]	training's WRMSSE: 0.589429	training's RMSE: 1.98188	valid_1's WRMSSE: 0.55173	valid_1's RMSE: 1.90409
[234]	training's WRMSSE: 0.588719	training's RMSE: 1.98162	valid_1's WRMSSE: 0.55104	valid_1's RMSE: 1.90392
[235]	training's WRMSSE: 0.588052	training's RMSE: 1.98136	valid_1's WRMSSE: 0.550326	valid_1's RMSE: 1.90375
[236]	training's WRMSSE: 0.587326	training's RMSE: 1.98109	valid_1's WRMSSE: 0.549539	valid_1's RMSE: 1.90356
[237]	training's WRMSSE: 0.586841	training's RMSE: 1.98085	valid_1's WRMSSE: 0.54899	valid_1's RMSE: 1.90337
[238]	training's

[304]	training's WRMSSE: 0.558044	training's RMSE: 1.96845	valid_1's WRMSSE: 0.522742	valid_1's RMSE: 1.89605
[305]	training's WRMSSE: 0.557617	training's RMSE: 1.96834	valid_1's WRMSSE: 0.522469	valid_1's RMSE: 1.896
[306]	training's WRMSSE: 0.557381	training's RMSE: 1.96823	valid_1's WRMSSE: 0.522326	valid_1's RMSE: 1.89595
[307]	training's WRMSSE: 0.557103	training's RMSE: 1.96806	valid_1's WRMSSE: 0.522068	valid_1's RMSE: 1.89587
[308]	training's WRMSSE: 0.556849	training's RMSE: 1.96791	valid_1's WRMSSE: 0.521766	valid_1's RMSE: 1.89577
[309]	training's WRMSSE: 0.556607	training's RMSE: 1.9678	valid_1's WRMSSE: 0.52164	valid_1's RMSE: 1.89573
[310]	training's WRMSSE: 0.556345	training's RMSE: 1.96763	valid_1's WRMSSE: 0.521435	valid_1's RMSE: 1.89566
[311]	training's WRMSSE: 0.556118	training's RMSE: 1.96746	valid_1's WRMSSE: 0.521308	valid_1's RMSE: 1.89554
[312]	training's WRMSSE: 0.55588	training's RMSE: 1.96735	valid_1's WRMSSE: 0.52117	valid_1's RMSE: 1.8955
[313]	training's 

[379]	training's WRMSSE: 0.540771	training's RMSE: 1.95984	valid_1's WRMSSE: 0.509554	valid_1's RMSE: 1.89151
[380]	training's WRMSSE: 0.540203	training's RMSE: 1.95969	valid_1's WRMSSE: 0.509141	valid_1's RMSE: 1.89141
[381]	training's WRMSSE: 0.540058	training's RMSE: 1.95955	valid_1's WRMSSE: 0.508999	valid_1's RMSE: 1.89133
[382]	training's WRMSSE: 0.539966	training's RMSE: 1.95948	valid_1's WRMSSE: 0.508919	valid_1's RMSE: 1.89131
[383]	training's WRMSSE: 0.539817	training's RMSE: 1.95943	valid_1's WRMSSE: 0.508796	valid_1's RMSE: 1.89129
[384]	training's WRMSSE: 0.539679	training's RMSE: 1.95938	valid_1's WRMSSE: 0.508682	valid_1's RMSE: 1.89127
[385]	training's WRMSSE: 0.539542	training's RMSE: 1.95927	valid_1's WRMSSE: 0.508607	valid_1's RMSE: 1.89123
[386]	training's WRMSSE: 0.539437	training's RMSE: 1.95919	valid_1's WRMSSE: 0.508523	valid_1's RMSE: 1.8912
[387]	training's WRMSSE: 0.539351	training's RMSE: 1.95912	valid_1's WRMSSE: 0.508458	valid_1's RMSE: 1.89118
[388]	train

[454]	training's WRMSSE: 0.530664	training's RMSE: 1.95359	valid_1's WRMSSE: 0.503312	valid_1's RMSE: 1.88869
[455]	training's WRMSSE: 0.530575	training's RMSE: 1.95349	valid_1's WRMSSE: 0.503264	valid_1's RMSE: 1.88864
[456]	training's WRMSSE: 0.530285	training's RMSE: 1.95339	valid_1's WRMSSE: 0.503236	valid_1's RMSE: 1.8886
[457]	training's WRMSSE: 0.530233	training's RMSE: 1.95331	valid_1's WRMSSE: 0.503191	valid_1's RMSE: 1.88854
[458]	training's WRMSSE: 0.530125	training's RMSE: 1.95323	valid_1's WRMSSE: 0.503193	valid_1's RMSE: 1.88853
[459]	training's WRMSSE: 0.530067	training's RMSE: 1.95311	valid_1's WRMSSE: 0.503131	valid_1's RMSE: 1.88845
[460]	training's WRMSSE: 0.529985	training's RMSE: 1.95307	valid_1's WRMSSE: 0.503044	valid_1's RMSE: 1.88842
[461]	training's WRMSSE: 0.529886	training's RMSE: 1.95299	valid_1's WRMSSE: 0.50303	valid_1's RMSE: 1.88838
[462]	training's WRMSSE: 0.529668	training's RMSE: 1.95293	valid_1's WRMSSE: 0.502885	valid_1's RMSE: 1.88837
[463]	traini

[529]	training's WRMSSE: 0.522586	training's RMSE: 1.9484	valid_1's WRMSSE: 0.498885	valid_1's RMSE: 1.88652
[530]	training's WRMSSE: 0.522517	training's RMSE: 1.94831	valid_1's WRMSSE: 0.498789	valid_1's RMSE: 1.88644
[531]	training's WRMSSE: 0.522415	training's RMSE: 1.94825	valid_1's WRMSSE: 0.498693	valid_1's RMSE: 1.88641
[532]	training's WRMSSE: 0.52238	training's RMSE: 1.94816	valid_1's WRMSSE: 0.498668	valid_1's RMSE: 1.88639
[533]	training's WRMSSE: 0.52234	training's RMSE: 1.94813	valid_1's WRMSSE: 0.498643	valid_1's RMSE: 1.88638
[534]	training's WRMSSE: 0.522297	training's RMSE: 1.94806	valid_1's WRMSSE: 0.498613	valid_1's RMSE: 1.88636
[535]	training's WRMSSE: 0.522263	training's RMSE: 1.94802	valid_1's WRMSSE: 0.498612	valid_1's RMSE: 1.88636
[536]	training's WRMSSE: 0.522214	training's RMSE: 1.94799	valid_1's WRMSSE: 0.498565	valid_1's RMSE: 1.88635
[537]	training's WRMSSE: 0.522186	training's RMSE: 1.94796	valid_1's WRMSSE: 0.498544	valid_1's RMSE: 1.88633
[538]	trainin

[604]	training's WRMSSE: 0.516236	training's RMSE: 1.94358	valid_1's WRMSSE: 0.495052	valid_1's RMSE: 1.88429
[605]	training's WRMSSE: 0.516184	training's RMSE: 1.94351	valid_1's WRMSSE: 0.495012	valid_1's RMSE: 1.88424
[606]	training's WRMSSE: 0.516119	training's RMSE: 1.94341	valid_1's WRMSSE: 0.49498	valid_1's RMSE: 1.88417
[607]	training's WRMSSE: 0.516057	training's RMSE: 1.94331	valid_1's WRMSSE: 0.494957	valid_1's RMSE: 1.88413
[608]	training's WRMSSE: 0.516021	training's RMSE: 1.94326	valid_1's WRMSSE: 0.494932	valid_1's RMSE: 1.8841
[609]	training's WRMSSE: 0.515912	training's RMSE: 1.94321	valid_1's WRMSSE: 0.494854	valid_1's RMSE: 1.88409
[610]	training's WRMSSE: 0.515853	training's RMSE: 1.94317	valid_1's WRMSSE: 0.494789	valid_1's RMSE: 1.88406
[611]	training's WRMSSE: 0.515814	training's RMSE: 1.9431	valid_1's WRMSSE: 0.494762	valid_1's RMSE: 1.88402
[612]	training's WRMSSE: 0.515789	training's RMSE: 1.94307	valid_1's WRMSSE: 0.494728	valid_1's RMSE: 1.88398
[613]	trainin

[679]	training's WRMSSE: 0.511037	training's RMSE: 1.93914	valid_1's WRMSSE: 0.492522	valid_1's RMSE: 1.88241
[680]	training's WRMSSE: 0.510957	training's RMSE: 1.93908	valid_1's WRMSSE: 0.492436	valid_1's RMSE: 1.88238
[681]	training's WRMSSE: 0.510912	training's RMSE: 1.93901	valid_1's WRMSSE: 0.492448	valid_1's RMSE: 1.8824
[682]	training's WRMSSE: 0.510821	training's RMSE: 1.93895	valid_1's WRMSSE: 0.49239	valid_1's RMSE: 1.88237
[683]	training's WRMSSE: 0.51072	training's RMSE: 1.9389	valid_1's WRMSSE: 0.4923	valid_1's RMSE: 1.88234
[684]	training's WRMSSE: 0.510687	training's RMSE: 1.93886	valid_1's WRMSSE: 0.492286	valid_1's RMSE: 1.88234
[685]	training's WRMSSE: 0.510548	training's RMSE: 1.93879	valid_1's WRMSSE: 0.492262	valid_1's RMSE: 1.88234
[686]	training's WRMSSE: 0.510505	training's RMSE: 1.93875	valid_1's WRMSSE: 0.492222	valid_1's RMSE: 1.88232
[687]	training's WRMSSE: 0.510421	training's RMSE: 1.93866	valid_1's WRMSSE: 0.492199	valid_1's RMSE: 1.88229
[688]	training's

[754]	training's WRMSSE: 0.506415	training's RMSE: 1.93525	valid_1's WRMSSE: 0.490548	valid_1's RMSE: 1.88107
[755]	training's WRMSSE: 0.506389	training's RMSE: 1.93521	valid_1's WRMSSE: 0.490565	valid_1's RMSE: 1.88106
[756]	training's WRMSSE: 0.506368	training's RMSE: 1.93516	valid_1's WRMSSE: 0.490569	valid_1's RMSE: 1.88105
[757]	training's WRMSSE: 0.506355	training's RMSE: 1.93513	valid_1's WRMSSE: 0.490558	valid_1's RMSE: 1.88103
[758]	training's WRMSSE: 0.506332	training's RMSE: 1.9351	valid_1's WRMSSE: 0.490554	valid_1's RMSE: 1.88103
[759]	training's WRMSSE: 0.50632	training's RMSE: 1.93505	valid_1's WRMSSE: 0.490554	valid_1's RMSE: 1.881
[760]	training's WRMSSE: 0.506279	training's RMSE: 1.93503	valid_1's WRMSSE: 0.490516	valid_1's RMSE: 1.88099
[761]	training's WRMSSE: 0.506209	training's RMSE: 1.93501	valid_1's WRMSSE: 0.490489	valid_1's RMSE: 1.88099
[762]	training's WRMSSE: 0.506169	training's RMSE: 1.93498	valid_1's WRMSSE: 0.490482	valid_1's RMSE: 1.88098
[763]	training

[829]	training's WRMSSE: 0.502476	training's RMSE: 1.93147	valid_1's WRMSSE: 0.488821	valid_1's RMSE: 1.87966
[830]	training's WRMSSE: 0.502458	training's RMSE: 1.93143	valid_1's WRMSSE: 0.488824	valid_1's RMSE: 1.87964
[831]	training's WRMSSE: 0.502432	training's RMSE: 1.93138	valid_1's WRMSSE: 0.488832	valid_1's RMSE: 1.87963
[832]	training's WRMSSE: 0.502393	training's RMSE: 1.93135	valid_1's WRMSSE: 0.488834	valid_1's RMSE: 1.87963
[833]	training's WRMSSE: 0.502377	training's RMSE: 1.93131	valid_1's WRMSSE: 0.488825	valid_1's RMSE: 1.87961
[834]	training's WRMSSE: 0.50235	training's RMSE: 1.93127	valid_1's WRMSSE: 0.488804	valid_1's RMSE: 1.8796
[835]	training's WRMSSE: 0.502335	training's RMSE: 1.93123	valid_1's WRMSSE: 0.488781	valid_1's RMSE: 1.8796
[836]	training's WRMSSE: 0.502314	training's RMSE: 1.93121	valid_1's WRMSSE: 0.488775	valid_1's RMSE: 1.87958
[837]	training's WRMSSE: 0.502284	training's RMSE: 1.93116	valid_1's WRMSSE: 0.488778	valid_1's RMSE: 1.87958
[838]	trainin

[904]	training's WRMSSE: 0.498672	training's RMSE: 1.92787	valid_1's WRMSSE: 0.487767	valid_1's RMSE: 1.87853
[905]	training's WRMSSE: 0.498659	training's RMSE: 1.92784	valid_1's WRMSSE: 0.487755	valid_1's RMSE: 1.87852
[906]	training's WRMSSE: 0.498598	training's RMSE: 1.92781	valid_1's WRMSSE: 0.487744	valid_1's RMSE: 1.87851
[907]	training's WRMSSE: 0.498588	training's RMSE: 1.92779	valid_1's WRMSSE: 0.48775	valid_1's RMSE: 1.87851
[908]	training's WRMSSE: 0.498569	training's RMSE: 1.92774	valid_1's WRMSSE: 0.487742	valid_1's RMSE: 1.87851
[909]	training's WRMSSE: 0.49855	training's RMSE: 1.92771	valid_1's WRMSSE: 0.48775	valid_1's RMSE: 1.87852
[910]	training's WRMSSE: 0.498513	training's RMSE: 1.92768	valid_1's WRMSSE: 0.487769	valid_1's RMSE: 1.87851
[911]	training's WRMSSE: 0.498492	training's RMSE: 1.92765	valid_1's WRMSSE: 0.487765	valid_1's RMSE: 1.87851
[912]	training's WRMSSE: 0.498458	training's RMSE: 1.92761	valid_1's WRMSSE: 0.487744	valid_1's RMSE: 1.87849
[913]	trainin

[979]	training's WRMSSE: 0.49507	training's RMSE: 1.92447	valid_1's WRMSSE: 0.486668	valid_1's RMSE: 1.8775
[980]	training's WRMSSE: 0.495039	training's RMSE: 1.92443	valid_1's WRMSSE: 0.486673	valid_1's RMSE: 1.87749
[981]	training's WRMSSE: 0.49502	training's RMSE: 1.92438	valid_1's WRMSSE: 0.486675	valid_1's RMSE: 1.87747
[982]	training's WRMSSE: 0.495001	training's RMSE: 1.92433	valid_1's WRMSSE: 0.486688	valid_1's RMSE: 1.87747
[983]	training's WRMSSE: 0.494954	training's RMSE: 1.92431	valid_1's WRMSSE: 0.486591	valid_1's RMSE: 1.87744
[984]	training's WRMSSE: 0.494943	training's RMSE: 1.9243	valid_1's WRMSSE: 0.48659	valid_1's RMSE: 1.87744
[985]	training's WRMSSE: 0.494924	training's RMSE: 1.92425	valid_1's WRMSSE: 0.486601	valid_1's RMSE: 1.87741
[986]	training's WRMSSE: 0.494855	training's RMSE: 1.92419	valid_1's WRMSSE: 0.486541	valid_1's RMSE: 1.87737
[987]	training's WRMSSE: 0.494817	training's RMSE: 1.92418	valid_1's WRMSSE: 0.486539	valid_1's RMSE: 1.87736
[988]	training'

[1054]	training's WRMSSE: 0.492169	training's RMSE: 1.92173	valid_1's WRMSSE: 0.486021	valid_1's RMSE: 1.87656
[1055]	training's WRMSSE: 0.492159	training's RMSE: 1.92171	valid_1's WRMSSE: 0.486009	valid_1's RMSE: 1.87655
[1056]	training's WRMSSE: 0.49215	training's RMSE: 1.92168	valid_1's WRMSSE: 0.486001	valid_1's RMSE: 1.87653
[1057]	training's WRMSSE: 0.492138	training's RMSE: 1.92164	valid_1's WRMSSE: 0.485998	valid_1's RMSE: 1.87653
[1058]	training's WRMSSE: 0.492107	training's RMSE: 1.92162	valid_1's WRMSSE: 0.485996	valid_1's RMSE: 1.87652
[1059]	training's WRMSSE: 0.492088	training's RMSE: 1.92159	valid_1's WRMSSE: 0.485992	valid_1's RMSE: 1.87651
[1060]	training's WRMSSE: 0.49204	training's RMSE: 1.92156	valid_1's WRMSSE: 0.485966	valid_1's RMSE: 1.8765
[1061]	training's WRMSSE: 0.492024	training's RMSE: 1.92151	valid_1's WRMSSE: 0.485967	valid_1's RMSE: 1.87649
[1062]	training's WRMSSE: 0.492004	training's RMSE: 1.92149	valid_1's WRMSSE: 0.48597	valid_1's RMSE: 1.87648
[1063

[1129]	training's WRMSSE: 0.490143	training's RMSE: 1.91923	valid_1's WRMSSE: 0.485363	valid_1's RMSE: 1.87573
[1130]	training's WRMSSE: 0.490137	training's RMSE: 1.91922	valid_1's WRMSSE: 0.485363	valid_1's RMSE: 1.87573
[1131]	training's WRMSSE: 0.490119	training's RMSE: 1.9192	valid_1's WRMSSE: 0.485362	valid_1's RMSE: 1.87573
[1132]	training's WRMSSE: 0.490111	training's RMSE: 1.91916	valid_1's WRMSSE: 0.485375	valid_1's RMSE: 1.87572
[1133]	training's WRMSSE: 0.490094	training's RMSE: 1.91914	valid_1's WRMSSE: 0.485368	valid_1's RMSE: 1.87572
[1134]	training's WRMSSE: 0.489964	training's RMSE: 1.91908	valid_1's WRMSSE: 0.485206	valid_1's RMSE: 1.87567
[1135]	training's WRMSSE: 0.489929	training's RMSE: 1.91902	valid_1's WRMSSE: 0.485179	valid_1's RMSE: 1.87563
[1136]	training's WRMSSE: 0.489915	training's RMSE: 1.91898	valid_1's WRMSSE: 0.48519	valid_1's RMSE: 1.87562
[1137]	training's WRMSSE: 0.489907	training's RMSE: 1.91897	valid_1's WRMSSE: 0.485194	valid_1's RMSE: 1.87562
[11

[1204]	training's WRMSSE: 0.487851	training's RMSE: 1.91681	valid_1's WRMSSE: 0.484377	valid_1's RMSE: 1.8751
[1205]	training's WRMSSE: 0.487823	training's RMSE: 1.91678	valid_1's WRMSSE: 0.484403	valid_1's RMSE: 1.87509
[1206]	training's WRMSSE: 0.48781	training's RMSE: 1.91674	valid_1's WRMSSE: 0.484401	valid_1's RMSE: 1.87507
[1207]	training's WRMSSE: 0.487803	training's RMSE: 1.91669	valid_1's WRMSSE: 0.484407	valid_1's RMSE: 1.87502
[1208]	training's WRMSSE: 0.487788	training's RMSE: 1.91665	valid_1's WRMSSE: 0.484398	valid_1's RMSE: 1.87502
[1209]	training's WRMSSE: 0.487775	training's RMSE: 1.91661	valid_1's WRMSSE: 0.484399	valid_1's RMSE: 1.87502
[1210]	training's WRMSSE: 0.487762	training's RMSE: 1.91658	valid_1's WRMSSE: 0.484402	valid_1's RMSE: 1.87501
[1211]	training's WRMSSE: 0.487748	training's RMSE: 1.91652	valid_1's WRMSSE: 0.484403	valid_1's RMSE: 1.87496
[1212]	training's WRMSSE: 0.487731	training's RMSE: 1.91647	valid_1's WRMSSE: 0.484401	valid_1's RMSE: 1.87494
[12

[1279]	training's WRMSSE: 0.485952	training's RMSE: 1.91439	valid_1's WRMSSE: 0.484047	valid_1's RMSE: 1.87445
[1280]	training's WRMSSE: 0.485937	training's RMSE: 1.91436	valid_1's WRMSSE: 0.484	valid_1's RMSE: 1.87445
[1281]	training's WRMSSE: 0.485919	training's RMSE: 1.91435	valid_1's WRMSSE: 0.484011	valid_1's RMSE: 1.87444
[1282]	training's WRMSSE: 0.485892	training's RMSE: 1.91433	valid_1's WRMSSE: 0.483989	valid_1's RMSE: 1.87443
[1283]	training's WRMSSE: 0.485889	training's RMSE: 1.91431	valid_1's WRMSSE: 0.483989	valid_1's RMSE: 1.87442
[1284]	training's WRMSSE: 0.485872	training's RMSE: 1.91428	valid_1's WRMSSE: 0.483984	valid_1's RMSE: 1.87442
[1285]	training's WRMSSE: 0.48584	training's RMSE: 1.91424	valid_1's WRMSSE: 0.483952	valid_1's RMSE: 1.8744
[1286]	training's WRMSSE: 0.485787	training's RMSE: 1.9142	valid_1's WRMSSE: 0.483979	valid_1's RMSE: 1.87441
[1287]	training's WRMSSE: 0.485762	training's RMSE: 1.91417	valid_1's WRMSSE: 0.483975	valid_1's RMSE: 1.87441
[1288]	

[1354]	training's WRMSSE: 0.484161	training's RMSE: 1.91223	valid_1's WRMSSE: 0.483376	valid_1's RMSE: 1.87393
[1355]	training's WRMSSE: 0.484138	training's RMSE: 1.9122	valid_1's WRMSSE: 0.483368	valid_1's RMSE: 1.87393
[1356]	training's WRMSSE: 0.484101	training's RMSE: 1.91217	valid_1's WRMSSE: 0.483395	valid_1's RMSE: 1.87392
[1357]	training's WRMSSE: 0.48409	training's RMSE: 1.91214	valid_1's WRMSSE: 0.483408	valid_1's RMSE: 1.87391
[1358]	training's WRMSSE: 0.484084	training's RMSE: 1.91214	valid_1's WRMSSE: 0.483412	valid_1's RMSE: 1.87391
[1359]	training's WRMSSE: 0.484076	training's RMSE: 1.9121	valid_1's WRMSSE: 0.483415	valid_1's RMSE: 1.87392
[1360]	training's WRMSSE: 0.484066	training's RMSE: 1.91207	valid_1's WRMSSE: 0.483421	valid_1's RMSE: 1.87392
[1361]	training's WRMSSE: 0.484057	training's RMSE: 1.91204	valid_1's WRMSSE: 0.483417	valid_1's RMSE: 1.87391
[1362]	training's WRMSSE: 0.484043	training's RMSE: 1.91203	valid_1's WRMSSE: 0.483416	valid_1's RMSE: 1.87391
[136

KeyboardInterrupt: 

In [158]:
X_train = train1[(train1.day_num<=1885- 27)&(train1.day_num>=1885-1-364- 27)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885- 27)&(train1.day_num>=1885-1-364- 27)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25'),'sale']
X_valid = train1[(train1.day_num<=1913 - 27)&(train1.day_num>=1886- 27)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913- 27)&(train1.day_num>=1886- 27),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [159]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [161]:
### add clip

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.37742	valid_0's RMSE: 2.54007
[20]	valid_0's WRMSSE: 1.17241	valid_0's RMSE: 2.12117
[30]	valid_0's WRMSSE: 0.745133	valid_0's RMSE: 2.01284
[40]	valid_0's WRMSSE: 0.629294	valid_0's RMSE: 1.98434
[50]	valid_0's WRMSSE: 0.598389	valid_0's RMSE: 1.97467
[60]	valid_0's WRMSSE: 0.588105	valid_0's RMSE: 1.96962
[70]	valid_0's WRMSSE: 0.58433	valid_0's RMSE: 1.96709
[80]	valid_0's WRMSSE: 0.582542	valid_0's RMSE: 1.96555
[90]	valid_0's WRMSSE: 0.581332	valid_0's RMSE: 1.96344
[100]	valid_0's WRMSSE: 0.580369	valid_0's RMSE: 1.96203
[110]	valid_0's WRMSSE: 0.580139	valid_0's RMSE: 1.96145
[120]	valid_0's WRMSSE: 0.580197	valid_0's RMSE: 1.96074
[130]	valid_0's WRMSSE: 0.579147	valid_0's RMSE: 1.95984
[140]	valid_0's WRMSSE: 0.578739	valid_0's RMSE: 1.95904
[150]	valid_0's WRMSSE: 0.578298	valid_0's RMSE: 1.95843
[160]	valid_0's WRMSSE: 0.577648	valid_0's RMSE: 1.95766
[170]	valid_0's WRMSSE: 0.577068	valid_

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffd90>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [104]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.37742	valid_0's RMSE: 2.54007
[20]	valid_0's WRMSSE: 1.17241	valid_0's RMSE: 2.12117
[30]	valid_0's WRMSSE: 0.745133	valid_0's RMSE: 2.01284
[40]	valid_0's WRMSSE: 0.629294	valid_0's RMSE: 1.98434
[50]	valid_0's WRMSSE: 0.598389	valid_0's RMSE: 1.97467
[60]	valid_0's WRMSSE: 0.588105	valid_0's RMSE: 1.96962
[70]	valid_0's WRMSSE: 0.58433	valid_0's RMSE: 1.96709
[80]	valid_0's WRMSSE: 0.582672	valid_0's RMSE: 1.96556
[90]	valid_0's WRMSSE: 0.582356	valid_0's RMSE: 1.96347
[100]	valid_0's WRMSSE: 0.581274	valid_0's RMSE: 1.96186
[110]	valid_0's WRMSSE: 0.580791	valid_0's RMSE: 1.96118
[120]	valid_0's WRMSSE: 0.581276	valid_0's RMSE: 1.96105
[130]	valid_0's WRMSSE: 0.580891	valid_0's RMSE: 1.96023
[140]	valid_0's WRMSSE: 0.580324	valid_0's RMSE: 1.95935
[150]	valid_0's WRMSSE: 0.57956	valid_0's RMSE: 1.95896
[160]	valid_0's WRMSSE: 0.579294	valid_0's RMSE: 1.95823
[170]	valid_0's WRMSSE: 0.579097	valid_0

  
  
  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()


[290]	valid_0's WRMSSE: inf	valid_0's RMSE: inf
Early stopping, best iteration is:
[246]	valid_0's WRMSSE: 0.575458	valid_0's RMSE: 1.95571
Evaluated only: WRMSSE


LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c91e1378>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [133]:
###132
lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

[1]	training's WRMSSE: 4.45479	training's RMSE: 3.60456	valid_1's WRMSSE: 4.81607	valid_1's RMSE: 3.68804
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 4.1171	training's RMSE: 3.42673	valid_1's WRMSSE: 4.45191	valid_1's RMSE: 3.5035
[3]	training's WRMSSE: 3.80428	training's RMSE: 3.26389	valid_1's WRMSSE: 4.11494	valid_1's RMSE: 3.33423
[4]	training's WRMSSE: 3.51563	training's RMSE: 3.11655	valid_1's WRMSSE: 3.80348	valid_1's RMSE: 3.17961
[5]	training's WRMSSE: 3.24855	training's RMSE: 2.98409	valid_1's WRMSSE: 3.51343	valid_1's RMSE: 3.04112
[6]	training's WRMSSE: 3.00384	training's RMSE: 2.86735	valid_1's WRMSSE: 3.2491	valid_1's RMSE: 2.91738
[7]	training's WRMSSE: 2.77691	training's RMSE: 2.76158	valid_1's WRMSSE: 3.00302	valid_1's RMSE: 2.80647
[8]	training's WRMSSE: 2.56916	training's RMSE: 2.66737	valid_1's WRMSSE: 2.77671	valid_1's RMSE: 2.70736
[9]	training's WRMSSE: 2.37767	training's RMSE: 2.58307	valid_1's WRMSSE: 2.56848	valid_1's RM

[77]	training's WRMSSE: 0.522831	training's RMSE: 1.95037	valid_1's WRMSSE: 0.582778	valid_1's RMSE: 1.96586
[78]	training's WRMSSE: 0.522296	training's RMSE: 1.9499	valid_1's WRMSSE: 0.582557	valid_1's RMSE: 1.96563
[79]	training's WRMSSE: 0.521919	training's RMSE: 1.94957	valid_1's WRMSSE: 0.582501	valid_1's RMSE: 1.96561
[80]	training's WRMSSE: 0.519736	training's RMSE: 1.94912	valid_1's WRMSSE: 0.582672	valid_1's RMSE: 1.96556
[81]	training's WRMSSE: 0.519227	training's RMSE: 1.94876	valid_1's WRMSSE: 0.582504	valid_1's RMSE: 1.96527
[82]	training's WRMSSE: 0.51879	training's RMSE: 1.94835	valid_1's WRMSSE: 0.582521	valid_1's RMSE: 1.96521
[83]	training's WRMSSE: 0.518363	training's RMSE: 1.94813	valid_1's WRMSSE: 0.582475	valid_1's RMSE: 1.96497
[84]	training's WRMSSE: 0.517593	training's RMSE: 1.94749	valid_1's WRMSSE: 0.582635	valid_1's RMSE: 1.96483
[85]	training's WRMSSE: 0.517339	training's RMSE: 1.94702	valid_1's WRMSSE: 0.582511	valid_1's RMSE: 1.96436
[86]	training's WRMSS

[152]	training's WRMSSE: 0.487827	training's RMSE: 1.92366	valid_1's WRMSSE: 0.579704	valid_1's RMSE: 1.95871
[153]	training's WRMSSE: 0.487684	training's RMSE: 1.92328	valid_1's WRMSSE: 0.579633	valid_1's RMSE: 1.95855
[154]	training's WRMSSE: 0.487439	training's RMSE: 1.92294	valid_1's WRMSSE: 0.579582	valid_1's RMSE: 1.95858
[155]	training's WRMSSE: 0.486636	training's RMSE: 1.92274	valid_1's WRMSSE: 0.579482	valid_1's RMSE: 1.95855
[156]	training's WRMSSE: 0.486291	training's RMSE: 1.92237	valid_1's WRMSSE: 0.579401	valid_1's RMSE: 1.95842
[157]	training's WRMSSE: 0.48607	training's RMSE: 1.92201	valid_1's WRMSSE: 0.579339	valid_1's RMSE: 1.95838
[158]	training's WRMSSE: 0.486005	training's RMSE: 1.92166	valid_1's WRMSSE: 0.57936	valid_1's RMSE: 1.95827
[159]	training's WRMSSE: 0.485901	training's RMSE: 1.92139	valid_1's WRMSSE: 0.579335	valid_1's RMSE: 1.95824
[160]	training's WRMSSE: 0.485743	training's RMSE: 1.92121	valid_1's WRMSSE: 0.579232	valid_1's RMSE: 1.95818
[161]	traini

[227]	training's WRMSSE: 0.472143	training's RMSE: 1.90558	valid_1's WRMSSE: 0.576622	valid_1's RMSE: 1.95607
[228]	training's WRMSSE: 0.472013	training's RMSE: 1.90535	valid_1's WRMSSE: 0.576611	valid_1's RMSE: 1.95603
[229]	training's WRMSSE: 0.471863	training's RMSE: 1.90515	valid_1's WRMSSE: 0.576455	valid_1's RMSE: 1.95591
[230]	training's WRMSSE: 0.471395	training's RMSE: 1.905	valid_1's WRMSSE: 0.576081	valid_1's RMSE: 1.95589
[231]	training's WRMSSE: 0.471302	training's RMSE: 1.90475	valid_1's WRMSSE: 0.575992	valid_1's RMSE: 1.95581
[232]	training's WRMSSE: 0.471231	training's RMSE: 1.90447	valid_1's WRMSSE: 0.575983	valid_1's RMSE: 1.95583
[233]	training's WRMSSE: 0.471158	training's RMSE: 1.90425	valid_1's WRMSSE: 0.575981	valid_1's RMSE: 1.9558
[234]	training's WRMSSE: 0.471001	training's RMSE: 1.9039	valid_1's WRMSSE: 0.575953	valid_1's RMSE: 1.95578
[235]	training's WRMSSE: 0.470866	training's RMSE: 1.90363	valid_1's WRMSSE: 0.57598	valid_1's RMSE: 1.95564
[236]	training'

  


[283]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[284]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[285]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[286]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[287]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[288]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[289]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[290]	training's WRMSSE: inf	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf




[291]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[292]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[293]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[294]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[295]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
[296]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: inf	valid_1's RMSE: inf
Early stopping, best iteration is:
[246]	training's WRMSSE: 0.481817	training's RMSE: 1.92017	valid_1's WRMSSE: 0.57547	valid_1's RMSE: 1.95569
Evaluated only: WRMSSE


LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c69ffe18>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [105]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,87141.288933
rolling_sale_14_mean,56283.416315
rolling_sale_7_mean,17893.890398
rolling_sale_28_std,7466.099073
rolling_sale_91_mean,6777.943901
rolling_sale_dayofweek_52_mean,3857.052207
rolling_sale_dayofweek_52_quantile75,2828.453954
dayofweek,2768.577187
rolling_sale_14_quantile75,2712.326157
rolling_sale_364_mean,2533.893001


In [109]:
X_train = train1[(train1.day_num<=1913 - 365)&(train1.day_num>=1913 - 365 - 364)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1913 - 365)&(train1.day_num>=1913 - 365 - 364)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25'),'sale']
X_valid = train1[(train1.day_num<=1969 - 365)&(train1.day_num>=1914 - 365)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1969 - 365)&(train1.day_num>=1914 - 365),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [126]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [111]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.19161	valid_0's RMSE: 2.55361
[20]	valid_0's WRMSSE: 1.13524	valid_0's RMSE: 2.17808
[30]	valid_0's WRMSSE: 0.739586	valid_0's RMSE: 2.07345
[40]	valid_0's WRMSSE: 0.616608	valid_0's RMSE: 2.04285
[50]	valid_0's WRMSSE: 0.574425	valid_0's RMSE: 2.03068
[60]	valid_0's WRMSSE: 0.55651	valid_0's RMSE: 2.02307
[70]	valid_0's WRMSSE: 0.546072	valid_0's RMSE: 2.01855
[80]	valid_0's WRMSSE: 0.537512	valid_0's RMSE: 2.01509
[90]	valid_0's WRMSSE: 0.534495	valid_0's RMSE: 2.0131
[100]	valid_0's WRMSSE: 0.531604	valid_0's RMSE: 2.01062


  
  
  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()


[110]	valid_0's WRMSSE: nan	valid_0's RMSE: inf
[120]	valid_0's WRMSSE: nan	valid_0's RMSE: inf
[130]	valid_0's WRMSSE: nan	valid_0's RMSE: inf
[140]	valid_0's WRMSSE: nan	valid_0's RMSE: inf
[150]	valid_0's WRMSSE: nan	valid_0's RMSE: inf
Early stopping, best iteration is:
[106]	valid_0's WRMSSE: 0.530099	valid_0's RMSE: 2.00976
Evaluated only: WRMSSE


LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c91e1378>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [113]:

lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

[1]	training's WRMSSE: 4.10622	training's RMSE: 3.59316	valid_1's WRMSSE: 4.31389	valid_1's RMSE: 3.57134
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 3.7983	training's RMSE: 3.41612	valid_1's WRMSSE: 3.9968	valid_1's RMSE: 3.40478
[3]	training's WRMSSE: 3.51389	training's RMSE: 3.25618	valid_1's WRMSSE: 3.70323	valid_1's RMSE: 3.25426
[4]	training's WRMSSE: 3.25126	training's RMSE: 3.11117	valid_1's WRMSSE: 3.43153	valid_1's RMSE: 3.11791
[5]	training's WRMSSE: 3.00885	training's RMSE: 2.98211	valid_1's WRMSSE: 3.18128	valid_1's RMSE: 2.99708
[6]	training's WRMSSE: 2.78557	training's RMSE: 2.8652	valid_1's WRMSSE: 2.95032	valid_1's RMSE: 2.88768
[7]	training's WRMSSE: 2.57887	training's RMSE: 2.75921	valid_1's WRMSSE: 2.73636	valid_1's RMSE: 2.78813
[8]	training's WRMSSE: 2.3889	training's RMSE: 2.6664	valid_1's WRMSSE: 2.53941	valid_1's RMSE: 2.70148
[9]	training's WRMSSE: 2.21374	training's RMSE: 2.58367	valid_1's WRMSSE: 2.35806	valid_1's RMSE

[77]	training's WRMSSE: 0.490016	training's RMSE: 1.93558	valid_1's WRMSSE: 0.539963	valid_1's RMSE: 2.01625
[78]	training's WRMSSE: 0.489261	training's RMSE: 1.93505	valid_1's WRMSSE: 0.539623	valid_1's RMSE: 2.01596
[79]	training's WRMSSE: 0.486999	training's RMSE: 1.93452	valid_1's WRMSSE: 0.538116	valid_1's RMSE: 2.01551
[80]	training's WRMSSE: 0.486055	training's RMSE: 1.93401	valid_1's WRMSSE: 0.537512	valid_1's RMSE: 2.01509
[81]	training's WRMSSE: 0.485766	training's RMSE: 1.9336	valid_1's WRMSSE: 0.537405	valid_1's RMSE: 2.01483
[82]	training's WRMSSE: 0.485462	training's RMSE: 1.93314	valid_1's WRMSSE: 0.537316	valid_1's RMSE: 2.01477
[83]	training's WRMSSE: 0.484921	training's RMSE: 1.93281	valid_1's WRMSSE: 0.537094	valid_1's RMSE: 2.01453
[84]	training's WRMSSE: 0.482695	training's RMSE: 1.93238	valid_1's WRMSSE: 0.535666	valid_1's RMSE: 2.01421
[85]	training's WRMSSE: 0.482457	training's RMSE: 1.93205	valid_1's WRMSSE: 0.535512	valid_1's RMSE: 2.01398
[86]	training's WRMS

  


[107]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf


  
  # This is added back by InteractiveShellApp.init_path()
  # This is added back by InteractiveShellApp.init_path()


[108]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[109]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[110]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[111]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[112]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[113]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[114]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[115]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[116]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[117]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf
[118]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1'

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7c91e1378>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [127]:
###改成float128
lgb_re2.fit(X_train,y_train,eval_set = [(X_train,y_train),(X_valid,y_valid)],verbose=1,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

[1]	training's WRMSSE: 4.10622	training's RMSE: 3.59316	valid_1's WRMSSE: 4.31389	valid_1's RMSE: 3.57134
Training until validation scores don't improve for 50 rounds
[2]	training's WRMSSE: 3.7983	training's RMSE: 3.41612	valid_1's WRMSSE: 3.9968	valid_1's RMSE: 3.40478
[3]	training's WRMSSE: 3.51389	training's RMSE: 3.25618	valid_1's WRMSSE: 3.70323	valid_1's RMSE: 3.25426
[4]	training's WRMSSE: 3.25126	training's RMSE: 3.11117	valid_1's WRMSSE: 3.43153	valid_1's RMSE: 3.11791
[5]	training's WRMSSE: 3.00885	training's RMSE: 2.98211	valid_1's WRMSSE: 3.18128	valid_1's RMSE: 2.99708
[6]	training's WRMSSE: 2.78557	training's RMSE: 2.8652	valid_1's WRMSSE: 2.95032	valid_1's RMSE: 2.88768
[7]	training's WRMSSE: 2.57887	training's RMSE: 2.75921	valid_1's WRMSSE: 2.73636	valid_1's RMSE: 2.78813
[8]	training's WRMSSE: 2.3889	training's RMSE: 2.6664	valid_1's WRMSSE: 2.53941	valid_1's RMSE: 2.70148
[9]	training's WRMSSE: 2.21374	training's RMSE: 2.58367	valid_1's WRMSSE: 2.35806	valid_1's RMSE

[77]	training's WRMSSE: 0.490016	training's RMSE: 1.93558	valid_1's WRMSSE: 0.539963	valid_1's RMSE: 2.01625
[78]	training's WRMSSE: 0.489261	training's RMSE: 1.93505	valid_1's WRMSSE: 0.539623	valid_1's RMSE: 2.01596
[79]	training's WRMSSE: 0.486999	training's RMSE: 1.93452	valid_1's WRMSSE: 0.538116	valid_1's RMSE: 2.01551
[80]	training's WRMSSE: 0.486055	training's RMSE: 1.93401	valid_1's WRMSSE: 0.537512	valid_1's RMSE: 2.01509
[81]	training's WRMSSE: 0.485766	training's RMSE: 1.9336	valid_1's WRMSSE: 0.537405	valid_1's RMSE: 2.01483
[82]	training's WRMSSE: 0.485462	training's RMSE: 1.93314	valid_1's WRMSSE: 0.537316	valid_1's RMSE: 2.01477
[83]	training's WRMSSE: 0.484921	training's RMSE: 1.93281	valid_1's WRMSSE: 0.537094	valid_1's RMSE: 2.01453
[84]	training's WRMSSE: 0.482695	training's RMSE: 1.93238	valid_1's WRMSSE: 0.535666	valid_1's RMSE: 2.01421
[85]	training's WRMSSE: 0.482457	training's RMSE: 1.93205	valid_1's WRMSSE: 0.535512	valid_1's RMSE: 2.01398
[86]	training's WRMS

  


[107]	training's WRMSSE: nan	training's RMSE: inf	valid_1's WRMSSE: nan	valid_1's RMSE: inf


KeyboardInterrupt: 

In [114]:
a = pd.Series([1,2,3]).astype('float')

In [123]:
np.float128

numpy.float128

In [None]:
np.fl

In [119]:
import numpy
# warning is not logged here. Perfect for clean unit test output
with numpy.errstate(divide='ignore'):
    c = numpy.float64(1.0) / 0.0

### score
- 0.4851 480
- 0.575458 57376
- 0.530099

###  add test1

In [180]:
X_train.columns

Index(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id', 'event_name_1',
       'event_type_1', 'event_name_2', 'event_type_2', 'sell_price', 'year',
       'month', 'dayofweek', 'quarter', 'lag_sale_1', 'lag_sale_2',
       'lag_sale_3', 'lag_sale_4', 'lag_sale_5', 'lag_sale_6', 'lag_sale_7',
       'rolling_sale_3_mean', 'rolling_sale_7_mean', 'rolling_sale_7_std',
       'rolling_sale_7_median', 'rolling_sale_14_mean', 'rolling_sale_14_std',
       'rolling_sale_14_median', 'rolling_sale_28_mean', 'rolling_sale_28_std',
       'rolling_sale_28_median', 'rolling_sale_91_mean', 'rolling_sale_91_std',
       'rolling_sale_91_median', 'rolling_sale_182_mean',
       'rolling_sale_182_std', 'rolling_sale_182_median',
       'rolling_sale_364_mean', 'rolling_sale_364_std',
       'rolling_sale_364_median', 'rolling_sale_7_cv', 'rolling_sale_14_cv',
       'rolling_sale_28_cv', 'rolling_sale_91_cv', 'rolling_sale_182_cv',
       'rolling_sale_364_cv', 'rolling_sale_7_quantile25',
   

In [215]:
def func_custom_1_2(scalar):
    if scalar ==0:
        return 0
    elif scalar <= 10**(-8):
        return 10**4
    else:
        return scalar **(-1/2)

In [216]:
func1 = np.vectorize(func_custom_1_2)

In [217]:
def custom_obj(y_true, y_pred):
    residual = (y_true - y_pred).astype(np.float128)
    residual = residual.reshape((-1,30490)) 
    d = residual.shape[0]    
    residual_42840 = residual * roll_mat_csr.T
    mean_square_42840 = np.square(residual_42840).sum(axis = 0)
    temp_1 = func1(mean_square_42840)
    part1 = ((1/np.sqrt(S)) * temp_1)/np.sqrt(d)
    part_all = np.multiply(part1,residual_42840) 
    grad = -(part_all * roll_mat_csr).reshape((-1))
    hess_part1 = np.multiply(-(1/np.sqrt(S))/np.sqrt(d)*temp_1**3,np.square(residual_42840))
    hess_part2 = (1/np.sqrt(S))/np.sqrt(d)*temp_1
    hess = ((hess_part1 + hess_part2)*roll_mat_csr).reshape((-1))
    return grad,hess

In [181]:
X_train = train1[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25'),'sale']
X_valid = train1[(train1.day_num<=1913)&(train1.day_num>=1886)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913)&(train1.day_num>=1886),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [190]:
X_train['lag_7_14_ratio_mean'] = np.clip(X_train['rolling_sale_7_mean'] * 7 / (X_train['rolling_sale_14_mean']*14 - X_train['rolling_sale_7_mean'] * 7),-999,999)
X_train['lag_14_28_ratio_mean'] = np.clip(X_train['rolling_sale_14_mean'] * 14 / (X_train['rolling_sale_28_mean']*28 - X_train['rolling_sale_14_mean'] * 14),-999,999)
X_train['lag_7_14_ratio_median'] = np.clip(X_train['rolling_sale_7_median'] / X_train['rolling_sale_14_median'],-999,999)
X_train['lag_14_28_ratio_median'] = np.clip(X_train['rolling_sale_14_median']  / X_train['rolling_sale_28_median'],-999,999)

X_train['lag_7_14_diff_mean'] = X_train['rolling_sale_7_mean'] * 7 - (X_train['rolling_sale_14_mean']*14 - X_train['rolling_sale_7_mean'] * 7)
X_train['lag_14_28_diff_mean'] = X_train['rolling_sale_14_mean'] * 14 - (X_train['rolling_sale_28_mean']*28 - X_train['rolling_sale_14_mean'] * 14)
X_train['lag_7_14_diff_median'] = X_train['rolling_sale_7_median'] - X_train['rolling_sale_14_median']
X_train['lag_14_28_diff_median'] = X_train['rolling_sale_14_median']  - X_train['rolling_sale_28_median']
X_train.fillna(-999,inplace=True)

In [202]:
X_valid['lag_7_14_ratio_mean'] = np.clip(X_valid['rolling_sale_7_mean'] * 7 / (X_valid['rolling_sale_14_mean']*14 - X_valid['rolling_sale_7_mean'] * 7),-999,999)
X_valid['lag_14_28_ratio_mean'] = np.clip(X_valid['rolling_sale_14_mean'] * 14 / (X_valid['rolling_sale_28_mean']*28 - X_valid['rolling_sale_14_mean'] * 14),-999,999)
X_valid['lag_7_14_ratio_median'] = np.clip(X_valid['rolling_sale_7_median'] / X_valid['rolling_sale_14_median'],-999,999)
X_valid['lag_14_28_ratio_median'] = np.clip(X_valid['rolling_sale_14_median']  / X_valid['rolling_sale_28_median'],-999,999)

X_valid['lag_7_14_diff_mean'] = X_valid['rolling_sale_7_mean'] * 7 - (X_valid['rolling_sale_14_mean']*14 - X_valid['rolling_sale_7_mean'] * 7)
X_valid['lag_14_28_diff_mean'] = X_valid['rolling_sale_14_mean'] * 14 - (X_valid['rolling_sale_28_mean']*28 - X_valid['rolling_sale_14_mean'] * 14)
X_valid['lag_7_14_diff_median'] = X_valid['rolling_sale_7_median'] - X_valid['rolling_sale_14_median']
X_valid['lag_14_28_diff_median'] = X_valid['rolling_sale_14_median']  - X_valid['rolling_sale_28_median']
X_valid.fillna(-999,inplace=True)

In [205]:
add_list = ['lag_7_14_ratio_mean','lag_14_28_ratio_mean','lag_7_14_ratio_median','lag_14_28_ratio_median',
           'lag_7_14_diff_mean','lag_14_28_diff_mean','lag_7_14_diff_median','lag_14_28_diff_median']

In [203]:
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.40059	valid_0's RMSE: 2.45251
[20]	valid_0's WRMSSE: 1.15888	valid_0's RMSE: 2.03083
[30]	valid_0's WRMSSE: 0.699217	valid_0's RMSE: 1.92816
[40]	valid_0's WRMSSE: 0.56777	valid_0's RMSE: 1.90386
[50]	valid_0's WRMSSE: 0.530414	valid_0's RMSE: 1.89582
[60]	valid_0's WRMSSE: 0.516311	valid_0's RMSE: 1.8911
[70]	valid_0's WRMSSE: 0.5072	valid_0's RMSE: 1.88823
[80]	valid_0's WRMSSE: 0.502527	valid_0's RMSE: 1.88642
[90]	valid_0's WRMSSE: 0.499122	valid_0's RMSE: 1.88489
[100]	valid_0's WRMSSE: 0.495211	valid_0's RMSE: 1.8834
[110]	valid_0's WRMSSE: 0.492223	valid_0's RMSE: 1.88233
[120]	valid_0's WRMSSE: 0.490358	valid_0's RMSE: 1.88103
[130]	valid_0's WRMSSE: 0.489176	valid_0's RMSE: 1.88022
[140]	valid_0's WRMSSE: 0.488839	valid_0's RMSE: 1.87962
[150]	valid_0's WRMSSE: 0.488125	valid_0's RMSE: 1.87911
[160]	valid_0's WRMSSE: 0.486724	valid_0's RMSE: 1.878
[170]	valid_0's WRMSSE: 0.486559	valid_0's RM

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc70852b1e0>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [204]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,98780.277753
rolling_sale_14_mean,49073.054152
rolling_sale_7_mean,16023.867886
rolling_sale_dayofweek_52_mean,4714.203745
rolling_sale_364_mean,3242.140471
rolling_sale_dayofweek_52_quantile75,2973.342364
lag_14_28_ratio_mean,2816.472452
rolling_sale_28_std,2801.249147
rolling_sale_91_std,2721.982046
dayofweek,2697.461168


In [206]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False).loc[add_list]

Unnamed: 0,0
lag_7_14_ratio_mean,535.11978
lag_14_28_ratio_mean,2816.472452
lag_7_14_ratio_median,82.951708
lag_14_28_ratio_median,95.485843
lag_7_14_diff_mean,398.046599
lag_14_28_diff_mean,344.766909
lag_7_14_diff_median,48.924406
lag_14_28_diff_median,38.61984


### add id

In [249]:
def func_custom_1_2(scalar):
    if scalar ==0:
        return 0
    elif scalar <= 10**(-8):
        return 10**4
    else:
        return scalar **(-1/2)

In [250]:
func1 = np.vectorize(func_custom_1_2)

In [251]:
def custom_obj(y_true, y_pred):
    residual = (y_true - y_pred).astype(np.float128)
    residual = residual.reshape((-1,30490)) 
    d = residual.shape[0]    
    residual_42840 = residual * roll_mat_csr.T
    mean_square_42840 = np.square(residual_42840).sum(axis = 0)
    temp_1 = func1(mean_square_42840)
    part1 = ((1/np.sqrt(S)) * temp_1)/np.sqrt(d)
    part_all = np.multiply(part1,residual_42840) 
    grad = -(part_all * roll_mat_csr).reshape((-1))
    hess_part1 = np.multiply(-(1/np.sqrt(S))/np.sqrt(d)*temp_1**3,np.square(residual_42840))
    hess_part2 = (1/np.sqrt(S))/np.sqrt(d)*temp_1
    hess = ((hess_part1 + hess_part2)*roll_mat_csr).reshape((-1))
    return grad,hess

In [252]:
train1 = pd.read_pickle('data_part1.pkl')

In [253]:
train1.shape

(60034810, 96)

In [254]:
for _ in ['CA','TX','WI']:
    train1.loc[train1.state_id==_,'snap'] = list(train1.loc[train1.state_id==_,f'snap_{_}'])

In [255]:
drop_col = ['day_num','date','wm_yr_wk','snap_CA','snap_TX','snap_WI','sale']
cat_col = ['item_id','dept_id','cat_id','store_id','state_id','event_name_1', 'event_type_1', 'event_name_2', 'event_type_2','id']

In [261]:
for cat in cat_col:
    lbl = preprocessing.LabelEncoder()
    train1[cat] = lbl.fit_transform(train1[cat].astype(str))
    print(cat)

item_id
dept_id
cat_id
store_id
state_id
event_name_1
event_type_1
event_name_2
event_type_2
id


In [262]:
train1.fillna(-999,inplace=True)

In [263]:
X_train = train1[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885)&(train1.day_num>=1885-1-364)&(train1.date!='2015-12-25'),'sale']
X_valid = train1[(train1.day_num<=1913)&(train1.day_num>=1886)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913)&(train1.day_num>=1886),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [264]:
X_train['lag_7_14_ratio_mean'] = np.clip(X_train['rolling_sale_7_mean'] * 7 / (X_train['rolling_sale_14_mean']*14 - X_train['rolling_sale_7_mean'] * 7),-999,999)
X_train['lag_14_28_ratio_mean'] = np.clip(X_train['rolling_sale_14_mean'] * 14 / (X_train['rolling_sale_28_mean']*28 - X_train['rolling_sale_14_mean'] * 14),-999,999)
X_train['lag_7_14_ratio_median'] = np.clip(X_train['rolling_sale_7_median'] / X_train['rolling_sale_14_median'],-999,999)
X_train['lag_14_28_ratio_median'] = np.clip(X_train['rolling_sale_14_median']  / X_train['rolling_sale_28_median'],-999,999)

X_train['lag_7_14_diff_mean'] = X_train['rolling_sale_7_mean'] * 7 - (X_train['rolling_sale_14_mean']*14 - X_train['rolling_sale_7_mean'] * 7)
X_train['lag_14_28_diff_mean'] = X_train['rolling_sale_14_mean'] * 14 - (X_train['rolling_sale_28_mean']*28 - X_train['rolling_sale_14_mean'] * 14)
X_train['lag_7_14_diff_median'] = X_train['rolling_sale_7_median'] - X_train['rolling_sale_14_median']
X_train['lag_14_28_diff_median'] = X_train['rolling_sale_14_median']  - X_train['rolling_sale_28_median']
X_train.fillna(-999,inplace=True)

In [265]:
X_valid['lag_7_14_ratio_mean'] = np.clip(X_valid['rolling_sale_7_mean'] * 7 / (X_valid['rolling_sale_14_mean']*14 - X_valid['rolling_sale_7_mean'] * 7),-999,999)
X_valid['lag_14_28_ratio_mean'] = np.clip(X_valid['rolling_sale_14_mean'] * 14 / (X_valid['rolling_sale_28_mean']*28 - X_valid['rolling_sale_14_mean'] * 14),-999,999)
X_valid['lag_7_14_ratio_median'] = np.clip(X_valid['rolling_sale_7_median'] / X_valid['rolling_sale_14_median'],-999,999)
X_valid['lag_14_28_ratio_median'] = np.clip(X_valid['rolling_sale_14_median']  / X_valid['rolling_sale_28_median'],-999,999)

X_valid['lag_7_14_diff_mean'] = X_valid['rolling_sale_7_mean'] * 7 - (X_valid['rolling_sale_14_mean']*14 - X_valid['rolling_sale_7_mean'] * 7)
X_valid['lag_14_28_diff_mean'] = X_valid['rolling_sale_14_mean'] * 14 - (X_valid['rolling_sale_28_mean']*28 - X_valid['rolling_sale_14_mean'] * 14)
X_valid['lag_7_14_diff_median'] = X_valid['rolling_sale_7_median'] - X_valid['rolling_sale_14_median']
X_valid['lag_14_28_diff_median'] = X_valid['rolling_sale_14_median']  - X_valid['rolling_sale_28_median']
X_valid.fillna(-999,inplace=True)

In [220]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [221]:
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.39846	valid_0's RMSE: 2.45216
[20]	valid_0's WRMSSE: 1.15766	valid_0's RMSE: 2.02959
[30]	valid_0's WRMSSE: 0.702352	valid_0's RMSE: 1.92727
[40]	valid_0's WRMSSE: 0.566343	valid_0's RMSE: 1.90474
[50]	valid_0's WRMSSE: 0.529916	valid_0's RMSE: 1.89744
[60]	valid_0's WRMSSE: 0.515683	valid_0's RMSE: 1.89312
[70]	valid_0's WRMSSE: 0.50703	valid_0's RMSE: 1.89065
[80]	valid_0's WRMSSE: 0.500732	valid_0's RMSE: 1.88802
[90]	valid_0's WRMSSE: 0.496982	valid_0's RMSE: 1.88559
[100]	valid_0's WRMSSE: 0.494658	valid_0's RMSE: 1.88393
[110]	valid_0's WRMSSE: 0.493061	valid_0's RMSE: 1.88243
[120]	valid_0's WRMSSE: 0.491859	valid_0's RMSE: 1.88116
[130]	valid_0's WRMSSE: 0.490448	valid_0's RMSE: 1.88026
[140]	valid_0's WRMSSE: 0.487957	valid_0's RMSE: 1.87967
[150]	valid_0's WRMSSE: 0.487325	valid_0's RMSE: 1.87869
[160]	valid_0's WRMSSE: 0.486916	valid_0's RMSE: 1.87846
[170]	valid_0's WRMSSE: 0.485976	valid_

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7086cb840>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [223]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,95073.988015
rolling_sale_14_mean,39882.594788
rolling_sale_7_mean,18660.063379
rolling_sale_28_quantile75,14838.065727
rolling_sale_dayofweek_52_mean,4662.955348
rolling_sale_28_std,3752.157256
rolling_sale_364_mean,3379.377961
rolling_sale_91_mean,3009.157637
lag_14_28_ratio_mean,2973.019135
rolling_sale_dayofweek_52_quantile75,2897.027534


In [224]:
####drop id
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [225]:
lgb_re2.fit(X_train.drop(['id'],axis=1),y_train,eval_set = [(X_valid.drop(['id'],axis=1),y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.40059	valid_0's RMSE: 2.45251
[20]	valid_0's WRMSSE: 1.15888	valid_0's RMSE: 2.03083
[30]	valid_0's WRMSSE: 0.699217	valid_0's RMSE: 1.92816
[40]	valid_0's WRMSSE: 0.56777	valid_0's RMSE: 1.90386
[50]	valid_0's WRMSSE: 0.530414	valid_0's RMSE: 1.89582
[60]	valid_0's WRMSSE: 0.516311	valid_0's RMSE: 1.8911
[70]	valid_0's WRMSSE: 0.5072	valid_0's RMSE: 1.88823
[80]	valid_0's WRMSSE: 0.502527	valid_0's RMSE: 1.88642
[90]	valid_0's WRMSSE: 0.499122	valid_0's RMSE: 1.88489
[100]	valid_0's WRMSSE: 0.495211	valid_0's RMSE: 1.8834
[110]	valid_0's WRMSSE: 0.492223	valid_0's RMSE: 1.88233
[120]	valid_0's WRMSSE: 0.490358	valid_0's RMSE: 1.88103
[130]	valid_0's WRMSSE: 0.489176	valid_0's RMSE: 1.88022
[140]	valid_0's WRMSSE: 0.488839	valid_0's RMSE: 1.87962
[150]	valid_0's WRMSSE: 0.488125	valid_0's RMSE: 1.87911
[160]	valid_0's WRMSSE: 0.486724	valid_0's RMSE: 1.878
[170]	valid_0's WRMSSE: 0.486559	valid_0's RM

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc7086cb840>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [226]:
c = train1.iloc[-30490*100:].copy()

In [233]:
TARGET = 'sale'
def find_last_sale(df,n_day):
    
    # Limit initial df
    ls_df = df[['id','day_num',TARGET]]
    
    # Convert target to binary
    ls_df['non_zero'] = (ls_df[TARGET]>0).astype(np.int8)
    
    # Make lags to prevent any leakage
    ls_df['non_zero_lag'] = ls_df.groupby(['id'])['non_zero'].transform(lambda x: x.shift(n_day).rolling(2000,1).sum()).fillna(-1)

    temp_df = ls_df[['id','day_num','non_zero_lag']].drop_duplicates(subset=['id','non_zero_lag'])
    temp_df.columns = ['id','day_num_min','non_zero_lag']

    ls_df = ls_df.merge(temp_df, on=['id','non_zero_lag'], how='left')
    ls_df['last_sale'] = ls_df['day_num'] - ls_df['day_num_min']

    return ls_df[['last_sale']]

In [244]:
ls_df = c[['id','day_num',TARGET]]
ls_df['non_zero'] = (ls_df[TARGET]>0).astype(np.int8)
ls_df['non_zero_lag'] = ls_df.groupby(['id'])['non_zero'].transform(lambda x: x.shift(1).rolling(2000,1).sum()).fillna(-1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [246]:
temp_df = ls_df[['id','day_num','non_zero_lag']].drop_duplicates(subset=['id','non_zero_lag'])

In [247]:
temp_df

Unnamed: 0,id,day_num,non_zero_lag
56985810,14370,1870,-1.0
56985811,14380,1870,-1.0
56985812,14390,1870,-1.0
56985813,14400,1870,-1.0
56985814,14410,1870,-1.0
56985815,14420,1870,-1.0
56985816,14430,1870,-1.0
56985817,14440,1870,-1.0
56985818,14450,1870,-1.0
56985819,14460,1870,-1.0


In [234]:
b = find_last_sale(c,1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


In [240]:
c['last_sale'] = b

In [242]:
c.last_sale.value_counts()

Series([], Name: last_sale, dtype: int64)

### add last non-zero??

In [256]:
TARGET = 'sale'
def find_last_sale(df,n_day):
    
    # Limit initial df
    ls_df = df[['id','day_num',TARGET]]
    
    # Convert target to binary
    ls_df['non_zero'] = (ls_df[TARGET]>0).astype(np.int8)
    
    # Make lags to prevent any leakage
    ls_df['non_zero_lag'] = ls_df.groupby(['id'])['non_zero'].transform(lambda x: x.shift(n_day).rolling(2000,1).sum()).fillna(-1)

    temp_df = ls_df[['id','day_num','non_zero_lag']].drop_duplicates(subset=['id','non_zero_lag'])
    temp_df.columns = ['id','day_num_min','non_zero_lag']

    ls_df = ls_df.merge(temp_df, on=['id','non_zero_lag'], how='left')
    ls_df['last_sale'] = ls_df['day_num'] - ls_df['day_num_min']

    return ls_df[['last_sale']]

In [258]:
temp = find_last_sale(train1,1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


In [260]:
train1['last_sale_diff'] = temp.values

#### fold1

In [266]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=12,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [267]:
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.40055	valid_0's RMSE: 2.45303
[20]	valid_0's WRMSSE: 1.1563	valid_0's RMSE: 2.02957
[30]	valid_0's WRMSSE: 0.700509	valid_0's RMSE: 1.92957
[40]	valid_0's WRMSSE: 0.565931	valid_0's RMSE: 1.90564
[50]	valid_0's WRMSSE: 0.527569	valid_0's RMSE: 1.89817
[60]	valid_0's WRMSSE: 0.514544	valid_0's RMSE: 1.8941
[70]	valid_0's WRMSSE: 0.505963	valid_0's RMSE: 1.89132
[80]	valid_0's WRMSSE: 0.501377	valid_0's RMSE: 1.89001
[90]	valid_0's WRMSSE: 0.497029	valid_0's RMSE: 1.88775
[100]	valid_0's WRMSSE: 0.493531	valid_0's RMSE: 1.88562
[110]	valid_0's WRMSSE: 0.490625	valid_0's RMSE: 1.88436
[120]	valid_0's WRMSSE: 0.488768	valid_0's RMSE: 1.88323
[130]	valid_0's WRMSSE: 0.4873	valid_0's RMSE: 1.88233
[140]	valid_0's WRMSSE: 0.485419	valid_0's RMSE: 1.88154
[150]	valid_0's WRMSSE: 0.484824	valid_0's RMSE: 1.88048
[160]	valid_0's WRMSSE: 0.483993	valid_0's RMSE: 1.88006
[170]	valid_0's WRMSSE: 0.484026	valid_0's

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=12, num_leaves=255,
       objective=<function custom_obj at 0x7fc71813d488>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [268]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,117225.747885
rolling_sale_14_mean,34671.211633
rolling_sale_7_mean,18504.770387
rolling_sale_dayofweek_52_mean,5057.493673
last_sale_diff,4697.483032
dayofweek,2590.372615
rolling_sale_dayofweek_52_quantile75,2432.115734
rolling_sale_364_mean,2302.643092
lag_sale_1,1809.991379
rolling_sale_28_quantile75,1454.189932


#### fold2

In [282]:
X_train = train1[(train1.day_num<=1885- 27)&(train1.day_num>=1885-1-364- 27)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1885- 27)&(train1.day_num>=1885-1-364- 27)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25'),'sale']
X_valid = train1[(train1.day_num<=1913 - 27)&(train1.day_num>=1886- 27)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1913- 27)&(train1.day_num>=1886- 27),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [6]:
1942 - 364

1578

In [286]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=8,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [287]:
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.3758	valid_0's RMSE: 2.53618
[20]	valid_0's WRMSSE: 1.17072	valid_0's RMSE: 2.11921
[30]	valid_0's WRMSSE: 0.743888	valid_0's RMSE: 2.01121
[40]	valid_0's WRMSSE: 0.627253	valid_0's RMSE: 1.9842
[50]	valid_0's WRMSSE: 0.597604	valid_0's RMSE: 1.97505
[60]	valid_0's WRMSSE: 0.587702	valid_0's RMSE: 1.96969
[70]	valid_0's WRMSSE: 0.583508	valid_0's RMSE: 1.96679
[80]	valid_0's WRMSSE: 0.582168	valid_0's RMSE: 1.96523
[90]	valid_0's WRMSSE: 0.580588	valid_0's RMSE: 1.96348
[100]	valid_0's WRMSSE: 0.579733	valid_0's RMSE: 1.96201
[110]	valid_0's WRMSSE: 0.57862	valid_0's RMSE: 1.96078
[120]	valid_0's WRMSSE: 0.578192	valid_0's RMSE: 1.96006
[130]	valid_0's WRMSSE: 0.577565	valid_0's RMSE: 1.95906
[140]	valid_0's WRMSSE: 0.576834	valid_0's RMSE: 1.95826
[150]	valid_0's WRMSSE: 0.575497	valid_0's RMSE: 1.95746
[160]	valid_0's WRMSSE: 0.575277	valid_0's RMSE: 1.95724
[170]	valid_0's WRMSSE: 0.575252	valid_0'

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=8, num_leaves=255,
       objective=<function custom_obj at 0x7fc71813d488>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [288]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,100242.796524
rolling_sale_14_mean,30949.817655
rolling_sale_7_mean,19842.335926
rolling_sale_91_mean,11656.401668
rolling_sale_28_quantile75,8235.890716
last_sale_diff,5259.848480
rolling_sale_dayofweek_52_mean,4675.995948
rolling_sale_28_std,4091.493200
rolling_sale_dayofweek_52_quantile75,2959.413061
dayofweek,2665.550516


#### fold3

In [289]:
X_train = train1[(train1.day_num<=1913 - 365)&(train1.day_num>=1913 - 365 - 364)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25')].drop(drop_col,axis=1).copy()
y_train = train1.loc[(train1.day_num<=1913 - 365)&(train1.day_num>=1913 - 365 - 364)&(train1.date!='2015-12-25')&(train1.date!='2014-12-25'),'sale']
X_valid = train1[(train1.day_num<=1969 - 365)&(train1.day_num>=1914 - 365)].drop(drop_col,axis=1).copy()
y_valid = train1.loc[(train1.day_num<=1969 - 365)&(train1.day_num>=1914 - 365),'sale']
y_train[y_train==-999] = 0
y_valid[y_valid==-999] = 0

In [290]:
lgb_re2 = lgb.LGBMRegressor(n_estimators=1000, random_state=51,subsample=0.8,
                         colsample_bytree=0.8,learning_rate=0.05 ,importance_type = 'gain',
                 max_depth = -1, num_leaves = 2**8-1,metric='None',bagging_freq=1,n_jobs=8,
                         first_metric_only = True,objective=custom_obj,min_data_in_leaf =  2**8-1)

In [291]:
lgb_re2.fit(X_train,y_train,eval_set = [(X_valid,y_valid)],verbose=10,
           eval_metric =lambda y_true, y_pred: [evaluator_super.feval(y_true, y_pred), rmse(y_true, y_pred)],
           early_stopping_rounds=50)#,categorical_feature=cat_col)

Training until validation scores don't improve for 50 rounds
[10]	valid_0's WRMSSE: 2.19158	valid_0's RMSE: 2.55154
[20]	valid_0's WRMSSE: 1.1326	valid_0's RMSE: 2.17613
[30]	valid_0's WRMSSE: 0.73737	valid_0's RMSE: 2.07297
[40]	valid_0's WRMSSE: 0.616137	valid_0's RMSE: 2.04348
[50]	valid_0's WRMSSE: 0.57279	valid_0's RMSE: 2.02903
[60]	valid_0's WRMSSE: 0.55626	valid_0's RMSE: 2.02182
[70]	valid_0's WRMSSE: 0.546498	valid_0's RMSE: 2.01763
[80]	valid_0's WRMSSE: 0.540381	valid_0's RMSE: 2.01438
[90]	valid_0's WRMSSE: 0.536371	valid_0's RMSE: 2.01182
[100]	valid_0's WRMSSE: 0.532617	valid_0's RMSE: 2.00982
[110]	valid_0's WRMSSE: 0.530529	valid_0's RMSE: 2.00813
[120]	valid_0's WRMSSE: 0.527656	valid_0's RMSE: 2.00722
[130]	valid_0's WRMSSE: 0.525674	valid_0's RMSE: 2.00607
[140]	valid_0's WRMSSE: 0.524529	valid_0's RMSE: 2.00482
[150]	valid_0's WRMSSE: 0.523741	valid_0's RMSE: 2.00385
[160]	valid_0's WRMSSE: 0.522856	valid_0's RMSE: 2.00309
[170]	valid_0's WRMSSE: 0.52191	valid_0's 

LGBMRegressor(bagging_freq=1, boosting_type='gbdt', class_weight=None,
       colsample_bytree=0.8, first_metric_only=True,
       importance_type='gain', learning_rate=0.05, max_depth=-1,
       metric='None', min_child_samples=20, min_child_weight=0.001,
       min_data_in_leaf=255, min_split_gain=0.0, n_estimators=1000,
       n_jobs=8, num_leaves=255,
       objective=<function custom_obj at 0x7fc71813d488>, random_state=51,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=0.8,
       subsample_for_bin=200000, subsample_freq=0)

In [292]:
pd.DataFrame(lgb_re2.feature_importances_,index=X_train.columns).sort_values(0,ascending=False)

Unnamed: 0,0
rolling_sale_28_mean,117267.755975
rolling_sale_14_mean,33925.637450
rolling_sale_7_mean,19185.872955
last_sale_diff,4861.382887
rolling_sale_dayofweek_52_mean,4312.597926
dayofweek,2765.430206
lag_sale_1,2324.143749
rolling_sale_dayofweek_52_quantile75,2289.470479
rolling_sale_3_mean,2010.923507
rolling_sale_364_std,1861.371809
