In [99]:

"""
This is an upgraded version of Ceshine's LGBM starter script, simply adding
more average features and weekly average features on it.
"""
from datetime import date, timedelta

import pandas as pd
import numpy as np
import lightgbm as lgb
import sys
import math
import gc
import sklearn.metrics as skl_metrics
from sklearn.metrics import mean_squared_error

# import math
# import sklearn.metrics as skl_metrics
# from sklearn.metrics import mean_squared_error

from logging import StreamHandler, DEBUG, Formatter, FileHandler, getLogger

logger = getLogger(__name__)

pd.options.mode.chained_assignment = None  # default='warn'

DIR = '../logs/'

log_fmt = Formatter('%(asctime)s %(name)s %(lineno)d [%(levelname)s]\
    [%(funcName)s] %(message)s ')
handler = StreamHandler()
handler.setLevel('INFO')
handler.setFormatter(log_fmt)
logger.addHandler(handler)

handler = FileHandler(DIR + 'train.py.log', 'a')
handler.setLevel(DEBUG)
handler.setFormatter(log_fmt)
logger.setLevel(DEBUG)
logger.addHandler(handler)

logger.info('start')

##########################################################################

train_out = pd.read_pickle('../data/T40_train_storeitem_1s.p')
val_out = pd.read_pickle('../data/T40_val_storeitem_1s.p')
X_test_out = pd.read_pickle('../data/T40_test_storeitem_1s.p')


df_test = pd.read_csv(
    "../input/test_1s.csv", usecols=[0, 1, 2, 3, 4],
    dtype={'onpromotion': bool},
    parse_dates=["date"]  # , date_parser=parser
).set_index(
    ['store_nbr', 'item_nbr', 'date']
)
    
items = pd.read_csv(
    "../input/items.csv",
).set_index("item_nbr")
items = items.reindex(train_out.item_nbr)

items_val = pd.read_csv(
    "../input/items.csv",
).set_index("item_nbr")
items_val = items_val.reindex(val_out['item_nbr'])

logger.info('Load data successful')

2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,079 __main__ 41 [INFO]    [<module>] start 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 
2018-01-03 11:54:32,202 __main__ 68 [INFO]    [<module>] Load data successful 


In [49]:
X_test_out.tail(1)

Unnamed: 0,date,day_1_2017,item_nbr,mean_140_2017,mean_14_2017,mean_182_2017,mean_21_2017,mean_30_2017,mean_364_2017,mean_3_2017,...,promo_6,promo_7,promo_8,promo_9,promo_10,promo_11,promo_12,promo_13,promo_14,promo_15
3550,2017-08-16,0.0,2127114,0.004951,0.049511,0.003809,0.033007,0.023105,0.001904,0.0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
val_out.head(1)

Unnamed: 0,index,date,day_1_2017,item_nbr,mean_140_2017,mean_14_2017,mean_182_2017,mean_21_2017,mean_30_2017,mean_364_2017,...,day7,day8,day9,day10,day11,day12,day13,day14,day15,day16
0,0,2017-07-26,0.0,96995,0.112661,0.177493,0.086662,0.118329,0.105935,0.043331,...,0.0,0.693147,1.098612,0.0,0.0,1.098612,1.098612,0.0,0.0,0.693147


In [51]:
train_out.head(1)

Unnamed: 0,index,date,day_1_2017,item_nbr,mean_140_2017,mean_14_2017,mean_182_2017,mean_21_2017,mean_30_2017,mean_364_2017,...,day7,day8,day9,day10,day11,day12,day13,day14,day15,day16
0,0,2016-08-03,0.0,96995,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [97]:
logger.info('Preparing traing dataset...')

all_columns = train_out.columns.tolist()

y_columns = ['day'+str(i) for i in range(1, 17)]
x_columns = [item for item in all_columns if item not in y_columns]

features_all = x_columns
features_all.remove("index") 
features_all.remove("date") 
features_all.remove("item_nbr") 
features_all.remove("store_nbr") 

X_train_out = train_out[x_columns]
X_val_out = val_out[x_columns]

y_train = train_out[y_columns].values
y_val = val_out[y_columns].values

X_train_allF = X_train_out[features_all]
X_val_allF = X_val_out[features_all]
X_test_allF = X_test_out[features_all]

#del train_out, val_out
#del X_train_out, X_val_out, X_test_out
gc.collect()

##########################################################################
logger.info('Training and predicting models...')

params = {
    'num_leaves': 31,
    'objective': 'regression',
    'min_data_in_leaf': 300,
    'learning_rate': 0.1,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 2,
    'metric': 'l2',
    'num_threads': 4
}

MAX_ROUNDS = 500
val_pred = []
test_pred = []
cate_vars = []


2018-01-03 11:54:17,440 __main__ 1 [INFO]    [<module>] Preparing traing dataset... 
2018-01-03 11:54:17,440 __main__ 1 [INFO]    [<module>] Preparing traing dataset... 
2018-01-03 11:54:17,440 __main__ 1 [INFO]    [<module>] Preparing traing dataset... 
2018-01-03 11:54:17,440 __main__ 1 [INFO]    [<module>] Preparing traing dataset... 
2018-01-03 11:54:17,440 __main__ 1 [INFO]    [<module>] Preparing traing dataset... 
2018-01-03 11:54:17,531 __main__ 29 [INFO]    [<module>] Training and predicting models... 
2018-01-03 11:54:17,531 __main__ 29 [INFO]    [<module>] Training and predicting models... 
2018-01-03 11:54:17,531 __main__ 29 [INFO]    [<module>] Training and predicting models... 
2018-01-03 11:54:17,531 __main__ 29 [INFO]    [<module>] Training and predicting models... 
2018-01-03 11:54:17,531 __main__ 29 [INFO]    [<module>] Training and predicting models... 


In [105]:
train_out["date"] = pd.to_datetime(train_out["date"])

In [106]:
train_out = train_out.loc[train_out["date"] < '2017-07-19', ]

In [103]:
train_out.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46163 entries, 0 to 46162
Data columns (total 91 columns):
index                  46163 non-null int64
date                   46163 non-null object
day_1_2017             46163 non-null float64
item_nbr               46163 non-null int64
mean_140_2017          46163 non-null float64
mean_14_2017           46163 non-null float64
mean_182_2017          46163 non-null float64
mean_21_2017           46163 non-null float64
mean_30_2017           46163 non-null float64
mean_364_2017          46163 non-null float64
mean_3_2017            46163 non-null float64
mean_42_2017           46163 non-null float64
mean_60_2017           46163 non-null float64
mean_7_2017            46163 non-null float64
mean_91_2017           46163 non-null float64
mean_ly_14_2017        46163 non-null float64
mean_ly_21_2017        46163 non-null float64
mean_ly_30_2017        46163 non-null float64
mean_ly_7_2017         46163 non-null float64
mean_ly_n16d_2017     

In [83]:
train_out[date < date(2017,7,19),]

NameError: name 'datetime' is not defined

In [7]:

train_week_2017 = 9
    
features_all = X_train_allF.columns.tolist()

for i in range(16):
    print("=" * 70)
    logger.info("Step %d" % (i+1))
    print("=" * 70)
    features_t = features_all.copy()

    for j in range(7):
        if j != i%7:
            features_t.remove('mean_4_dow{}_2017'.format(j))
            features_t.remove('mean_20_dow{}_2017'.format(j))
            features_t.remove('mean_52_dow{}_2017'.format(j))
            features_t.remove('mean_ly3w_dow{}_2017'.format(j))
            features_t.remove('mean_ly8w_dow{}_2017'.format(j))

    X_train = X_train_allF[features_t]
    X_val = X_val_allF[features_t]
    X_test = X_test_allF[features_t]

    dtrain = lgb.Dataset(
        X_train, label=y_train[:, i],
        categorical_feature=cate_vars,
        weight=pd.concat([items["perishable"]]) * 0.25 + 1
    )



    bst = lgb.train(
        params, dtrain, num_boost_round=MAX_ROUNDS,
        valid_sets=[dtrain], early_stopping_rounds=50, verbose_eval=100
    )

    logger.info("\n".join(("%s: %.2f" % x) for x in sorted(
        zip(X_train.columns, bst.feature_importance("gain")),
        key=lambda x: x[1], reverse=True
    )))

    test_pred.append(bst.predict(
        X_test, num_iteration=bst.best_iteration or MAX_ROUNDS))


    val_pred.append(bst.predict(
        X_val, num_iteration=bst.best_iteration or MAX_ROUNDS))

#del X_train, y_train
#del dtrain
#gc.collect()

2018-01-03 10:37:56,116 __main__ 8 [INFO]    [<module>] Step 1 






Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.253313
[200]	training's l2: 0.237332
[300]	training's l2: 0.224244
[400]	training's l2: 0.212518


2018-01-03 10:38:00,115 __main__ 39 [INFO]    [<module>] mean_14_2017: 65963.74
mean_7_2017: 56151.99
mean_20_dow0_2017: 7203.99
mean_21_2017: 6732.14
mean_30_2017: 4899.73
promo_0: 4518.13
mean_4_dow0_2017: 3209.92
mean_3_2017: 3081.67
mean_52_dow0_2017: 3009.99
mean_91_2017: 1960.86
day_1_2017: 1808.26
mean_42_2017: 1564.58
mean_60_2017: 1443.28
mean_182_2017: 1141.14
mean_ly3w_dow0_2017: 1009.32
mean_ly_n16d_2017: 983.96
promo_14_2017: 964.05
mean_364_2017: 922.34
mean_ly8w_dow0_2017: 921.83
mean_140_2017: 887.87
mean_ly_7_2017: 848.34
mean_ly_14_2017: 790.11
mean_ly_30_2017: 776.04
promo_140_2017: 753.67
mean_ly_21_2017: 743.16
promo_60_2017: 527.88
promo_7: 247.19
promo_14: 129.04
promo_9: 87.95
promo_15: 73.13
promo_2: 64.42
promo_10: 40.11
promo_3: 35.30
promo_1: 25.75
promo_5: 21.48
promo_4: 16.04
promo_8: 15.10
promo_13: 13.94
promo_11: 13.43
promo_6: 10.85
promo_12: 7.89 


[500]	training's l2: 0.202376


2018-01-03 10:38:00,240 __main__ 8 [INFO]    [<module>] Step 2 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.271478
[200]	training's l2: 0.254583
[300]	training's l2: 0.240488
[400]	training's l2: 0.228401


2018-01-03 10:38:03,110 __main__ 39 [INFO]    [<module>] mean_14_2017: 56407.63
mean_21_2017: 18186.39
mean_7_2017: 16976.58
mean_20_dow1_2017: 7311.95
mean_30_2017: 7178.21
mean_60_2017: 4477.46
mean_4_dow1_2017: 2874.54
mean_42_2017: 2872.76
mean_3_2017: 2480.51
promo_1: 2225.70
mean_52_dow1_2017: 2201.20
day_1_2017: 1231.00
mean_91_2017: 1119.50
mean_182_2017: 1065.86
mean_ly_n16d_2017: 1044.42
mean_ly8w_dow1_2017: 976.98
mean_ly3w_dow1_2017: 947.78
mean_364_2017: 940.55
mean_ly_7_2017: 889.39
mean_140_2017: 821.91
mean_ly_30_2017: 795.19
mean_ly_14_2017: 762.02
mean_ly_21_2017: 713.71
promo_14_2017: 709.50
promo_140_2017: 666.17
promo_60_2017: 502.75
promo_0: 152.42
promo_5: 87.64
promo_7: 72.43
promo_2: 63.29
promo_14: 59.42
promo_3: 56.16
promo_6: 46.45
promo_9: 45.45
promo_11: 29.04
promo_15: 24.48
promo_4: 22.49
promo_8: 13.12
promo_12: 12.55
promo_13: 10.66
promo_10: 7.14 


[500]	training's l2: 0.21765


2018-01-03 10:38:03,220 __main__ 8 [INFO]    [<module>] Step 3 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.278577
[200]	training's l2: 0.260134
[300]	training's l2: 0.245198
[400]	training's l2: 0.232805


2018-01-03 10:38:05,764 __main__ 39 [INFO]    [<module>] mean_14_2017: 46373.61
mean_7_2017: 23350.19
mean_21_2017: 19367.20
mean_20_dow2_2017: 18760.24
mean_4_dow2_2017: 7983.70
mean_42_2017: 5108.41
promo_2: 4775.18
mean_30_2017: 4224.79
mean_52_dow2_2017: 3188.66
mean_3_2017: 1879.53
mean_60_2017: 1515.49
mean_91_2017: 1240.54
mean_ly_7_2017: 1235.02
mean_ly8w_dow2_2017: 1174.72
day_1_2017: 1173.69
mean_ly3w_dow2_2017: 1051.68
mean_182_2017: 1039.49
mean_ly_n16d_2017: 1007.28
mean_364_2017: 998.10
promo_14_2017: 991.09
mean_ly_30_2017: 821.60
mean_ly_14_2017: 776.85
mean_140_2017: 771.74
promo_60_2017: 757.59
mean_ly_21_2017: 721.39
promo_140_2017: 700.39
promo_9: 336.91
promo_3: 233.11
promo_7: 217.29
promo_5: 199.92
promo_14: 133.13
promo_0: 67.26
promo_13: 65.69
promo_6: 39.74
promo_1: 33.88
promo_15: 24.07
promo_11: 23.76
promo_10: 22.41
promo_8: 15.48
promo_12: 11.88
promo_4: 10.03 


[500]	training's l2: 0.221744


2018-01-03 10:38:05,879 __main__ 8 [INFO]    [<module>] Step 4 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.269922
[200]	training's l2: 0.251984
[300]	training's l2: 0.237702
[400]	training's l2: 0.225624


2018-01-03 10:38:08,892 __main__ 39 [INFO]    [<module>] mean_14_2017: 37426.58
mean_21_2017: 33007.60
mean_7_2017: 16057.34
mean_30_2017: 9290.55
mean_60_2017: 8782.30
mean_20_dow3_2017: 6491.92
mean_3_2017: 5222.96
mean_52_dow3_2017: 3241.35
mean_42_2017: 3033.96
promo_3: 2877.79
mean_4_dow3_2017: 2870.99
mean_91_2017: 1445.44
mean_ly8w_dow3_2017: 1277.81
mean_ly_7_2017: 1147.06
mean_ly_n16d_2017: 1096.26
mean_ly3w_dow3_2017: 1006.82
mean_ly_14_2017: 948.48
mean_182_2017: 887.62
mean_ly_30_2017: 860.71
mean_364_2017: 843.73
mean_140_2017: 820.75
mean_ly_21_2017: 803.06
day_1_2017: 800.77
promo_14_2017: 785.42
promo_140_2017: 679.64
promo_60_2017: 476.28
promo_7: 299.74
promo_6: 139.25
promo_2: 120.74
promo_5: 114.53
promo_14: 93.85
promo_1: 74.22
promo_0: 57.09
promo_8: 42.63
promo_9: 30.87
promo_4: 25.65
promo_12: 23.32
promo_15: 23.24
promo_13: 17.59
promo_11: 7.17
promo_10: 6.86 


[500]	training's l2: 0.214724


2018-01-03 10:38:08,992 __main__ 8 [INFO]    [<module>] Step 5 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.205097
[200]	training's l2: 0.191092
[300]	training's l2: 0.17981
[400]	training's l2: 0.170188


2018-01-03 10:38:11,633 __main__ 39 [INFO]    [<module>] mean_7_2017: 16308.17
mean_21_2017: 14333.85
mean_14_2017: 10211.05
mean_20_dow4_2017: 8540.54
mean_52_dow4_2017: 4343.15
mean_42_2017: 3408.72
mean_3_2017: 2912.64
promo_4: 2677.44
mean_60_2017: 2264.34
mean_4_dow4_2017: 2161.37
mean_30_2017: 2056.96
mean_91_2017: 1474.49
mean_ly8w_dow4_2017: 1073.98
day_1_2017: 1032.68
mean_ly_7_2017: 930.78
mean_ly_n16d_2017: 877.46
mean_ly3w_dow4_2017: 848.16
mean_ly_14_2017: 811.48
mean_364_2017: 751.21
mean_182_2017: 733.69
mean_140_2017: 703.38
mean_ly_21_2017: 701.89
mean_ly_30_2017: 687.90
promo_140_2017: 531.59
promo_14_2017: 414.12
promo_60_2017: 350.56
promo_7: 186.15
promo_5: 148.12
promo_3: 133.57
promo_6: 67.58
promo_14: 62.94
promo_2: 60.19
promo_0: 38.59
promo_13: 29.83
promo_1: 19.04
promo_9: 18.82
promo_8: 8.39
promo_12: 8.12
promo_10: 6.49
promo_11: 4.31
promo_15: 2.65 


[500]	training's l2: 0.16183


2018-01-03 10:38:11,778 __main__ 8 [INFO]    [<module>] Step 6 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.272884
[200]	training's l2: 0.255688
[300]	training's l2: 0.241685
[400]	training's l2: 0.229767


2018-01-03 10:38:14,812 __main__ 39 [INFO]    [<module>] mean_21_2017: 35498.88
mean_14_2017: 31185.34
mean_42_2017: 15294.20
mean_7_2017: 14764.85
mean_60_2017: 12770.42
mean_30_2017: 7880.10
mean_4_dow5_2017: 3220.07
mean_3_2017: 3201.70
mean_52_dow5_2017: 2712.22
promo_5: 2564.86
mean_20_dow5_2017: 2387.11
mean_182_2017: 1250.05
mean_ly8w_dow5_2017: 1164.83
day_1_2017: 1137.67
mean_91_2017: 1112.98
mean_ly_7_2017: 1025.84
mean_364_2017: 977.89
mean_ly3w_dow5_2017: 965.37
mean_ly_n16d_2017: 961.76
promo_14_2017: 870.63
mean_140_2017: 865.85
mean_ly_14_2017: 819.27
mean_ly_21_2017: 795.26
mean_ly_30_2017: 742.39
promo_60_2017: 598.11
promo_140_2017: 574.21
promo_7: 350.08
promo_1: 130.83
promo_2: 111.98
promo_6: 92.98
promo_3: 91.89
promo_14: 81.00
promo_8: 68.82
promo_0: 45.63
promo_9: 44.28
promo_4: 29.25
promo_13: 19.98
promo_15: 18.73
promo_12: 18.37
promo_10: 17.37
promo_11: 15.31 


[500]	training's l2: 0.218747


2018-01-03 10:38:14,923 __main__ 8 [INFO]    [<module>] Step 7 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.286458
[200]	training's l2: 0.268781
[300]	training's l2: 0.254401
[400]	training's l2: 0.241713


2018-01-03 10:38:17,751 __main__ 39 [INFO]    [<module>] mean_21_2017: 36909.10
mean_14_2017: 28530.37
mean_30_2017: 19162.92
mean_60_2017: 11159.88
mean_7_2017: 9964.36
mean_42_2017: 6288.86
mean_20_dow6_2017: 4171.53
promo_6: 2869.91
mean_3_2017: 2346.25
mean_4_dow6_2017: 2273.78
mean_52_dow6_2017: 2168.96
mean_91_2017: 1407.67
mean_140_2017: 1233.14
mean_ly3w_dow6_2017: 1219.08
mean_ly_n16d_2017: 1117.05
day_1_2017: 1092.73
mean_ly_7_2017: 1060.01
mean_ly8w_dow6_2017: 1024.82
mean_182_2017: 980.24
mean_364_2017: 974.89
mean_ly_30_2017: 890.55
mean_ly_21_2017: 856.75
mean_ly_14_2017: 844.20
promo_14_2017: 837.89
promo_140_2017: 732.93
promo_60_2017: 559.98
promo_7: 375.47
promo_3: 211.18
promo_2: 133.95
promo_14: 117.34
promo_5: 100.79
promo_0: 72.69
promo_9: 66.30
promo_1: 51.29
promo_8: 48.22
promo_4: 36.74
promo_11: 30.61
promo_12: 13.44
promo_13: 13.34
promo_10: 12.71
promo_15: 6.55 


[500]	training's l2: 0.230422


2018-01-03 10:38:17,896 __main__ 8 [INFO]    [<module>] Step 8 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.273501
[200]	training's l2: 0.256344
[300]	training's l2: 0.24238
[400]	training's l2: 0.23042


2018-01-03 10:38:20,556 __main__ 39 [INFO]    [<module>] mean_14_2017: 49878.49
mean_60_2017: 22285.90
mean_42_2017: 18760.62
mean_30_2017: 15242.42
mean_21_2017: 10751.45
mean_20_dow0_2017: 10435.49
mean_7_2017: 7304.50
promo_7: 6979.08
mean_91_2017: 3898.22
mean_4_dow0_2017: 3165.10
mean_52_dow0_2017: 2535.24
mean_3_2017: 2273.01
mean_182_2017: 1786.00
day_1_2017: 1458.06
mean_ly3w_dow0_2017: 1446.25
mean_140_2017: 1321.91
mean_ly8w_dow0_2017: 1311.54
mean_364_2017: 1071.45
mean_ly_n16d_2017: 1033.80
mean_ly_7_2017: 919.98
promo_140_2017: 853.40
promo_14_2017: 843.92
mean_ly_30_2017: 842.48
mean_ly_14_2017: 823.43
mean_ly_21_2017: 721.78
promo_60_2017: 669.56
promo_0: 440.41
promo_14: 312.38
promo_9: 113.36
promo_6: 87.80
promo_5: 85.46
promo_2: 60.90
promo_8: 54.56
promo_15: 40.31
promo_3: 34.93
promo_1: 29.48
promo_4: 29.09
promo_13: 18.55
promo_11: 16.93
promo_10: 16.48
promo_12: 12.56 


[500]	training's l2: 0.219572


2018-01-03 10:38:20,686 __main__ 8 [INFO]    [<module>] Step 9 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.285005
[200]	training's l2: 0.266483
[300]	training's l2: 0.251543
[400]	training's l2: 0.238985


2018-01-03 10:38:23,247 __main__ 39 [INFO]    [<module>] mean_30_2017: 34459.86
mean_60_2017: 22329.47
mean_7_2017: 15002.82
mean_21_2017: 14617.81
mean_42_2017: 9366.01
mean_14_2017: 6796.03
mean_20_dow1_2017: 5351.46
mean_4_dow1_2017: 3400.49
promo_8: 3161.26
mean_52_dow1_2017: 2852.24
mean_3_2017: 1785.51
mean_182_2017: 1383.71
mean_91_2017: 1272.11
mean_ly_n16d_2017: 1089.32
mean_364_2017: 1074.63
mean_ly8w_dow1_2017: 1069.72
mean_ly3w_dow1_2017: 1020.16
mean_ly_30_2017: 955.12
mean_ly_7_2017: 945.27
day_1_2017: 912.56
mean_ly_14_2017: 901.42
mean_140_2017: 859.03
mean_ly_21_2017: 835.46
promo_14_2017: 728.94
promo_140_2017: 691.40
promo_60_2017: 585.91
promo_7: 345.97
promo_14: 163.04
promo_12: 137.62
promo_10: 116.08
promo_9: 82.27
promo_13: 74.11
promo_0: 71.42
promo_3: 42.07
promo_1: 41.90
promo_2: 40.79
promo_15: 35.05
promo_4: 26.76
promo_5: 21.89
promo_11: 12.84
promo_6: 9.10 


[500]	training's l2: 0.228102


2018-01-03 10:38:23,404 __main__ 8 [INFO]    [<module>] Step 10 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.291998
[200]	training's l2: 0.272602
[300]	training's l2: 0.257535
[400]	training's l2: 0.244479


2018-01-03 10:38:26,129 __main__ 39 [INFO]    [<module>] mean_20_dow2_2017: 40733.90
mean_30_2017: 31613.46
mean_60_2017: 19084.22
mean_4_dow2_2017: 10267.81
mean_7_2017: 5977.82
mean_42_2017: 5591.45
promo_9: 5183.82
mean_21_2017: 4770.50
mean_14_2017: 4582.84
mean_52_dow2_2017: 2583.12
mean_3_2017: 1658.92
mean_ly3w_dow2_2017: 1392.24
mean_182_2017: 1245.52
mean_ly_n16d_2017: 1235.11
day_1_2017: 1190.89
mean_ly_7_2017: 1178.28
mean_91_2017: 1152.41
mean_364_2017: 1119.89
promo_140_2017: 1046.55
mean_ly8w_dow2_2017: 1007.75
mean_ly_21_2017: 947.26
mean_ly_14_2017: 924.48
mean_ly_30_2017: 921.78
mean_140_2017: 851.87
promo_14_2017: 792.69
promo_60_2017: 728.78
promo_14: 415.06
promo_2: 378.88
promo_12: 225.59
promo_10: 211.45
promo_7: 196.83
promo_0: 57.69
promo_11: 56.83
promo_13: 52.38
promo_8: 36.75
promo_4: 24.11
promo_3: 11.82
promo_15: 11.39
promo_6: 10.59
promo_1: 9.48
promo_5: 4.57 


[500]	training's l2: 0.232828


2018-01-03 10:38:26,260 __main__ 8 [INFO]    [<module>] Step 11 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.282648
[200]	training's l2: 0.263693
[300]	training's l2: 0.24903
[400]	training's l2: 0.236334


2018-01-03 10:38:29,064 __main__ 39 [INFO]    [<module>] mean_42_2017: 41872.44
mean_60_2017: 17877.70
mean_21_2017: 17402.55
mean_30_2017: 16233.86
mean_20_dow3_2017: 7381.76
mean_14_2017: 6719.96
mean_7_2017: 4950.31
mean_52_dow3_2017: 3624.21
promo_10: 3230.55
mean_3_2017: 2347.69
mean_4_dow3_2017: 1973.50
mean_ly8w_dow3_2017: 1307.23
mean_ly_n16d_2017: 1235.37
mean_ly3w_dow3_2017: 1199.14
mean_ly_7_2017: 1163.14
mean_91_2017: 1093.60
mean_364_2017: 1001.00
mean_ly_21_2017: 954.86
mean_182_2017: 938.59
day_1_2017: 910.11
mean_ly_30_2017: 867.31
mean_140_2017: 857.97
promo_140_2017: 780.49
mean_ly_14_2017: 777.97
promo_14_2017: 663.88
promo_14: 500.11
promo_60_2017: 477.69
promo_7: 178.20
promo_13: 161.63
promo_12: 130.18
promo_9: 127.66
promo_8: 84.21
promo_11: 65.25
promo_2: 46.09
promo_15: 44.57
promo_1: 39.77
promo_0: 38.36
promo_5: 36.00
promo_6: 29.34
promo_4: 28.32
promo_3: 11.61 


[500]	training's l2: 0.225


2018-01-03 10:38:29,182 __main__ 8 [INFO]    [<module>] Step 12 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.212991
[200]	training's l2: 0.198834
[300]	training's l2: 0.187358
[400]	training's l2: 0.177424


2018-01-03 10:38:31,950 __main__ 39 [INFO]    [<module>] mean_42_2017: 23070.63
mean_20_dow4_2017: 12735.28
mean_60_2017: 8658.19
mean_14_2017: 5236.90
mean_30_2017: 4209.93
mean_52_dow4_2017: 4059.51
promo_11: 2931.01
mean_7_2017: 2332.07
mean_21_2017: 2301.98
mean_4_dow4_2017: 1569.12
mean_3_2017: 1394.31
mean_182_2017: 1030.84
mean_ly8w_dow4_2017: 1008.74
mean_ly_n16d_2017: 957.95
mean_ly_7_2017: 889.38
mean_91_2017: 887.58
mean_364_2017: 828.14
mean_140_2017: 793.31
mean_ly3w_dow4_2017: 790.38
mean_ly_14_2017: 767.45
day_1_2017: 753.11
mean_ly_30_2017: 733.44
mean_ly_21_2017: 628.56
promo_140_2017: 540.43
promo_60_2017: 383.42
promo_14: 331.05
promo_14_2017: 295.95
promo_10: 185.09
promo_12: 169.06
promo_13: 142.08
promo_9: 91.19
promo_7: 88.97
promo_5: 48.79
promo_8: 43.35
promo_0: 39.63
promo_2: 29.38
promo_15: 22.38
promo_3: 16.66
promo_1: 11.73
promo_6: 3.77
promo_4: 3.38 


[500]	training's l2: 0.168673


2018-01-03 10:38:32,084 __main__ 8 [INFO]    [<module>] Step 13 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.287197
[200]	training's l2: 0.268749
[300]	training's l2: 0.253822
[400]	training's l2: 0.241115


2018-01-03 10:38:34,900 __main__ 39 [INFO]    [<module>] mean_30_2017: 42809.07
mean_42_2017: 25351.35
mean_60_2017: 23844.02
mean_7_2017: 9839.19
mean_14_2017: 7922.48
mean_20_dow5_2017: 3447.87
mean_21_2017: 3437.41
promo_12: 3075.03
mean_52_dow5_2017: 2859.19
mean_4_dow5_2017: 2417.39
mean_3_2017: 1957.95
mean_182_2017: 1780.31
mean_91_2017: 1538.47
mean_364_2017: 1202.14
mean_ly_n16d_2017: 1179.03
mean_140_2017: 1154.13
mean_ly8w_dow5_2017: 1102.59
day_1_2017: 1036.12
mean_ly3w_dow5_2017: 1035.42
mean_ly_7_2017: 999.65
mean_ly_30_2017: 994.80
mean_ly_14_2017: 928.27
mean_ly_21_2017: 772.73
promo_140_2017: 664.62
promo_14_2017: 558.02
promo_60_2017: 544.65
promo_14: 516.65
promo_7: 229.18
promo_9: 157.49
promo_8: 112.45
promo_0: 90.33
promo_13: 86.47
promo_15: 83.60
promo_10: 68.69
promo_1: 45.58
promo_2: 44.66
promo_11: 32.67
promo_5: 26.42
promo_6: 17.33
promo_4: 12.54
promo_3: 12.39 


[500]	training's l2: 0.229831


2018-01-03 10:38:35,028 __main__ 8 [INFO]    [<module>] Step 14 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.299622
[200]	training's l2: 0.280202
[300]	training's l2: 0.265032
[400]	training's l2: 0.251701


2018-01-03 10:38:37,659 __main__ 39 [INFO]    [<module>] mean_30_2017: 46712.76
mean_42_2017: 18882.24
mean_21_2017: 12649.13
mean_60_2017: 11882.42
mean_7_2017: 8972.06
mean_14_2017: 4921.50
mean_20_dow6_2017: 4548.82
promo_13: 3391.41
mean_4_dow6_2017: 3111.17
mean_91_2017: 2969.47
mean_3_2017: 2544.76
mean_52_dow6_2017: 2092.35
mean_182_2017: 1911.31
mean_ly3w_dow6_2017: 1436.80
mean_ly8w_dow6_2017: 1374.33
mean_ly_n16d_2017: 1242.65
day_1_2017: 1215.37
mean_364_2017: 1104.05
mean_140_2017: 1048.08
mean_ly_7_2017: 999.79
mean_ly_30_2017: 972.84
mean_ly_14_2017: 943.95
promo_140_2017: 877.01
mean_ly_21_2017: 762.50
promo_14: 646.70
promo_60_2017: 561.22
promo_14_2017: 560.18
promo_10: 246.29
promo_7: 125.39
promo_12: 105.45
promo_0: 98.81
promo_2: 77.77
promo_9: 62.91
promo_15: 59.78
promo_8: 42.88
promo_1: 41.07
promo_11: 33.49
promo_6: 31.93
promo_5: 24.41
promo_4: 17.17
promo_3: 12.08 


[500]	training's l2: 0.240167


2018-01-03 10:38:37,770 __main__ 8 [INFO]    [<module>] Step 15 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.283642
[200]	training's l2: 0.265499
[300]	training's l2: 0.25171
[400]	training's l2: 0.239234


2018-01-03 10:38:40,563 __main__ 39 [INFO]    [<module>] mean_30_2017: 37073.60
mean_42_2017: 35469.13
mean_20_dow0_2017: 18493.75
mean_60_2017: 16072.86
mean_14_2017: 12756.99
promo_14: 7972.47
mean_21_2017: 6988.28
mean_7_2017: 6374.18
mean_52_dow0_2017: 3364.53
mean_4_dow0_2017: 3277.14
mean_182_2017: 1889.82
mean_ly8w_dow0_2017: 1841.06
mean_3_2017: 1687.32
mean_91_2017: 1676.76
mean_ly_n16d_2017: 1197.42
mean_364_2017: 1168.18
mean_ly_7_2017: 1035.72
mean_ly3w_dow0_2017: 1029.08
day_1_2017: 1024.30
mean_ly_30_2017: 1013.70
mean_140_2017: 999.44
promo_140_2017: 987.71
mean_ly_14_2017: 872.28
promo_14_2017: 744.61
mean_ly_21_2017: 676.82
promo_60_2017: 616.92
promo_7: 403.77
promo_0: 259.33
promo_2: 121.39
promo_13: 82.53
promo_11: 65.16
promo_12: 50.73
promo_15: 44.47
promo_8: 43.66
promo_9: 40.41
promo_6: 38.38
promo_5: 22.01
promo_3: 21.83
promo_10: 20.87
promo_4: 16.56
promo_1: 13.82 


[500]	training's l2: 0.228273


2018-01-03 10:38:40,703 __main__ 8 [INFO]    [<module>] Step 16 


Training until validation scores don't improve for 50 rounds.
[100]	training's l2: 0.296819
[200]	training's l2: 0.277209
[300]	training's l2: 0.261781
[400]	training's l2: 0.248316


2018-01-03 10:38:43,356 __main__ 39 [INFO]    [<module>] mean_42_2017: 40183.06
mean_60_2017: 20926.77
mean_30_2017: 16678.11
mean_21_2017: 14761.16
mean_14_2017: 6517.70
mean_20_dow1_2017: 5751.20
promo_15: 3073.32
mean_4_dow1_2017: 2922.21
mean_52_dow1_2017: 2817.26
mean_7_2017: 1893.45
mean_182_2017: 1547.65
mean_3_2017: 1522.08
mean_91_2017: 1371.43
mean_ly_n16d_2017: 1229.06
mean_ly8w_dow1_2017: 1208.81
day_1_2017: 1136.48
mean_ly3w_dow1_2017: 1101.43
mean_364_2017: 1056.00
mean_140_2017: 1050.92
mean_ly_14_2017: 975.85
mean_ly_7_2017: 971.85
mean_ly_30_2017: 895.01
promo_140_2017: 839.96
mean_ly_21_2017: 832.27
promo_60_2017: 572.51
promo_14: 570.51
promo_14_2017: 508.03
promo_0: 116.50
promo_7: 100.77
promo_10: 71.18
promo_9: 64.29
promo_2: 50.72
promo_13: 49.99
promo_12: 36.39
promo_11: 32.87
promo_5: 29.06
promo_8: 27.71
promo_6: 22.12
promo_3: 14.96
promo_4: 12.26
promo_1: 11.86 


[500]	training's l2: 0.236348


187

In [32]:

logger.info('Making submission...')

X_test_out.reset_index(inplace = True)
del X_test_out["date"]
X_test_out = X_test_out.set_index(["store_nbr", "item_nbr"])

y_test = np.array(test_pred).transpose()
df_preds = pd.DataFrame(
    y_test, index=X_test_out.index,
    columns=pd.date_range("2017-08-16", periods=16)
).stack().to_frame("unit_sales")


2018-01-03 10:54:06,461 __main__ 2 [INFO]    [<module>] Making submission... 
2018-01-03 10:54:06,461 __main__ 2 [INFO]    [<module>] Making submission... 
2018-01-03 10:54:06,461 __main__ 2 [INFO]    [<module>] Making submission... 


In [33]:
df_preds.tail(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,unit_sales
store_nbr,item_nbr,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2127114,2017-08-31,0.061506


In [34]:
X_test_out.head(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,index,day_1_2017,mean_140_2017,mean_14_2017,mean_182_2017,mean_21_2017,mean_30_2017,mean_364_2017,mean_3_2017,mean_42_2017,...,promo_6,promo_7,promo_8,promo_9,promo_10,promo_11,promo_12,promo_13,promo_14,promo_15
store_nbr,item_nbr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,96995,0,0.0,0.153952,0.334438,0.118424,0.275274,0.275522,0.059212,0.0,0.196801,...,0,0,0,0,0,0,0,0,0,0


In [37]:

df_preds.index.set_names(["store_nbr", "item_nbr", "date"], inplace=True)

submission = df_test[["id"]].join(df_preds, how="left").fillna(0)

submission["unit_sales"] = np.clip(np.expm1(submission["unit_sales"]), 0, 1000)
submission.to_csv('../submit/T040_tmp.csv', float_format='%.4f', index=None)

# PZ, Check overral result
print("SUM =",  submission.unit_sales.sum())
print("MEAN =",  submission.unit_sales.mean())

##########################################################################
df_prev = submission

df_sub = pd.read_csv('../input/sub_zero_30d.csv')

t_new = pd.merge(df_prev, df_sub, on=['id'], how='left')
t_new['unit_sales'] = t_new.unit_sales_y.combine_first(t_new.unit_sales_x)

submission = t_new[['id', 'unit_sales']]
del t_new

print("Merged  SUM =",  submission.unit_sales.sum())
print("Merged  MEAN =",  submission.unit_sales.mean())

submission.to_csv('../submit/T040_split.csv.gz',
                  float_format='%.4f', index=None, compression='gzip')


SUM = 3325.190447266003
MEAN = 0.043304080733275206
Merged  SUM = 3321.2397056780615
Merged  MEAN = 0.04325263007641999
