In [1]:
import gc
import pandas as pd
import numpy as np
import os

from tqdm import tqdm_notebook

# Let pandas render up to 100 columns before it elides the middle of a wide frame.
pd.set_option('display.max_columns', 100)

from utils import loadpkl, to_feature, line_notify
from utils import COLS_TEST1, COLS_TEST2, DAYS_PRED

%matplotlib inline

In [2]:
# Read the three pickled tables under ../feats, then the sample-submission CSV.
# The pickles are loaded in the order listed: sales, calendar, sell_prices.
df_sales, df_calendar, df_sell_prices = map(
    loadpkl,
    (
        '../feats/sales.pkl',
        '../feats/calendar.pkl',
        '../feats/sell_prices.pkl',
    ),
)
df_sub = pd.read_csv('../input/sample_submission.csv')

In [5]:
# split test data
# The submission template holds two groups of rows: ids containing
# '_validation' and ids containing '_evaluation'. Each group is split out,
# given its own column names, and left-joined onto `df` by 'id'.
#
# The template is read from `df_sub` (loaded in the cell above). This cell
# previously read an undefined name `sub`, which raises NameError on a fresh
# kernel. `df_sub` itself is left untouched so later cells can still use it.
is_test1 = df_sub['id'].str.contains('_validation', regex=False)
is_test2 = df_sub['id'].str.contains('_evaluation', regex=False)

# .copy() yields independent frames, so the column/id edits below never write
# into a slice of df_sub (no SettingWithCopyWarning, no accidental mutation).
test1 = df_sub[is_test1].copy()
test2 = df_sub[is_test2].copy()

del is_test1, is_test2

# change column name
test1.columns = ['id'] + COLS_TEST1
test2.columns = ['id'] + COLS_TEST2

# change id
# Rewrite the suffix so both groups share the '_validation' ids used as the
# join key; regex=False makes the literal (non-pattern) replacement explicit.
test2['id'] = test2['id'].str.replace('_evaluation', '_validation', regex=False)

# merge
# NOTE(review): `df` is not defined in any cell visible in this notebook —
# it must already exist (with an 'id' column) before this cell runs; confirm
# which frame it is meant to be and define it explicitly above.
df = df.merge(test1, on='id', how='left')
df = df.merge(test2, on='id', how='left')

del test1, test2
gc.collect()

63

In [7]:
# Reshape to long format: the identifier columns are kept as-is, and every
# other column becomes one row with its column name in 'd' and its value in
# 'demand'.
id_vars = [
    'id',
    'item_id',
    'dept_id',
    'cat_id',
    'store_id',
    'state_id',
]
df = df.melt(id_vars=id_vars, var_name='d', value_name='demand')

In [5]:
# Display the table loaded from ../feats/sales.pkl.
df_sales

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,demand,demand_shift_7,demand_shift_28,demand_mean_7_7,demand_mean_7_28,demand_mean_28_7,demand_mean_28_28
7,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_1,12,,,,,,
8,HOBBIES_1_009_CA_1_validation,HOBBIES_1_009,HOBBIES_1,HOBBIES,CA_1,CA,d_1,2,,,,,,
14,HOBBIES_1_015_CA_1_validation,HOBBIES_1_015,HOBBIES_1,HOBBIES,CA_1,CA,d_1,4,,,,,,
15,HOBBIES_1_016_CA_1_validation,HOBBIES_1_016,HOBBIES_1,HOBBIES,CA_1,CA,d_1,5,,,,,,
21,HOBBIES_1_022_CA_1_validation,HOBBIES_1_022,HOBBIES_1,HOBBIES,CA_1,CA,d_1,2,,,,,,
22,HOBBIES_1_023_CA_1_validation,HOBBIES_1_023,HOBBIES_1,HOBBIES,CA_1,CA,d_1,2,,,,,,
28,HOBBIES_1_029_CA_1_validation,HOBBIES_1_029,HOBBIES_1,HOBBIES,CA_1,CA,d_1,2,,,,,,
31,HOBBIES_1_032_CA_1_validation,HOBBIES_1_032,HOBBIES_1,HOBBIES,CA_1,CA,d_1,9,,,,,,
35,HOBBIES_1_036_CA_1_validation,HOBBIES_1_036,HOBBIES_1,HOBBIES,CA_1,CA,d_1,2,,,,,,
43,HOBBIES_1_044_CA_1_validation,HOBBIES_1_044,HOBBIES_1,HOBBIES,CA_1,CA,d_1,3,,,,,,


In [11]:
# Count how many rows each id has in the sales table.
df_sales['id'].value_counts()

HOBBIES_1_278_WI_2_validation      1969
FOODS_2_063_CA_1_validation        1969
FOODS_3_092_TX_2_validation        1969
HOUSEHOLD_1_375_WI_1_validation    1969
FOODS_3_485_CA_2_validation        1969
FOODS_2_244_CA_4_validation        1969
FOODS_3_498_TX_1_validation        1969
HOUSEHOLD_1_525_TX_1_validation    1969
FOODS_1_106_TX_2_validation        1969
FOODS_3_295_WI_3_validation        1969
FOODS_3_594_CA_2_validation        1969
HOUSEHOLD_2_074_CA_1_validation    1969
HOUSEHOLD_1_016_WI_2_validation    1969
HOBBIES_1_350_CA_2_validation      1969
HOBBIES_1_355_CA_2_validation      1969
HOBBIES_2_120_WI_2_validation      1969
HOUSEHOLD_1_196_TX_3_validation    1969
HOUSEHOLD_1_525_WI_3_validation    1969
HOUSEHOLD_1_331_WI_3_validation    1969
FOODS_3_280_WI_3_validation        1969
FOODS_2_154_TX_1_validation        1969
FOODS_3_480_CA_3_validation        1969
FOODS_3_468_TX_2_validation        1969
FOODS_2_150_TX_2_validation        1969
HOUSEHOLD_1_018_WI_3_validation    1969


In [9]:
# Preview the first five rows of the sample submission.
df_sub.head(n=5)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,HOBBIES_1_004_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,HOBBIES_1_005_CA_1_validation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [3]:
# Display the table loaded from ../feats/calendar.pkl.
df_calendar

Unnamed: 0,date,wm_yr_wk,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,seasonality,day
0,2011-01-29,11101,1,1,2011,d_1,,,,,0,0,0,-0.878612,29
1,2011-01-30,11101,2,1,2011,d_2,,,,,0,0,0,-0.870285,30
2,2011-01-31,11101,3,1,2011,d_3,,,,,0,0,0,-0.861702,31
3,2011-02-01,11101,4,2,2011,d_4,,,,,1,1,0,-0.852864,1
4,2011-02-02,11101,5,2,2011,d_5,,,,,1,0,1,-0.843776,2
5,2011-02-03,11101,6,2,2011,d_6,,,,,1,1,1,-0.834438,3
6,2011-02-04,11101,7,2,2011,d_7,,,,,1,0,0,-0.824855,4
7,2011-02-05,11102,1,2,2011,d_8,,,,,1,1,1,-0.815028,5
8,2011-02-06,11102,2,2,2011,d_9,0.0,0.0,,,1,1,1,-0.804962,6
9,2011-02-07,11102,3,2,2011,d_10,,,,,1,1,0,-0.794658,7


In [5]:
# Show every sales row belonging to one example id.
df_sales.loc[df_sales['id'] == 'HOBBIES_1_008_CA_1_validation']

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d,demand,demand_shift_0,demand_shift_1,demand_shift_2,demand_shift_365,demand_mean_7,demand_mean_30,demand_mean_60,demand_mean_90,demand_mean_180,demand_mean_365,demand_std_7,demand_std_30,demand_std_60,demand_std_90,demand_std_180,demand_std_365,demand_skew_7,demand_skew_30,demand_skew_60,demand_skew_90,demand_skew_180,demand_skew_365,demand_kurt_7,demand_kurt_30,demand_kurt_60,demand_kurt_90,demand_kurt_180,demand_kurt_365,demand_max_7,demand_max_30,demand_max_60,demand_max_90,demand_max_180,demand_max_365,demand_min_7,demand_min_30,demand_min_60,demand_min_90,demand_min_180,demand_min_365
7,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_1,12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
30497,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_2,15,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
60987,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_3,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
91477,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_4,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
121967,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_5,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
152457,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_6,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
182947,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_7,6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
213437,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_8,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
243927,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_9,7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
274417,HOBBIES_1_008_CA_1_validation,HOBBIES_1_008,HOBBIES_1,HOBBIES,CA_1,CA,d_10,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [5]:
# NOTE(review): scratch arithmetic with no stated purpose — confirm what
# 220000 and 6 stand for, or remove this cell.
220000/6

36666.666666666664

In [8]:
# NOTE(review): scratch arithmetic with no stated purpose — confirm whether
# this cell is still needed, or remove it.
2**6

64