In [58]:
import pandas as pd
import numpy as np
import lightgbm as lgb

In [59]:
data = pd.read_csv('train.csv', parse_dates=['Date'])
stores = pd.read_csv('store.csv')
store_features = stores[['Store', 'StoreType', 'Assortment', 'CompetitionDistance', 'Promo2']]

data = data.merge(store_features.set_index('Store'), on='Store', how='left')
data = data.loc[data.Open == 1].drop(columns=['Open'])
data.head(5)

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Store,DayOfWeek,Date,Sales,Customers,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,Promo2
0,1,5,2015-07-31,5263,555,1,0,1,c,a,1270.0,0
1,2,5,2015-07-31,6064,625,1,0,1,a,a,570.0,1
2,3,5,2015-07-31,8314,821,1,0,1,a,a,14130.0,1
3,4,5,2015-07-31,13995,1498,1,0,1,c,c,620.0,0
4,5,5,2015-07-31,4822,559,1,0,1,a,a,29910.0,0


In [60]:
test_border = '2015-06-17'
val_border = '2015-05-01'

test = data.loc[data.Date >= test_border].drop(columns=['Customers'])

In [61]:
data['mean_sales'] = data.groupby('Store').Sales.transform('mean')
data['std_sales'] = data.groupby('Store').Sales.transform('std')
data['max_sales'] = data.groupby('Store').Sales.transform('max')
data['min_sales'] = data.groupby('Store').Sales.transform('min')

data['mean_customers'] = data.groupby('Store').Customers.transform('mean')
data['std_customers'] = data.groupby('Store').Customers.transform('std')
data['max_customers'] = data.groupby('Store').Customers.transform('max')
data['min_customers'] = data.groupby('Store').Customers.transform('min')

data['month'] = data.Date.dt.month
data['months_from_start'] = 12 * (data.Date.dt.year - data.Date.min().year) +\
                            (data.Date.dt.month - data.Date.min().month)

data = data.drop(columns=['Customers'])

data[['DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday', 'StoreType', 'Assortment', 'Promo2', 'month']] =\
data[['DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday', 'StoreType', 'Assortment', 'Promo2', 'month']].astype('category')

In [62]:
data = data.loc[data.Sales != 0]
validation = data.loc[(data.Date > val_border) & (data.Date < test_border)]
train = data.loc[data.Date <= val_border]

In [63]:
validation

Unnamed: 0,Store,DayOfWeek,Date,Sales,Promo,StateHoliday,SchoolHoliday,StoreType,Assortment,CompetitionDistance,...,mean_sales,std_sales,max_sales,min_sales,mean_customers,std_customers,max_customers,min_customers,month,months_from_start
50175,1,2,2015-06-16,4852,1,0,0,c,a,1270.0,...,4759.096031,1012.106393,9528,2362,564.049936,93.707476,1130,298,6,29
50176,2,2,2015-06-16,6243,1,0,0,a,a,570.0,...,4953.900510,1610.149102,10682,1919,583.998724,155.374483,1164,230,6,29
50177,3,2,2015-06-16,9780,1,0,0,a,a,14130.0,...,6942.568678,2193.383804,15689,2936,750.077022,170.280290,1579,381,6,29
50178,4,2,2015-06-16,10671,1,0,0,c,c,620.0,...,9638.401786,1936.031881,17412,5869,1321.752551,198.347844,2216,856,6,29
50179,5,2,2015-06-16,6354,1,0,0,a,a,29910.0,...,4676.274711,1765.745628,11692,1423,537.340180,165.604598,1081,180,6,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101460,1111,6,2015-05-02,3528,0,0,0,a,a,1900.0,...,5251.702182,1667.623037,12492,1773,451.711168,117.418078,893,157,5,28
101461,1112,6,2015-05-02,9803,0,0,0,c,c,1880.0,...,10236.577664,3334.921787,25165,4895,838.608472,191.043702,1661,465,5,28
101462,1113,6,2015-05-02,8037,0,0,0,a,c,9260.0,...,6627.859694,1456.489302,16115,4050,717.029337,124.544792,1642,474,5,28
101463,1114,6,2015-05-02,31445,0,0,0,a,c,870.0,...,20666.562500,3452.938601,35697,8880,3200.946429,441.865226,4911,1160,5,28


In [81]:
def RMSPE(X: np.ndarray, Y: np.ndarray):
    return np.sqrt(np.square(((Y - X) / Y)).mean())

def make_feval(func, name, higher_better):
    def feval(x, y):
        return name, func(x, y.get_label()), higher_better
    return feval

In [91]:
eval_set = lgb.Dataset(validation.drop(columns=['Sales', 'Date']), validation['Sales'])
train_set = lgb.Dataset(train.drop(columns=['Sales', 'Date']), train['Sales'])
model = lgb.train({'objective': 'poisson',
                   'num_iterations': 5000,
                   'learning_rate': 0.05,
                   'early_stopping_rounds': 30},
                  train_set=train_set, valid_sets=[eval_set], feval=make_feval(RMSPE, 'RMSPE', False))

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2617
[LightGBM] [Info] Number of data points in the train set: 759868, number of used features: 19
[LightGBM] [Info] Start training from score 8.843374
[1]	valid_0's poisson: -58522.8	valid_0's RMSPE: 0.511084
Training until validation scores don't improve for 30 rounds
[2]	valid_0's poisson: -58547.5	valid_0's RMSPE: 0.501647
[3]	valid_0's poisson: -58570.8	valid_0's RMSPE: 0.492626
[4]	valid_0's poisson: -58592.4	valid_0's RMSPE: 0.483729
[5]	valid_0's poisson: -58612.9	valid_0's RMSPE: 0.475074
[6]	valid_0's poisson: -58632	valid_0's RMSPE: 0.466697
[7]	valid_0's poisson: -58650	valid_0's RMSPE: 0.458571
[8]	valid_0's poisson: -58667.2	valid_0's RMSPE: 0.450569
[9]	valid_0's poisson: -58683.2	valid_0's RMSPE: 0.442849
[10]	valid_0's poisson: -58698.5	valid_0's RMSPE: 0.435343
[11]	valid_0's poisson: -58712.9	valid_0's RMSPE: 0.428069


[142]	valid_0's poisson: -59005.6	valid_0's RMSPE: 0.189983
[143]	valid_0's poisson: -59006.1	valid_0's RMSPE: 0.189097
[144]	valid_0's poisson: -59006.3	valid_0's RMSPE: 0.188842
[145]	valid_0's poisson: -59006.6	valid_0's RMSPE: 0.188256
[146]	valid_0's poisson: -59006.9	valid_0's RMSPE: 0.187904
[147]	valid_0's poisson: -59007.2	valid_0's RMSPE: 0.187397
[148]	valid_0's poisson: -59007.4	valid_0's RMSPE: 0.187072
[149]	valid_0's poisson: -59007.7	valid_0's RMSPE: 0.18661
[150]	valid_0's poisson: -59007.9	valid_0's RMSPE: 0.186376
[151]	valid_0's poisson: -59008.1	valid_0's RMSPE: 0.1863
[152]	valid_0's poisson: -59008.3	valid_0's RMSPE: 0.185878
[153]	valid_0's poisson: -59008.7	valid_0's RMSPE: 0.185253
[154]	valid_0's poisson: -59008.9	valid_0's RMSPE: 0.18495
[155]	valid_0's poisson: -59009.3	valid_0's RMSPE: 0.184222
[156]	valid_0's poisson: -59009.5	valid_0's RMSPE: 0.183983
[157]	valid_0's poisson: -59009.7	valid_0's RMSPE: 0.183902
[158]	valid_0's poisson: -59009.9	valid_0's 

[291]	valid_0's poisson: -59020.6	valid_0's RMSPE: 0.172074
[292]	valid_0's poisson: -59020.7	valid_0's RMSPE: 0.172076
[293]	valid_0's poisson: -59020.7	valid_0's RMSPE: 0.172073
[294]	valid_0's poisson: -59020.7	valid_0's RMSPE: 0.171992
[295]	valid_0's poisson: -59020.7	valid_0's RMSPE: 0.171977
[296]	valid_0's poisson: -59020.8	valid_0's RMSPE: 0.171997
[297]	valid_0's poisson: -59020.8	valid_0's RMSPE: 0.171958
[298]	valid_0's poisson: -59020.8	valid_0's RMSPE: 0.171893
[299]	valid_0's poisson: -59020.9	valid_0's RMSPE: 0.171935
[300]	valid_0's poisson: -59020.9	valid_0's RMSPE: 0.171944
[301]	valid_0's poisson: -59020.9	valid_0's RMSPE: 0.172009
[302]	valid_0's poisson: -59021.1	valid_0's RMSPE: 0.17186
[303]	valid_0's poisson: -59021.1	valid_0's RMSPE: 0.171924
[304]	valid_0's poisson: -59021.1	valid_0's RMSPE: 0.171932
[305]	valid_0's poisson: -59021.2	valid_0's RMSPE: 0.171929
[306]	valid_0's poisson: -59021.2	valid_0's RMSPE: 0.171951
[307]	valid_0's poisson: -59021.3	valid_0

[439]	valid_0's poisson: -59026.7	valid_0's RMSPE: 0.164874
[440]	valid_0's poisson: -59026.8	valid_0's RMSPE: 0.164757
[441]	valid_0's poisson: -59026.9	valid_0's RMSPE: 0.164717
[442]	valid_0's poisson: -59027	valid_0's RMSPE: 0.164595
[443]	valid_0's poisson: -59027	valid_0's RMSPE: 0.164475
[444]	valid_0's poisson: -59027.1	valid_0's RMSPE: 0.164448
[445]	valid_0's poisson: -59027.2	valid_0's RMSPE: 0.164261
[446]	valid_0's poisson: -59027.2	valid_0's RMSPE: 0.164035
[447]	valid_0's poisson: -59027.2	valid_0's RMSPE: 0.164035
[448]	valid_0's poisson: -59027.3	valid_0's RMSPE: 0.163924
[449]	valid_0's poisson: -59027.3	valid_0's RMSPE: 0.1639
[450]	valid_0's poisson: -59027.4	valid_0's RMSPE: 0.163854
[451]	valid_0's poisson: -59027.4	valid_0's RMSPE: 0.163855
[452]	valid_0's poisson: -59027.4	valid_0's RMSPE: 0.163828
[453]	valid_0's poisson: -59027.5	valid_0's RMSPE: 0.163748
[454]	valid_0's poisson: -59027.5	valid_0's RMSPE: 0.163748
[455]	valid_0's poisson: -59027.5	valid_0's RM

[585]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155629
[586]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155604
[587]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155598
[588]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155596
[589]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155569
[590]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.15557
[591]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.155559
[592]	valid_0's poisson: -59031.2	valid_0's RMSPE: 0.15555
[593]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155454
[594]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.15545
[595]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155451
[596]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155431
[597]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155413
[598]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155414
[599]	valid_0's poisson: -59031.3	valid_0's RMSPE: 0.155412
[600]	valid_0's poisson: -59031.4	valid_0's RMSPE: 0.155398
[601]	valid_0's poisson: -59031.4	valid_0's

[737]	valid_0's poisson: -59033.9	valid_0's RMSPE: 0.150621
[738]	valid_0's poisson: -59034	valid_0's RMSPE: 0.150434
[739]	valid_0's poisson: -59034	valid_0's RMSPE: 0.150422
[740]	valid_0's poisson: -59034	valid_0's RMSPE: 0.150252
[741]	valid_0's poisson: -59034	valid_0's RMSPE: 0.150136
[742]	valid_0's poisson: -59034	valid_0's RMSPE: 0.150095
[743]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149938
[744]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.14993
[745]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149761
[746]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149728
[747]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149719
[748]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149714
[749]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149715
[750]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0.149605
[751]	valid_0's poisson: -59034.2	valid_0's RMSPE: 0.14957
[752]	valid_0's poisson: -59034.2	valid_0's RMSPE: 0.149432
[753]	valid_0's poisson: -59034.1	valid_0's RMSPE: 0

[889]	valid_0's poisson: -59035.6	valid_0's RMSPE: 0.144138
[890]	valid_0's poisson: -59035.7	valid_0's RMSPE: 0.14411
[891]	valid_0's poisson: -59035.7	valid_0's RMSPE: 0.144068
[892]	valid_0's poisson: -59035.7	valid_0's RMSPE: 0.144074
[893]	valid_0's poisson: -59035.7	valid_0's RMSPE: 0.14405
[894]	valid_0's poisson: -59035.7	valid_0's RMSPE: 0.144055
[895]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.144009
[896]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.143916
[897]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.143883
[898]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.143876
[899]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.143824
[900]	valid_0's poisson: -59035.8	valid_0's RMSPE: 0.143829
[901]	valid_0's poisson: -59035.9	valid_0's RMSPE: 0.143783
[902]	valid_0's poisson: -59035.9	valid_0's RMSPE: 0.143712
[903]	valid_0's poisson: -59035.9	valid_0's RMSPE: 0.143706
[904]	valid_0's poisson: -59035.9	valid_0's RMSPE: 0.14373
[905]	valid_0's poisson: -59035.9	valid_0's

[1036]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141503
[1037]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141491
[1038]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141485
[1039]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141458
[1040]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141456
[1041]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141445
[1042]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141436
[1043]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141437
[1044]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141435
[1045]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141434
[1046]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141427
[1047]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141423
[1048]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141426
[1049]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141359
[1050]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141355
[1051]	valid_0's poisson: -59037.3	valid_0's RMSPE: 0.141352
[1052]	valid_0's poisson

[1173]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139804
[1174]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139804
[1175]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139796
[1176]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139798
[1177]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139794
[1178]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139789
[1179]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139776
[1180]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139777
[1181]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139751
[1182]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139743
[1183]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139747
[1184]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139742
[1185]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139738
[1186]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139737
[1187]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139732
[1188]	valid_0's poisson: -59038.3	valid_0's RMSPE: 0.139708
[1189]	valid_0's poisson

[1312]	valid_0's poisson: -59039.7	valid_0's RMSPE: 0.137667
[1313]	valid_0's poisson: -59039.7	valid_0's RMSPE: 0.137615
[1314]	valid_0's poisson: -59039.7	valid_0's RMSPE: 0.137609
[1315]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137645
[1316]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137644
[1317]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137643
[1318]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.13764
[1319]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137629
[1320]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137633
[1321]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137616
[1322]	valid_0's poisson: -59039.6	valid_0's RMSPE: 0.137615
Early stopping, best iteration is:
[1292]	valid_0's poisson: -59039.7	valid_0's RMSPE: 0.137806


In [92]:
RMSPE(model.predict(validation.drop(columns=['Sales', 'Date'])), validation['Sales'])

0.13780550562380697

In [29]:
validation = validation.loc[validation.Sales != 0]