In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import *
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from chinese_calendar import is_workday, is_holiday
from lightgbm import LGBMRegressor
import matplotlib.pyplot as plt
import lightgbm as lgb
import copy 
from tqdm.notebook import tqdm

In [2]:
train = pd.read_csv('./data/Dataset/demand_train_B.csv')
test=pd.read_csv('./data/Dataset/demand_test_B.csv')
geo_topo=pd.read_csv('./data/Dataset/geo_topo.csv')
product_topo=pd.read_csv('./data/Dataset/product_topo.csv')

In [3]:
data=pd.concat([train,test])
data['ts']=pd.to_datetime(data['ts'])
data=data.drop(columns=['Unnamed: 0','geography_level','product_level'])
data.columns=['unit','ts','qty','geography_level_3','product_level_2']
data=pd.merge(data,geo_topo,on=['geography_level_3'])
data=pd.merge(data,product_topo,on=['product_level_2'])
data['qty']=data.groupby('unit')['qty'].diff()
data=data.dropna()

In [4]:
for date in pd.date_range('2021-06-08','2021-06-28'):
    append_data=pd.DataFrame(columns=data.columns)
    append_data[['product_level_1','product_level_2','geography_level_1','geography_level_2','geography_level_3','unit']]=data[['product_level_1','product_level_2','geography_level_1','geography_level_2','geography_level_3','unit']].drop_duplicates()
    append_data['ts']=date
    data=pd.concat([data,append_data],ignore_index=True)
data['qty']=data['qty'].astype('float').fillna(0)
data=data.sort_values(['product_level_1','product_level_2','geography_level_1','geography_level_2','geography_level_3','unit','ts'])

In [5]:
data.tail()

Unnamed: 0,unit,ts,qty,geography_level_3,product_level_2,geography_level_1,geography_level_2,product_level_1
160768,44cb34447237e915874421aa2b1352ed,2021-06-24,0.0,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,aa8f3faa64ff47b0ce9a76682bde924c,b64343042e19b42fcbf7f2ed285dc67d
161063,44cb34447237e915874421aa2b1352ed,2021-06-25,0.0,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,aa8f3faa64ff47b0ce9a76682bde924c,b64343042e19b42fcbf7f2ed285dc67d
161358,44cb34447237e915874421aa2b1352ed,2021-06-26,0.0,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,aa8f3faa64ff47b0ce9a76682bde924c,b64343042e19b42fcbf7f2ed285dc67d
161653,44cb34447237e915874421aa2b1352ed,2021-06-27,0.0,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,aa8f3faa64ff47b0ce9a76682bde924c,b64343042e19b42fcbf7f2ed285dc67d
161948,44cb34447237e915874421aa2b1352ed,2021-06-28,0.0,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,aa8f3faa64ff47b0ce9a76682bde924c,b64343042e19b42fcbf7f2ed285dc67d


In [6]:
data['quarter']=data['ts'].apply(lambda x:x.quarter).astype('category')
data['month']=data['ts'].apply(lambda x:x.month).astype('category')
data['weekday']=data['ts'].apply(lambda x:x.weekday()).astype('category')
data['week']=data['ts'].apply(lambda x:x.week).astype('category')
data['day']=data['ts'].apply(lambda x:x.day).astype('category')
data['is_weekend']=data['ts'].apply(lambda x: 1 if x in [5,6] else 0)
data['is_holiday']=data['ts'].apply(is_holiday)

In [7]:
data.head()

Unnamed: 0,unit,ts,qty,geography_level_3,product_level_2,geography_level_1,geography_level_2,product_level_1,quarter,month,weekday,week,day,is_weekend,is_holiday
148746,16621bc8b3475de8b3ada41b598afda7,2020-01-01,-679.333333,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,0ad05831b638ec6879a528bc50abaf20,2eb2930111864beeb409e946751215b1,1,1,2,1,1,0,True
148747,16621bc8b3475de8b3ada41b598afda7,2020-01-02,15.0,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,0ad05831b638ec6879a528bc50abaf20,2eb2930111864beeb409e946751215b1,1,1,3,1,2,0,False
148748,16621bc8b3475de8b3ada41b598afda7,2020-01-03,-73.0,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,0ad05831b638ec6879a528bc50abaf20,2eb2930111864beeb409e946751215b1,1,1,4,1,3,0,False
148749,16621bc8b3475de8b3ada41b598afda7,2020-01-04,0.0,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,0ad05831b638ec6879a528bc50abaf20,2eb2930111864beeb409e946751215b1,1,1,5,1,4,0,True
148750,16621bc8b3475de8b3ada41b598afda7,2020-01-05,2.0,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,0ad05831b638ec6879a528bc50abaf20,2eb2930111864beeb409e946751215b1,1,1,6,1,5,0,True


In [8]:
data_geo_agg= data.set_index(["geography_level_2"])
data_pro_agg= data.set_index(["product_level_1"])
unit_list = list(data.unit.unique())

In [9]:
data_geo_agg

Unnamed: 0_level_0,unit,ts,qty,geography_level_3,product_level_2,geography_level_1,product_level_1,quarter,month,weekday,week,day,is_weekend,is_holiday
geography_level_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0ad05831b638ec6879a528bc50abaf20,16621bc8b3475de8b3ada41b598afda7,2020-01-01,-679.333333,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,2eb2930111864beeb409e946751215b1,1,1,2,1,1,0,True
0ad05831b638ec6879a528bc50abaf20,16621bc8b3475de8b3ada41b598afda7,2020-01-02,15.000000,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,2eb2930111864beeb409e946751215b1,1,1,3,1,2,0,False
0ad05831b638ec6879a528bc50abaf20,16621bc8b3475de8b3ada41b598afda7,2020-01-03,-73.000000,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,2eb2930111864beeb409e946751215b1,1,1,4,1,3,0,False
0ad05831b638ec6879a528bc50abaf20,16621bc8b3475de8b3ada41b598afda7,2020-01-04,0.000000,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,2eb2930111864beeb409e946751215b1,1,1,5,1,4,0,True
0ad05831b638ec6879a528bc50abaf20,16621bc8b3475de8b3ada41b598afda7,2020-01-05,2.000000,ec258532d911270434f328606cba61b6,1807ffa2c1c84035f4346c3364104dd1,3f0cb8b8c238c3b4e08898ce6d449c8d,2eb2930111864beeb409e946751215b1,1,1,6,1,5,0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
aa8f3faa64ff47b0ce9a76682bde924c,44cb34447237e915874421aa2b1352ed,2021-06-24,0.000000,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,b64343042e19b42fcbf7f2ed285dc67d,2,6,3,25,24,0,False
aa8f3faa64ff47b0ce9a76682bde924c,44cb34447237e915874421aa2b1352ed,2021-06-25,0.000000,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,b64343042e19b42fcbf7f2ed285dc67d,2,6,4,25,25,0,False
aa8f3faa64ff47b0ce9a76682bde924c,44cb34447237e915874421aa2b1352ed,2021-06-26,0.000000,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,b64343042e19b42fcbf7f2ed285dc67d,2,6,5,25,26,0,True
aa8f3faa64ff47b0ce9a76682bde924c,44cb34447237e915874421aa2b1352ed,2021-06-27,0.000000,79fd843dc0fb0d570ec0f9415cbc989d,be55b932f63b0feda2a190154290d4aa,d96995f1cfbc7e09a519c3c72f4d5920,b64343042e19b42fcbf7f2ed285dc67d,2,6,6,25,27,0,True


In [10]:
unit_data_dict_geo = dict()
lag = 5
for unit in unit_list:
    geo_lv2_inf = data[data["unit"]==unit].geography_level_2.values[0]
    #pro_lv1_inf = data[data["unit"]==unit].product_level_1.values[0]
    
    data_unit = data[data["unit"]==unit]
    for i in range(21,lag+21):
        data_unit["qty_shift"+str(i)] = data_unit['qty'].shift(i)
    data_unit=data_unit.dropna()
    data_temp = data_geo_agg.loc[geo_lv2_inf,["unit","ts","qty"]].groupby("unit")
    for unit_idx,unit_df in data_temp:
        
        for i in range(21,lag+21):
            unit_df["qty_shift"+str(i)] = unit_df['qty'].shift(i)
        unit_df=unit_df.dropna()
        unit_df.columns = ["unit","ts"]+[name+"_"+unit_idx for name in unit_df.columns[2:]]
        
        if unit!=unit_idx:
            data_unit = pd.merge(data_unit,unit_df.drop(["unit"],axis = 1),how='left',on = ["ts"])
    
    qty_names = data_unit.columns[data_unit.columns.str.startswith("qty")]
    for qty_name in qty_names:
        data_unit[qty_name].fillna(0, inplace=True)
    
    #data_unit.fillna()
    unit_data_dict_geo[unit] = data_unit.sort_values(['ts'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [11]:
params = {
        'objective':'quantile',
        "alpha":0.9,
        'metric': 'mape',
        'num_leaves': 2**8-1,
        "max_depth":8,
        'reg_lambda': 5,
        'colsample_bytree': 0.8,
        'subsample': 0.8,
        'subsample_freq': 4,
        'learning_rate': 0.015,
        'n_estimators':5000,
        'seed': 1024,
        'n_jobs':-1,
        'verbose': -1,
        "categorical_feature":["quarter ","month","weekday","week","day","is_weekend","is_holiday"] 
    }

In [12]:
#y_test_full=np.array([])
y_test_full = dict()
#pred_full=np.array([])
pred_full = dict()
geolgb_dict = dict()
train_split_point= pd.to_datetime('2021-03-01')-pd.DateOffset(10)
test_split_point = pd.to_datetime('2021-03-01')
for unit in unit_list:
    df = copy.deepcopy(unit_data_dict_geo[unit])
    df.drop(["unit","geography_level_3","product_level_2","geography_level_1","geography_level_2","product_level_1"],axis = 1,inplace = True)
    train=df[df['ts']<=train_split_point]
    test=df[(df['ts']>train_split_point)&(df['ts']<=test_split_point)]
    
    X_train=train.drop(columns=['qty','ts'])
    y_train=train['qty']
    X_test=test.drop(columns=['qty','ts'])
    y_test=test['qty']
    model=LGBMRegressor(**params)
    model.fit(X_train,y_train,eval_set=[(X_test,y_test)],eval_metric=['rmse','mae'],early_stopping_rounds=100,verbose=100)
    #pred=model.predict(X_test)
    
    geolgb_dict[unit] = model
    #y_test_full[unit] = y_test
    #pred_full[unit] = pred

Please use categorical_feature argument of the Dataset constructor to pass this parameter.


[100]	valid_0's rmse: 52.8657	valid_0's l1: 49.9606	valid_0's mape: 19.9327
[100]	valid_0's rmse: 12.327	valid_0's l1: 11.5599	valid_0's mape: 9.64635
[200]	valid_0's rmse: 10.2791	valid_0's l1: 9.6857	valid_0's mape: 8.00896
[300]	valid_0's rmse: 10.1597	valid_0's l1: 9.44138	valid_0's mape: 7.8404
[100]	valid_0's rmse: 4.57983	valid_0's l1: 3.03348	valid_0's mape: 3.03348
[100]	valid_0's rmse: 2.25616	valid_0's l1: 1.95562	valid_0's mape: 1.95562
[100]	valid_0's rmse: 96.2986	valid_0's l1: 89.6955	valid_0's mape: 17.5059
[100]	valid_0's rmse: 7.5937	valid_0's l1: 6.73412	valid_0's mape: 5.98152
[100]	valid_0's rmse: 0.542843	valid_0's l1: 0.473005	valid_0's mape: 0.473005
[100]	valid_0's rmse: 4.31683	valid_0's l1: 3.85384	valid_0's mape: 2.7767
[100]	valid_0's rmse: 21.0383	valid_0's l1: 19.1169	valid_0's mape: 8.68178
[200]	valid_0's rmse: 20.1858	valid_0's l1: 18.0954	valid_0's mape: 8.17232
[300]	valid_0's rmse: 18.8473	valid_0's l1: 16.3463	valid_0's mape: 7.43088
[400]	valid_0'

[100]	valid_0's rmse: 2.24728	valid_0's l1: 1.88008	valid_0's mape: 1.58129
[100]	valid_0's rmse: 3.74676	valid_0's l1: 3.26943	valid_0's mape: 2.8201
[100]	valid_0's rmse: 4.5186	valid_0's l1: 3.54197	valid_0's mape: 3.24387
[100]	valid_0's rmse: 7.18997	valid_0's l1: 6.56587	valid_0's mape: 3.14196
[200]	valid_0's rmse: 7.01679	valid_0's l1: 6.24136	valid_0's mape: 2.99556
[100]	valid_0's rmse: 7.63889	valid_0's l1: 7.04751	valid_0's mape: 4.07976
[200]	valid_0's rmse: 6.50621	valid_0's l1: 5.75459	valid_0's mape: 3.38719
[300]	valid_0's rmse: 6.05109	valid_0's l1: 5.17658	valid_0's mape: 3.08379
[400]	valid_0's rmse: 5.91354	valid_0's l1: 5.01225	valid_0's mape: 2.98615
[500]	valid_0's rmse: 5.70295	valid_0's l1: 4.7965	valid_0's mape: 2.84982
[600]	valid_0's rmse: 5.58915	valid_0's l1: 4.69743	valid_0's mape: 2.76847
[700]	valid_0's rmse: 5.26038	valid_0's l1: 4.48094	valid_0's mape: 2.60503
[800]	valid_0's rmse: 5.19782	valid_0's l1: 4.42953	valid_0's mape: 2.56476
[900]	valid_0's

[100]	valid_0's rmse: 2.31511	valid_0's l1: 2.11648	valid_0's mape: 2.11312
[100]	valid_0's rmse: 3.42429	valid_0's l1: 2.14527	valid_0's mape: 0.848614
[100]	valid_0's rmse: 1.72456	valid_0's l1: 1.24908	valid_0's mape: 0.986224
[100]	valid_0's rmse: 6.33803	valid_0's l1: 5.14252	valid_0's mape: 3.28196
[100]	valid_0's rmse: 10.5077	valid_0's l1: 9.44933	valid_0's mape: 3.28924
[200]	valid_0's rmse: 9.21293	valid_0's l1: 8.04028	valid_0's mape: 2.68794
[300]	valid_0's rmse: 8.72291	valid_0's l1: 7.40682	valid_0's mape: 2.40921
[400]	valid_0's rmse: 7.88673	valid_0's l1: 6.62631	valid_0's mape: 2.03511
[500]	valid_0's rmse: 7.16885	valid_0's l1: 5.89045	valid_0's mape: 1.64603
[600]	valid_0's rmse: 7.16664	valid_0's l1: 5.90102	valid_0's mape: 1.63355
[100]	valid_0's rmse: 38.7886	valid_0's l1: 35.1192	valid_0's mape: 10.0067
[100]	valid_0's rmse: 120.123	valid_0's l1: 94.171	valid_0's mape: 3.11567
[100]	valid_0's rmse: 18.6689	valid_0's l1: 18.0129	valid_0's mape: 12.2793
[200]	valid

[100]	valid_0's rmse: 3.46002	valid_0's l1: 3.04148	valid_0's mape: 2.38154
[100]	valid_0's rmse: 11.4746	valid_0's l1: 8.22597	valid_0's mape: 5.30591
[100]	valid_0's rmse: 0.805725	valid_0's l1: 0.700723	valid_0's mape: 0.700723
[200]	valid_0's rmse: 0.817632	valid_0's l1: 0.652359	valid_0's mape: 0.652359
[100]	valid_0's rmse: 2.11474	valid_0's l1: 1.85864	valid_0's mape: 1.36018
[100]	valid_0's rmse: 4.2631	valid_0's l1: 4.0724	valid_0's mape: 3.34827
[100]	valid_0's rmse: 34.0324	valid_0's l1: 30.9992	valid_0's mape: 15.3748
[100]	valid_0's rmse: 8.12027	valid_0's l1: 7.79206	valid_0's mape: 4.87364
[200]	valid_0's rmse: 7.44575	valid_0's l1: 7.08247	valid_0's mape: 4.32237
[300]	valid_0's rmse: 6.70672	valid_0's l1: 6.35126	valid_0's mape: 3.90034
[400]	valid_0's rmse: 7.01612	valid_0's l1: 6.47996	valid_0's mape: 3.88616
[100]	valid_0's rmse: 6.84092	valid_0's l1: 6.64765	valid_0's mape: 5.68475
[200]	valid_0's rmse: 5.38104	valid_0's l1: 5.05094	valid_0's mape: 4.21752
[300]	va

[400]	valid_0's rmse: 0.747117	valid_0's l1: 0.548928	valid_0's mape: 0.523502
[500]	valid_0's rmse: 0.742115	valid_0's l1: 0.544407	valid_0's mape: 0.519155
[600]	valid_0's rmse: 0.735543	valid_0's l1: 0.535588	valid_0's mape: 0.510438
[700]	valid_0's rmse: 0.729597	valid_0's l1: 0.529667	valid_0's mape: 0.504723
[800]	valid_0's rmse: 0.725707	valid_0's l1: 0.526198	valid_0's mape: 0.501384
[900]	valid_0's rmse: 0.721804	valid_0's l1: 0.522138	valid_0's mape: 0.497436
[1000]	valid_0's rmse: 0.716687	valid_0's l1: 0.518016	valid_0's mape: 0.493444
[1100]	valid_0's rmse: 0.717612	valid_0's l1: 0.519025	valid_0's mape: 0.494425
[100]	valid_0's rmse: 517.97	valid_0's l1: 496.166	valid_0's mape: 2.72689
[200]	valid_0's rmse: 474.241	valid_0's l1: 451.671	valid_0's mape: 2.4521
[300]	valid_0's rmse: 449.674	valid_0's l1: 423.858	valid_0's mape: 2.25125
[400]	valid_0's rmse: 429.945	valid_0's l1: 402.565	valid_0's mape: 2.11305
[500]	valid_0's rmse: 407.771	valid_0's l1: 377.527	valid_0's ma

[100]	valid_0's rmse: 499.718	valid_0's l1: 482.883	valid_0's mape: 11.9959
[100]	valid_0's rmse: 121.545	valid_0's l1: 121.067	valid_0's mape: 91.4015
[200]	valid_0's rmse: 107.601	valid_0's l1: 106.505	valid_0's mape: 81.3041
[300]	valid_0's rmse: 95.1506	valid_0's l1: 93.6097	valid_0's mape: 71.2149
[400]	valid_0's rmse: 82.0962	valid_0's l1: 80.0308	valid_0's mape: 61.0577
[500]	valid_0's rmse: 80.927	valid_0's l1: 77.7073	valid_0's mape: 59.861
[100]	valid_0's rmse: 4.22515	valid_0's l1: 3.44918	valid_0's mape: 1.62331
[100]	valid_0's rmse: 14.543	valid_0's l1: 11.0277	valid_0's mape: 8.16702
[100]	valid_0's rmse: 1.20058	valid_0's l1: 0.876302	valid_0's mape: 0.562782
[100]	valid_0's rmse: 18.7442	valid_0's l1: 18.4779	valid_0's mape: 9.92626
[100]	valid_0's rmse: 29.4562	valid_0's l1: 22.8183	valid_0's mape: 22.45
[100]	valid_0's rmse: 88.8996	valid_0's l1: 69.414	valid_0's mape: 9.09432
[100]	valid_0's rmse: 31.0267	valid_0's l1: 28.0809	valid_0's mape: 27.141
[100]	valid_0's r

[900]	valid_0's rmse: 145.222	valid_0's l1: 116.267	valid_0's mape: 1.41808
[100]	valid_0's rmse: 5.92747	valid_0's l1: 4.66938	valid_0's mape: 2.22049
[100]	valid_0's rmse: 59.7985	valid_0's l1: 57.1453	valid_0's mape: 3.53442
[200]	valid_0's rmse: 57.3446	valid_0's l1: 54.7433	valid_0's mape: 3.25398
[300]	valid_0's rmse: 54.5995	valid_0's l1: 51.7905	valid_0's mape: 2.98374
[400]	valid_0's rmse: 51.8808	valid_0's l1: 48.7237	valid_0's mape: 2.71927
[500]	valid_0's rmse: 50.4664	valid_0's l1: 47.0663	valid_0's mape: 2.56969
[600]	valid_0's rmse: 49.4415	valid_0's l1: 45.8467	valid_0's mape: 2.4577
[700]	valid_0's rmse: 49.2567	valid_0's l1: 45.631	valid_0's mape: 2.43972
[100]	valid_0's rmse: 8.4391	valid_0's l1: 7.17448	valid_0's mape: 5.34214
[100]	valid_0's rmse: 0.0862977	valid_0's l1: 0.0609952	valid_0's mape: 0.0609952
[100]	valid_0's rmse: 265.431	valid_0's l1: 167.818	valid_0's mape: 70.7154
[100]	valid_0's rmse: 11.6669	valid_0's l1: 10.1885	valid_0's mape: 3.89634
[100]	val

[100]	valid_0's rmse: 26.1074	valid_0's l1: 24.8036	valid_0's mape: 9.19064
[200]	valid_0's rmse: 23.2868	valid_0's l1: 21.3184	valid_0's mape: 7.55842
[300]	valid_0's rmse: 23.4456	valid_0's l1: 20.9658	valid_0's mape: 7.31509
[100]	valid_0's rmse: 82.6752	valid_0's l1: 75.9598	valid_0's mape: 3.64874
[100]	valid_0's rmse: 59.7931	valid_0's l1: 50.5975	valid_0's mape: 10.1795
[100]	valid_0's rmse: 27.4064	valid_0's l1: 8.66667	valid_0's mape: 0.1
[100]	valid_0's rmse: 16.7703	valid_0's l1: 14.2805	valid_0's mape: 3.74884
[100]	valid_0's rmse: 4.91343	valid_0's l1: 4.28495	valid_0's mape: 3.13638
[100]	valid_0's rmse: 1.71902	valid_0's l1: 1.44882	valid_0's mape: 1.22328
[100]	valid_0's rmse: 5.28668	valid_0's l1: 3.25635	valid_0's mape: 1.64793
[100]	valid_0's rmse: 1.35516	valid_0's l1: 1.17272	valid_0's mape: 1.15743
[100]	valid_0's rmse: 10.2611	valid_0's l1: 8.412	valid_0's mape: 3.45436
[100]	valid_0's rmse: 17.5819	valid_0's l1: 14.8649	valid_0's mape: 5.84869


In [13]:
pred_frame_list = []
for date in tqdm(pd.date_range(start='2021-03-08',end='2021-06-07',freq='7d')):
    end_date=date+pd.DateOffset(21)
    val_date=date-pd.DateOffset(21)
    train_num = pd.DateOffset(100)
    unit_dict = dict()
    for unit in unit_list:
        model = geolgb_dict[unit]
        df = copy.deepcopy(unit_data_dict_geo[unit])
        df[df['ts']<=train_split_point]
        df.drop(["unit","geography_level_3","product_level_2","geography_level_1","geography_level_2","product_level_1"],axis = 1,inplace = True)
    
        #train=df[(df['ts']<=val_date)]#&(df["ts"]>val_date-train_num)]
        train=df[(df['ts']<=val_date)&(df["ts"]>val_date-train_num)]
        val=df[(df['ts']>val_date)&(df['ts']<=date)]
        test=df[(df['ts']>date)&(df['ts']<=end_date)]
        X_train=train.drop(columns=['qty','ts'])
        y_train=train['qty']
        X_val=val.drop(columns=['qty','ts'])
        y_val=val['qty']
        X_test=test.drop(columns=['qty','ts'])
        y_test=test['qty']
        
        lgb_train = lgb.Dataset(X_train, y_train.values)
        lgb_eval = lgb.Dataset(X_val, y_val.values, reference=lgb_train)
        
        new_model = lgb.train(params,
                    lgb_train,
                    num_boost_round=200,
                    valid_sets=lgb_eval,
                    init_model=model,             
                    early_stopping_rounds=20,
                    verbose_eval=False,
                    keep_training_booster=True)
        
        pred = new_model.predict(X_test)
        unit_dict[unit] = pred
        geolgb_dict[unit] = new_model
        
    unit_dict["ts"] = test.ts.values
    unit_dict["decision_day"] = test.ts.values.shape[0]*[date]
    
    decision_frame = pd.DataFrame(unit_dict)
    pred_frame_list.append(decision_frame)
    

  0%|          | 0/14 [00:00<?, ?it/s]

Please use categorical_feature argument of the Dataset constructor to pass this parameter.


test_set = np.array([])
pred_set = np.array([])
def wmape(y_true,y_pred):
    return np.sum(np.abs(y_true-y_pred))/np.sum(y_true)
for value_test,value_pred in zip(y_test_full.values(),pred_full.values()):
    test_set = np.append(test_set,value_test)
    pred_set = np.append(pred_set,value_pred)
print(f"rmse: {mean_squared_error(test_set,pred_set,squared=False)}")
print(f"mae: {mean_absolute_error(test_set,pred_set)}")
print(f"mape: {wmape(test_set,pred_set)}")

ts=df[(df['ts']>'2021-03-08')].ts

def make_decision_day(x):
    decision_range = pd.date_range(start='2021-03-08',end='2021-06-07',freq='7d')
    available_day = decision_range[decision_range<x][-1]
    return available_day
decision_day = ts.apply(lambda x:make_decision_day(x))

pred_frame = pd.DataFrame(pred_full)
pred_frame.insert(0,"ts",ts.values) 
pred_frame.insert(0,"decision_day",decision_day.values)
pred_frame.to_csv('lgb_quantile_80_full.csv',index = False)

In [14]:
pred_frame_list[0].head()

Unnamed: 0,16621bc8b3475de8b3ada41b598afda7,727771c2319dba6bd0d957d327d6c7e9,d2fb8ee0618b62fbc44f7cae7b5ea679,e5d27207eac5253b7fda820a7f3827e3,502d57431ca99938c1d4e606c4ac71fa,24e3e03e01cb04c4ed51ea97d4bd4791,8d2fb899d57a2c110d4e2381fa264c7f,e8bca92219ace752690b1fc328bed7f7,5f0bd7576fda1b008526e5696b7a9115,f1e4f08d73f2c4edb21519e03be12791,...,c56064df9ed5f2a38d95616e168d0a97,c0f4e44fb4369aabb14beda9b6dcf795,067cbc1b9ec91699b1754fc7a1492ff1,70cf38eb21b8ac7625b015d45bd694bf,3c83ccd917df2d689c5cede33bdfee95,c1763ccafff242dcec633a501a7e8075,b2eeefd1df0de07e7602b213269ef672,44cb34447237e915874421aa2b1352ed,ts,decision_day
0,59.061325,11.999997,0.8,0.0,67.060441,6.035737,0.370391,3.980499,25.248672,35.711988,...,0.0,7.259129,5.324816,1.038542,2.479374,1.057292,10.371041,7.349373,2021-03-09,2021-03-08
1,59.061325,11.999997,0.8,0.0,67.060441,5.895014,0.335891,4.072999,28.189149,38.3486,...,0.0,7.036861,5.551787,1.038542,2.539062,1.057292,10.371041,7.670623,2021-03-10,2021-03-08
2,60.333325,11.999997,0.0,0.16,67.060441,6.008737,0.335891,3.980499,25.657729,36.655856,...,0.0,7.437244,5.324816,1.038542,2.479374,1.057292,10.371041,7.670623,2021-03-11,2021-03-08
3,58.743325,11.999997,0.0,0.0,66.503969,6.035737,0.335891,3.980499,26.046807,38.3486,...,0.0,7.543509,5.202316,1.060417,2.539062,1.057292,10.565416,7.349373,2021-03-12,2021-03-08
4,59.061325,11.999997,0.0,0.0,66.503969,5.895014,0.335891,4.072999,26.167204,37.871354,...,0.0,7.514228,5.324816,1.038542,2.479374,1.038542,10.416666,7.349373,2021-03-13,2021-03-08


In [15]:
pred_frame_list[10].head()

Unnamed: 0,16621bc8b3475de8b3ada41b598afda7,727771c2319dba6bd0d957d327d6c7e9,d2fb8ee0618b62fbc44f7cae7b5ea679,e5d27207eac5253b7fda820a7f3827e3,502d57431ca99938c1d4e606c4ac71fa,24e3e03e01cb04c4ed51ea97d4bd4791,8d2fb899d57a2c110d4e2381fa264c7f,e8bca92219ace752690b1fc328bed7f7,5f0bd7576fda1b008526e5696b7a9115,f1e4f08d73f2c4edb21519e03be12791,...,c56064df9ed5f2a38d95616e168d0a97,c0f4e44fb4369aabb14beda9b6dcf795,067cbc1b9ec91699b1754fc7a1492ff1,70cf38eb21b8ac7625b015d45bd694bf,3c83ccd917df2d689c5cede33bdfee95,c1763ccafff242dcec633a501a7e8075,b2eeefd1df0de07e7602b213269ef672,44cb34447237e915874421aa2b1352ed,ts,decision_day
0,75.952165,14.303702,5.26122,2.47161,37.076572,7.668202,2.479836,9.993773,19.000314,22.846531,...,8.042876,12.853053,3.106327,0.702821,6.367929,3.422164,13.069796,10.182353,2021-05-18,2021-05-17
1,16.645536,3.958459,9.37712,2.392725,58.582222,10.751294,1.058492,5.196799,15.038211,37.017007,...,8.295119,14.955568,0.234993,2.55638,5.873787,1.484812,10.848075,10.667559,2021-05-19,2021-05-17
2,62.535739,3.920215,16.746545,1.765683,49.706136,11.781374,2.294234,11.607221,16.34922,15.645556,...,5.392882,4.673771,4.535821,2.626471,4.017344,1.267749,13.627134,11.40917,2021-05-20,2021-05-17
3,32.577595,13.729251,6.561584,-1.345461,62.011187,9.033773,0.600805,7.110039,26.256117,28.859316,...,5.657768,2.412509,3.586116,2.533458,6.672638,2.922763,12.733932,15.549294,2021-05-21,2021-05-17
4,50.745555,14.013373,11.785017,2.593178,68.861097,7.941201,4.198194,10.550253,20.025996,12.774083,...,8.27672,4.031066,5.922813,0.449262,4.051865,1.856387,9.543903,16.745947,2021-05-22,2021-05-17


In [16]:
pred_frame = pd.concat(pred_frame_list)
pred_frame.to_csv('data/forecast/lgb_quantile_90_incre.csv',index = False)