In [None]:
# Create the directory that holds the Kaggle API credentials file.
# `mkdir -p` does not fail when the directory already exists, so the cell can be re-run.
!mkdir -p /root/.kaggle

In [None]:
import json
import os

# Credentials are taken from the KAGGLE_USERNAME / KAGGLE_KEY environment
# variables when they are set; otherwise the placeholder strings are written
# unchanged, exactly as before.
kaggle_credentials = {
    "username": os.environ.get("KAGGLE_USERNAME", "YOUR USERNAME"),
    "key": os.environ.get("KAGGLE_KEY", "YOUR KEY"),
}

kaggle_json_path = "/root/.kaggle/kaggle.json"

# Make sure the target directory exists so this cell does not depend on a
# previous shell cell having succeeded.
os.makedirs(os.path.dirname(kaggle_json_path), exist_ok=True)

# Request mode 0o600 at creation time so a newly created credentials file is
# not readable by other users, even before any later chmod. The mode only
# applies when the file is created; an existing file keeps its permissions.
kaggle_fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(kaggle_fd, "w") as kaggle_json:
    json.dump(kaggle_credentials, kaggle_json)

In [None]:
# Restrict the credentials file to owner read/write only (mode 600).
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
# Download the output files of the horizon-7 training kernel into ../input/.
# `mkdir -p` creates missing parent directories and does not fail on re-runs.
!mkdir -p ../input/m5-train-day-1941-horizon-7
!kaggle kernels output lucamassaron/m5-train-day-1941-horizon-7 -p ../input/m5-train-day-1941-horizon-7/

In [None]:
# Download the output files of the horizon-14 training kernel into ../input/.
# `mkdir -p` creates missing parent directories and does not fail on re-runs.
!mkdir -p ../input/m5-train-day-1941-horizon-14
!kaggle kernels output lucamassaron/m5-train-day-1941-horizon-14 -p ../input/m5-train-day-1941-horizon-14/

In [None]:
# Download the output files of the horizon-21 training kernel into ../input/.
# `mkdir -p` creates missing parent directories and does not fail on re-runs.
!mkdir -p ../input/m5-train-day-1941-horizon-21
!kaggle kernels output lucamassaron/m5-train-day-1941-horizon-21 -p ../input/m5-train-day-1941-horizon-21/

In [None]:
# Download the output files of the horizon-28 training kernel into ../input/.
# `mkdir -p` creates missing parent directories and does not fail on re-runs.
!mkdir -p ../input/m5-train-day-1941-horizon-28
!kaggle kernels output lucamassaron/m5-train-day-1941-horizon-28 -p ../input/m5-train-day-1941-horizon-28/

In [None]:
# List the contents of ../input/, the directory the download cells above write into.
ls ../input/

In [None]:
# Install GluonTS (source of the LSF class imported below).
# `%pip` installs into the environment of the running kernel, which `!pip`
# does not guarantee.
%pip install --quiet gluonts

In [None]:
import numpy as np
import pandas as pd
import os
import random
import math
from decimal import Decimal as dec
import datetime
import time
import gc
import lightgbm as lgb
import pickle

from gluonts.model.rotbaum._model import LSF

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [None]:
def predict_uncertainty(end_train_day_x_list, prediction_horizon_list, min_bin_size, quantile):
    """Predict one quantile of sales for every store and write a submission CSV.

    For each training end day and each prediction horizon, the function loads
    the per-store pickled model, test frame and full grid from
    ``../input/m5-train-day-{end_day}-horizon-{horizon}``. It wraps the model
    in ``LSF`` (fitted with ``model_is_already_trained=True``) and asks it for
    the requested quantile, one day at a time. Each horizon only contributes
    the days after the previous horizon, so the horizons together tile the
    forecast window without overlap.

    Parameters
    ----------
    end_train_day_x_list : list of int
        Last training day(s). Day numbers in the output are expressed relative
        to the end day of the window they were predicted for.
    prediction_horizon_list : list of int
        Increasing horizons (in days); one set of model files is expected per
        horizon.
    min_bin_size : int
        Passed through to ``LSF(min_bin_size=...)``.
    quantile : float
        Quantile level passed to ``LSF.predict``; also used in the output
        file name.

    Returns
    -------
    None
        The result is written to ``m5_predictions_quantile={quantile}.csv`` in
        the current working directory.

    Raises
    ------
    ValueError
        If no predictions were produced, or if the number of predicted day
        columns does not match the sample submission.
    """
    store_id_set_list = ['CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3', 'WI_1', 'WI_2', 'WI_3']

    print(f"predicting quantile={quantile}")

    # One frame per (end day, horizon, store); concatenated once at the end
    # instead of growing a DataFrame inside the loops.
    window_frames = []

    for end_train_day_x in end_train_day_x_list:
        previous_prediction_horizon = 0
        for prediction_horizon in prediction_horizon_list:
            print(f"prediction horizon=+{prediction_horizon}")
            notebook_name = f"../input/m5-train-day-{end_train_day_x}-horizon-{prediction_horizon}"

            # Absolute day range this horizon is responsible for (inclusive).
            first_day = end_train_day_x + previous_prediction_horizon + 1
            last_day = end_train_day_x + prediction_horizon

            for store_index, store_id in enumerate(store_id_set_list):

                model_path = f'{notebook_name}/lgb_model_{store_id}_{prediction_horizon}.bin'
                print(f'loading {model_path}')
                # NOTE(security): pickle.load can execute arbitrary code. These
                # files are downloaded kernel outputs; only load them from a
                # source you trust.
                with open(model_path, 'rb') as model_file:
                    estimator = pickle.load(model_file)
                base_test = pd.read_feather(f"{notebook_name}/test_{store_id}_{prediction_horizon}.feather")
                enable_features = [col for col in base_test.columns if col not in ['id', 'd', 'sales']]

                grid_full = pd.read_feather(f"{notebook_name}/grid_full_store_{store_id}_{end_train_day_x}_to_{end_train_day_x + prediction_horizon}.feather")
                lsf = LSF(model=estimator, min_bin_size=min_bin_size)
                lsf.fit(x_train=grid_full[enable_features], y_train=grid_full['sales'].fillna(0),
                        seed=0, x_train_is_dataframe=True, model_is_already_trained=True)
                # The grid is only needed for fitting; drop the local reference early.
                del grid_full

                for predict_day in range(previous_prediction_horizon + 1, prediction_horizon + 1):
                    print('[{3} -> {4}] predict {0}/{1} {2} day {5}'.format(
                        store_index + 1, len(store_id_set_list), store_id,
                        previous_prediction_horizon + 1, prediction_horizon, predict_day))
                    mask = base_test['d'] == (end_train_day_x + predict_day)
                    base_test.loc[mask, 'sales'] = lsf.predict(
                        base_test.loc[mask, enable_features], quantile=quantile)

                window_df = base_test.loc[
                    base_test['d'].between(first_day, last_day),
                    ['id', 'd', 'sales']
                ].copy()
                # Convert absolute day numbers to 1-based offsets from THIS
                # window's end day. Subtracting the whole end-day list from the
                # column only works by broadcasting when that list has exactly
                # one element.
                window_df['d'] = window_df['d'] - end_train_day_x
                window_frames.append(window_df)

                del base_test, lsf, estimator
                gc.collect()

            previous_prediction_horizon = prediction_horizon

    if not window_frames:
        raise ValueError("no predictions produced: end_train_day_x_list or "
                         "prediction_horizon_list is empty")

    pred_v_all_df = pd.concat(window_frames)

    # The sample submission is not downloaded by the cells of this notebook
    # shown here; it must already exist at this path.
    submission = pd.read_csv("../input/m5-forecasting-accuracy/sample_submission.csv")

    # Long -> wide: one row per id, one column per relative day.
    pred_h_all_df = pred_v_all_df.pivot(index='id', columns='d', values='sales')
    pred_h_all_df = pred_h_all_df.reset_index()
    if pred_h_all_df.shape[1] != len(submission.columns):
        raise ValueError(
            f"predictions cover {pred_h_all_df.shape[1] - 1} day column(s) but the sample "
            f"submission expects {len(submission.columns) - 1}")
    pred_h_all_df.columns = submission.columns

    # Keep the sample submission's row order; ids without predictions are filled with 0.
    submission = submission[['id']].merge(pred_h_all_df, on=['id'], how='left').fillna(0)
    submission.to_csv(f"m5_predictions_quantile={quantile}.csv", index=False)

In [None]:
# Quantile levels to predict; one submission CSV is written per level.
quantile_levels = (0.005, 0.025, 0.165, 0.25, 0.5, 0.75, 0.835, 0.975, 0.995)

# Settings shared by every run: a single training end day, four weekly
# horizons and the minimum bin size handed to predict_uncertainty.
shared_settings = {
    "end_train_day_x_list": [1941],
    "prediction_horizon_list": [7, 14, 21, 28],
    "min_bin_size": 300,
}

for quantile in quantile_levels:
    predict_uncertainty(quantile=quantile, **shared_settings)