In [1]:
import numpy as np
import pandas as pd
import os
import random
import math
from decimal import Decimal as dec
import datetime
import time
import gc
import lightgbm as lgb
import pickle

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# Inference configuration: one pickled model and one feather test frame exist
# per (store, prediction horizon) under the matching "../input/..." directory.
store_id_set_list = ['CA_1', 'CA_2', 'CA_3', 'CA_4', 'TX_1', 'TX_2', 'TX_3', 'WI_1', 'WI_2', 'WI_3']
end_train_day_x_list = [1913]
prediction_horizon_list = [7, 14, 21, 28]

# One long-format frame (id, d, sales) per (end-train day, horizon); they are
# concatenated a single time after the loops instead of growing a frame.
pred_v_frames = []

for end_train_day_x in end_train_day_x_list:
    previous_prediction_horizon = 0
    for prediction_horizon in prediction_horizon_list:
        notebook_name = f"../input/m5-train-day-{end_train_day_x}-horizon-{prediction_horizon}"

        # Predictions of every store for this horizon's day window.
        store_frames = []

        for store_index, store_id in enumerate(store_id_set_list):

            model_path = f'{notebook_name}/lgb_model_{store_id}_{prediction_horizon}.bin'
            print(f'loading {model_path}')
            # NOTE(security): pickle.load can execute arbitrary code; only load
            # model files that come from a trusted source.
            # The context manager closes the file handle deterministically.
            with open(model_path, 'rb') as model_file:
                estimator = pickle.load(model_file)
            base_test = pd.read_feather(f"{notebook_name}/test_{store_id}_{prediction_horizon}.feather")
            enable_features = [col for col in base_test.columns if col not in ['id', 'd', 'sales']]

            # Each horizon's model only predicts the days that the previous
            # (shorter) horizon did not already cover.
            for predict_day in range(previous_prediction_horizon + 1, prediction_horizon + 1):
                print(f'[{previous_prediction_horizon + 1} -> {prediction_horizon}] '
                      f'predict {store_index + 1}/{len(store_id_set_list)} {store_id} day {predict_day}')
                mask = base_test['d'] == (end_train_day_x + predict_day)
                # Single .loc selection instead of chained indexing.
                base_test.loc[mask, 'sales'] = estimator.predict(base_test.loc[mask, enable_features])

            # Keep only the rows of the day window predicted above.
            in_window = (
                (base_test['d'] >= end_train_day_x + previous_prediction_horizon + 1) &
                (base_test['d'] < end_train_day_x + prediction_horizon + 1)
            )
            store_frames.append(base_test.loc[in_window, ['id', 'd', 'sales']])

            gc.collect()

        pred_v_df = pd.concat(store_frames)

        previous_prediction_horizon = prediction_horizon

        # For end-train day 1913 the ids are relabelled from "evaluation" to "validation".
        if end_train_day_x == 1913:
            pred_v_df['id'] = pred_v_df['id'].str.replace("evaluation", "validation")

        pred_v_frames.append(pred_v_df)

# Long-format predictions for all stores, horizons and end-train days.
pred_v_all_df = pd.concat(pred_v_frames)

loading ../input/m5-train-day-1913-horizon-7/lgb_model_CA_1_7.bin
[1 -> 7] predict 1/10 CA_1 day 1
[1 -> 7] predict 1/10 CA_1 day 2
[1 -> 7] predict 1/10 CA_1 day 3
[1 -> 7] predict 1/10 CA_1 day 4
[1 -> 7] predict 1/10 CA_1 day 5
[1 -> 7] predict 1/10 CA_1 day 6
[1 -> 7] predict 1/10 CA_1 day 7
loading ../input/m5-train-day-1913-horizon-7/lgb_model_CA_2_7.bin
[1 -> 7] predict 2/10 CA_2 day 1
[1 -> 7] predict 2/10 CA_2 day 2
[1 -> 7] predict 2/10 CA_2 day 3
[1 -> 7] predict 2/10 CA_2 day 4
[1 -> 7] predict 2/10 CA_2 day 5
[1 -> 7] predict 2/10 CA_2 day 6
[1 -> 7] predict 2/10 CA_2 day 7
loading ../input/m5-train-day-1913-horizon-7/lgb_model_CA_3_7.bin
[1 -> 7] predict 3/10 CA_3 day 1
[1 -> 7] predict 3/10 CA_3 day 2
[1 -> 7] predict 3/10 CA_3 day 3
[1 -> 7] predict 3/10 CA_3 day 4
[1 -> 7] predict 3/10 CA_3 day 5
[1 -> 7] predict 3/10 CA_3 day 6
[1 -> 7] predict 3/10 CA_3 day 7
loading ../input/m5-train-day-1913-horizon-7/lgb_model_CA_4_7.bin
[1 -> 7] predict 4/10 CA_4 day 1
[1 -> 7] p

In [3]:
# Read the sample submission; its columns and its `id` column are reused below
# to shape the final prediction file.
sample_submission_path = "../input/m5-forecasting-accuracy/sample_submission.csv"
submission = pd.read_csv(sample_submission_path)

In [4]:
# Turn absolute day numbers into offsets from the end-train day, then pivot the
# long (id, d, sales) frame into one row per id with one column per offset.
#
# Subtracting the whole `end_train_day_x_list` from the column only works by
# broadcasting a one-element list; any other length fails with an opaque shape
# error. Make the single-origin requirement explicit and subtract a scalar.
if len(end_train_day_x_list) != 1:
    raise ValueError(
        "Expected exactly one end-train day to convert 'd' into forecast offsets, "
        f"got {len(end_train_day_x_list)}: {end_train_day_x_list}"
    )
pred_v_all_df['d'] = pred_v_all_df['d'] - end_train_day_x_list[0]

pred_h_all_df = pred_v_all_df.pivot(index='id', columns='d', values='sales')
pred_h_all_df = pred_h_all_df.reset_index()
# Reuse the sample submission's header so the column names match the expected file layout.
pred_h_all_df.columns = submission.columns

In [5]:
# Left-merge the wide predictions onto the sample submission's ids so the
# output keeps every id of the sample file; any value still missing after the
# merge is replaced with 0 before writing the CSV.
submission_ids = submission[['id']]
merged_predictions = submission_ids.merge(pred_h_all_df, on=['id'], how='left')
submission = merged_predictions.fillna(0)
submission.to_csv("m5_predictions.csv", index=False)