In [4]:
import numpy as np
import pandas as pd

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [5]:
# Load the prepared dataset; the "date" column is parsed into datetimes on read.
# NOTE(review): the recorded preview of this cell shows several "Unnamed: 0*"
# columns, which look like row indices written out by earlier saves — confirm
# that nothing downstream depends on them before dropping them.
data = pd.read_csv(
    "pre_validate.csv",
    parse_dates=["date"],
)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,date,store_nbr,family,id,sales,onpromotion,transactions,oil,city,state,type,cluster,work_day,nat_terremoto,nat_navidad,nat_dia la madre,nat_dia trabajo,nat_primer dia ano,nat_futbol,nat_dia difuntos,day,month,year,day_of_week,day_of_year,week_of_year,date_index,nat_batalla_de_pichincha,nat_carnaval,nat_cyber_monday,nat_independencia_de_cuenca,nat_independencia_de_guayaquil,n_viernes_santo,day_of_month,week_of_month,is_wknd,quarter,is_month_start,is_month_end,is_quarter_start,is_quarter_end,is_year_start,is_year_end,season,wageday,day_to_nearest_holiday,day_from_nearest_holiday
0,0,0,0,2013-01-01,store_nbr_1,AUTOMOTIVE,0.0,,,0.0,93.14,city_quito,state_pichincha,type_D,cluster_13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1,2013,1,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0
1,1,1,1,2013-01-01,store_nbr_1,BABY CARE,1.0,,,0.0,93.14,city_quito,state_pichincha,type_D,cluster_13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1,2013,1,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0
2,2,2,2,2013-01-01,store_nbr_1,BEAUTY,2.0,,,0.0,93.14,city_quito,state_pichincha,type_D,cluster_13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1,2013,1,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0
3,3,3,3,2013-01-01,store_nbr_1,BEVERAGES,3.0,,,0.0,93.14,city_quito,state_pichincha,type_D,cluster_13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1,2013,1,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0
4,4,4,4,2013-01-01,store_nbr_1,BOOKS,4.0,,,0.0,93.14,city_quito,state_pichincha,type_D,cluster_13,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1,1,2013,1,1,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,0,1,0,0,0,0,0


In [6]:
from darts_helper.preprocess import get_covariates, get_target_series


# Columns handed to get_target_series as static covariates.
static_cols = [
    "city",
    "state",
    "type",
    "cluster",
]

# Build the target series from the loaded frame. The helper returns three
# objects, unpacked here as target_dict, pipe_dict and id_dict.
# This step is slow: the recorded run took roughly two minutes for 33 items.
target_dict, pipe_dict, id_dict = get_target_series(data, static_cols)

# Holiday / event indicator columns, spelled exactly as in the CSV header
# (some names contain spaces, and the last one uses the "n_" prefix).
selected_holidays = [
    "nat_terremoto",
    "nat_navidad",
    "nat_dia la madre",
    "nat_dia trabajo",
    "nat_primer dia ano",
    "nat_futbol",
    "nat_dia difuntos",
    "nat_batalla_de_pichincha",
    "nat_carnaval",
    "nat_cyber_monday",
    "nat_independencia_de_cuenca",
    "nat_independencia_de_guayaquil",
    "n_viernes_santo",
]

# Calendar-derived feature columns.
time_based_to_add = [
    "day_of_month",
    "week_of_month",
    "is_wknd",
    "quarter",
    "is_month_start",
    "is_month_end",
    "is_quarter_start",
    "is_quarter_end",
    "is_year_start",
    "is_year_end",
    "season",
    "wageday",
    "day_to_nearest_holiday",
    "day_from_nearest_holiday",
    "day",
    "month",
    "year",
    "day_of_week",
    "day_of_year",
    "week_of_year",
    "date_index",
    "work_day",
]

# Past covariates.
past_cols = ["transactions"]

# Future covariates: oil price and promotions first, then the holiday flags,
# then the calendar features (order preserved).
future_cols = ["oil", "onpromotion"] + selected_holidays + time_based_to_add

# Future covariates for which moving averages are computed.
future_ma_cols = ["oil", "onpromotion"]

# Extract the covariate series from the loaded frame. The helper receives the
# past columns, the future columns and the moving-average columns, and returns
# two objects unpacked as past_dict and future_dict.
# This step is slow: the recorded run took about two and a half minutes.
past_dict, future_dict = get_covariates(
    data,
    past_cols,
    future_cols,
    future_ma_cols,
)

Extracting target series: 100%|██████████| 33/33 [01:53<00:00,  3.45s/it]
Extracting covariates: 100%|██████████| 33/33 [02:26<00:00,  4.42s/it]


In [7]:
from darts_helper.trainer import Trainer

# Keyword arguments for the Trainer, collected in one place so they can be
# inspected or tweaked before the trainer is constructed.
TRAINER_CONFIG = dict(
    # Series and lookup objects produced by the preprocessing cell.
    target_dict=target_dict,
    pipe_dict=pipe_dict,
    id_dict=id_dict,
    past_dict=past_dict,
    future_dict=future_dict,

    # Time-series cross-validation with a rolling forecasting origin.
    forecast_horizon=16,  # length of the validation set
    folds=1,  # number of training sets; 1 is the standard train-validation split

    # Number of previous days checked for zero sales; when all of them are
    # zero, zero forecasts are generated.
    zero_fc_window=15,

    static_covs="keep_all",
    past_covs="keep_all",
    future_covs="keep_all",
    models=[],
)

trainer = Trainer(**TRAINER_CONFIG)

In [8]:
# Baseline model hyper-parameters shared by the training run.
BASE_CONFIG = {
    "random_state": 0,

    # Number of lagged values of the target series fed to the model.
    "lags": 63,

    # Explicit lag offsets of the past covariates: -16, -17, ..., -22
    # (seven values, the most recent one 16 steps back).
    # NOTE(review): the 16 presumably mirrors the 16-step forecast horizon so
    # that no unobserved past-covariate values are needed — confirm.
    "lags_past_covariates": [-offset for offset in range(16, 23)],

    # (past, future) lag counts for the future covariates; the original note
    # describes the second entry as "future-1".
    "lags_future_covariates": (14, 1),

    # Number of days ahead the model forecasts from one set of inputs.
    "output_chunk_length": 1,
}

# Train and validate a single model type ("lr") with the baseline config.
# The two lists are given side by side: one model name, one config.
# NOTE(review): drop_before presumably discards history before 2015-01-01 —
# confirm against the Trainer implementation.
trainer.train(
    ["lr"],
    [BASE_CONFIG],
    drop_before="2015-01-01",
)

Performing validation:   0%|          | 0/33 [00:00<?, ?it/s]

Performing validation:   3%|▎         | 1/33 [00:09<05:19,  9.99s/it]

AUTOMOTIVE                 | lr: 0.50098


Performing validation:   6%|▌         | 2/33 [00:17<04:26,  8.59s/it]

BABY CARE                  | lr: 0.18491


Performing validation:   9%|▉         | 3/33 [00:25<04:07,  8.24s/it]

BEAUTY                     | lr: 0.49634


Performing validation:  12%|█▏        | 4/33 [00:34<04:10,  8.63s/it]

BEVERAGES                  | lr: 0.26614


Performing validation:  15%|█▌        | 5/33 [00:42<03:57,  8.49s/it]

BOOKS                      | lr: 0.02924


Performing validation:  18%|█▊        | 6/33 [00:53<04:05,  9.07s/it]

BREAD/BAKERY               | lr: 0.19180


Performing validation:  21%|██        | 7/33 [01:02<03:56,  9.08s/it]

CELEBRATION                | lr: 0.53101


Performing validation:  24%|██▍       | 8/33 [01:12<03:56,  9.44s/it]

CLEANING                   | lr: 0.32697


Performing validation:  27%|██▋       | 9/33 [01:23<03:56,  9.84s/it]

DAIRY                      | lr: 0.18631


Performing validation:  30%|███       | 10/33 [01:30<03:26,  8.98s/it]

DELI                       | lr: 0.19707


Performing validation:  33%|███▎      | 11/33 [01:38<03:15,  8.88s/it]

EGGS                       | lr: 0.27169


Performing validation:  36%|███▋      | 12/33 [01:47<03:06,  8.90s/it]

FROZEN FOODS               | lr: 0.28239


Performing validation:  39%|███▉      | 13/33 [01:58<03:08,  9.42s/it]

GROCERY I                  | lr: 0.21665


Performing validation:  42%|████▏     | 14/33 [02:07<02:57,  9.32s/it]

GROCERY II                 | lr: 0.52462


Performing validation:  45%|████▌     | 15/33 [02:14<02:34,  8.56s/it]

HARDWARE                   | lr: 0.51838


Performing validation:  48%|████▊     | 16/33 [02:20<02:13,  7.83s/it]

HOME AND KITCHEN I         | lr: 0.48919


Performing validation:  52%|█████▏    | 17/33 [02:25<01:54,  7.13s/it]

HOME AND KITCHEN II        | lr: 0.46280


Performing validation:  55%|█████▍    | 18/33 [02:36<02:03,  8.25s/it]

HOME APPLIANCES            | lr: 0.30068


Performing validation:  58%|█████▊    | 19/33 [02:45<01:58,  8.44s/it]

HOME CARE                  | lr: 0.36041


Performing validation:  61%|██████    | 20/33 [02:52<01:43,  8.00s/it]

LADIESWEAR                 | lr: 0.49317


Performing validation:  64%|██████▎   | 21/33 [02:58<01:29,  7.50s/it]

LAWN AND GARDEN            | lr: 0.44101


Performing validation:  67%|██████▋   | 22/33 [03:06<01:21,  7.40s/it]

LINGERIE                   | lr: 0.61907


Performing validation:  70%|██████▉   | 23/33 [03:12<01:12,  7.21s/it]

LIQUOR,WINE,BEER           | lr: 0.66767


Performing validation:  73%|███████▎  | 24/33 [03:21<01:09,  7.69s/it]

MAGAZINES                  | lr: 0.50634


Performing validation:  76%|███████▌  | 25/33 [03:27<00:56,  7.11s/it]

MEATS                      | lr: 0.22312


Performing validation:  79%|███████▉  | 26/33 [03:31<00:44,  6.32s/it]

PERSONAL CARE              | lr: 0.27084


Performing validation:  82%|████████▏ | 27/33 [03:35<00:33,  5.63s/it]

PET SUPPLIES               | lr: 0.45526


Performing validation:  85%|████████▍ | 28/33 [03:40<00:26,  5.22s/it]

PLAYERS AND ELECTRONICS    | lr: 0.45459


Performing validation:  88%|████████▊ | 29/33 [03:44<00:19,  5.00s/it]

POULTRY                    | lr: 0.20985


Performing validation:  91%|█████████ | 30/33 [03:48<00:14,  4.69s/it]

PREPARED FOODS             | lr: 0.26746


Performing validation:  94%|█████████▍| 31/33 [03:52<00:08,  4.48s/it]

PRODUCE                    | lr: 0.35962


Performing validation:  97%|█████████▋| 32/33 [03:56<00:04,  4.38s/it]

SCHOOL AND OFFICE SUPPLIES | lr: 0.67460


Performing validation: 100%|██████████| 33/33 [04:00<00:00,  7.30s/it]

SEAFOOD                    | lr: 0.45124
Average RMSLE | lr: 0.37671





: 