## Notebook Configuration & Imports

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
import logging
import os

import numpy as np

import optuna
from sklearn.model_selection import TimeSeriesSplit

from storesales.baseline.sales_predictor import SalesPredictor
from storesales.baseline.utils import run_study, load_baseline_data, load_submission
from storesales.baseline.model_wrappers import (
    DailyMeanModelWrapper,
    ProphetWrapper,
    DayOfWeekMeanModelWrapper,
)
from storesales.baseline.param_suggestions import (
    IntSuggestions,
    FloatSuggestions,
    CategoricalSuggestions,
)
from storesales.baseline.constants import FAMILY_GROUPS, N_STORES
from storesales.constants import SUBMISSIONS_PATH

## Load & Prepare Data

In [21]:
# Unpack the three objects returned by load_baseline_data():
#   train_df    -> used by run_study() and sales_predictor.fit()
#   test_df     -> used by sales_predictor.predict()
#   holidays_df -> passed to ProphetWrapper as `holidays`
train_df, test_df, holidays_df = load_baseline_data()

## Prepare SalesPredictor

In [22]:
# Shared time-span string: passed as `initial` to ProphetWrapper and as the
# second argument of sales_predictor.fit() further down, so both use the
# same value.
initial = "760 days"  # train period

### DailyMeanModelWrapper

In [23]:
# Integer suggestions handed to DailyMeanModelWrapper: a single "window"
# parameter with the values 3 and 60 (presumably lower/upper bound — confirm
# against IntSuggestions).
daily_mean_int_suggestions = [
    IntSuggestions("window", 3, 60),
]

In [24]:
# Daily-mean baseline wrapper configured with the integer suggestions above.
daily_wrapper = DailyMeanModelWrapper(
    int_suggestions=daily_mean_int_suggestions,
)

### DayOfWeekMeanModelWrapper

In [25]:
# Integer suggestions for DayOfWeekMeanModelWrapper, one per window parameter.
# Each spec is (parameter name, (first value, second value)); the pair is
# presumably the lower/upper bound — confirm against IntSuggestions.
day_of_week_mean_int_suggestions = [
    IntSuggestions(param_name, *bounds)
    for param_name, bounds in (
        ("weekdays_window", (3, 50)),
        ("weekends_window", (1, 10)),
    )
]

In [26]:
# Build the DayOfWeekMeanModelWrapper with the integer suggestions declared
# in the previous cell (presumably its tunable search space — confirm in
# storesales.baseline.model_wrappers).
day_of_week_wrapper = DayOfWeekMeanModelWrapper(
    int_suggestions=day_of_week_mean_int_suggestions
)

### ProphetWrapper

In [27]:
# Suggestions handed to ProphetWrapper, grouped by value type.
# The parameter names match Prophet constructor arguments; the numeric pairs
# are presumably lower/upper bounds — confirm against the *Suggestions classes.
prophet_int_suggestions = [IntSuggestions("n_changepoints", 20, 50)]

prophet_float_suggestions = [
    FloatSuggestions(param_name, *bounds)
    for param_name, bounds in (
        ("changepoint_prior_scale", (0.01, 0.5)),
        ("holidays_prior_scale", (5, 80)),
        ("seasonality_prior_scale", (5, 80)),
    )
]

prophet_categorical_suggestions = [
    CategoricalSuggestions(
        "seasonality_mode",
        ["additive", "multiplicative"],
    ),
]

In [28]:
# Prophet-based wrapper. It receives:
#   - `initial`: the shared "760 days" string defined earlier,
#   - one extra regressor column name, "dcoilwtico"
#     (presumably the oil-price column of the training data — confirm),
#   - the holidays object returned by load_baseline_data(),
#   - the int / float / categorical suggestions declared in the previous cell.
prophet_wrapper = ProphetWrapper(
    initial=initial,
    extra_regressors=["dcoilwtico"],
    holidays=holidays_df,
    int_suggestions=prophet_int_suggestions,
    float_suggestions=prophet_float_suggestions,
    categorical_suggestions=prophet_categorical_suggestions,
)

### SalesPredictor

In [29]:
# Candidate models, keyed by wrapper class name. The same names appear as the
# "model" entry of the best params shown after the study.
model_wrappers = {
    DailyMeanModelWrapper.__name__: daily_wrapper,
    DayOfWeekMeanModelWrapper.__name__: day_of_week_wrapper,
    ProphetWrapper.__name__: prophet_wrapper,
}

# Keyword arguments given to SalesPredictor as `optuna_optimize_kwargs`
# (presumably forwarded to Optuna's Study.optimize, where `timeout` is in
# seconds — confirm in SalesPredictor).
optuna_optimize_kwargs = {
    "n_trials": 100,
    "show_progress_bar": True,
    "timeout": 45,
    "n_jobs": 1,
}

predictor = SalesPredictor(
    model_wrappers=model_wrappers,
    family_groups=FAMILY_GROUPS,
    # NOTE(review): negative offsets, presumably relative to the end of the
    # training data — confirm in SalesPredictor.
    inner_cutoffs=[-180, -49, -33, -17],
    # Three time-ordered folds, each with a 16-sample test set.
    outer_cv=TimeSeriesSplit(n_splits=3, test_size=16),
    optuna_optimize_kwargs=optuna_optimize_kwargs,
    n_group_store_family_choices=4,
    n_single_store_family_choices=2,
)

## Run Study

In [None]:
# Restrict Optuna and the "cmdstanpy" logger to warnings and above so the
# study output is not flooded. WARNING is used for both calls (it has the same
# value as the WARN alias) to keep the two lines consistent.
optuna.logging.set_verbosity(optuna.logging.WARNING)
logging.getLogger("cmdstanpy").setLevel(logging.WARNING)

# run_study() returns the object that the following cells query
# (`best_storage`) and use for fit() / predict().
sales_predictor = run_study(train_df, predictor, n_stores=N_STORES)

In [29]:
# Collect the "loss" of every entry in best_storage and show the average.
# Only the values are needed, so the keys are not iterated.
losses = [entry["loss"] for entry in sales_predictor.best_storage.values()]
np.mean(losses)

0.39114943199351826

In [30]:
# Display the stored best results. Per the recorded output, each key maps to a
# dict with the chosen "params" (including the "model" name) and its "loss".
sales_predictor.best_storage

{'AUTOMOTIVE': {'params': {'model': 'ProphetWrapper',
   'n_changepoints': 31,
   'changepoint_prior_scale': 0.27682195608032306,
   'holidays_prior_scale': 37.57673718031002,
   'seasonality_prior_scale': 45.98135777486151,
   'seasonality_mode': 'additive'},
  'loss': 0.23604517598612076},
 'CLEANING': {'params': {'model': 'ProphetWrapper',
   'n_changepoints': 31,
   'changepoint_prior_scale': 0.27682195608032306,
   'holidays_prior_scale': 37.57673718031002,
   'seasonality_prior_scale': 45.98135777486151,
   'seasonality_mode': 'additive'},
  'loss': 0.23604517598612076},
 'DELI': {'params': {'model': 'ProphetWrapper',
   'n_changepoints': 31,
   'changepoint_prior_scale': 0.27682195608032306,
   'holidays_prior_scale': 37.57673718031002,
   'seasonality_prior_scale': 45.98135777486151,
   'seasonality_mode': 'additive'},
  'loss': 0.23604517598612076},
 'GROCERY I': {'params': {'model': 'ProphetWrapper',
   'n_changepoints': 31,
   'changepoint_prior_scale': 0.27682195608032306,


In [20]:
# Fit on the training data, passing the same `initial` ("760 days") value that
# was given to ProphetWrapper. The recorded run took about 8 minutes.
sales_predictor.fit(train_df, initial)

100%|██████████| 1782/1782 [08:11<00:00,  3.63it/s]


## Make Submission

In [21]:
# Load the submission object that predict() takes as its second argument in
# the next cell.
submission = load_submission()

In [24]:
# Predict for test_df using the fitted predictor; the result is inspected and
# written to CSV in the cells below.
tuned_submission = sales_predictor.predict(test_df, submission)

100%|██████████| 1782/1782 [01:22<00:00, 21.69it/s] 


In [27]:
# Show only the rows whose "sales" value is not missing.
tuned_submission[tuned_submission["sales"].notna()]

Unnamed: 0_level_0,sales
id,Unnamed: 1_level_1
3000888,4.958483
3000889,0.000000
3000890,4.297729
3000891,2445.967258
3000892,0.035714
...,...
3029395,323.693730
3029396,104.054630
3029397,1280.578260
3029398,160.250000


In [26]:
# `os` is imported in the imports cell at the top of the notebook.
# Create the target directory if it is missing so to_csv() does not fail on a
# fresh checkout, then write the tuned predictions.
os.makedirs(SUBMISSIONS_PATH, exist_ok=True)

submission_file_path = os.path.join(
    SUBMISSIONS_PATH, "optuna_tuned_result_submission.csv"
)
tuned_submission.to_csv(submission_file_path)