In [None]:
#from google.colab import drive
#drive.mount('/content/drive/')

In [None]:
#!pip install fbprophet optuna --upgrade

In [None]:
import pandas as pd
import numpy as np
import pickle

import plotly.offline as py
import optuna
import json
import datetime

from joblib import Parallel, delayed
from fbprophet import Prophet
from fbprophet.plot import *
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics

## Params

In [None]:
# Notebook configuration.
# IS_EVAL selects which of the two period labels is used below.
IS_EVAL = False
# Root folder for the data files read and written by this notebook.
DATA_PATH = '../data/' #'/content/drive/My Drive/data/'

# Label embedded in the names of the input pickle and the output CSV.
PERIOD_LABEL = 'evaluation' if IS_EVAL else 'validation'

## Load refined data

In [None]:
# Load the refined frame for the selected period. Later cells read its
# `store_id`, `dept_id`, `d`, `y`, `snap` and `price` columns.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
prophet_df = pd.read_pickle(f'{DATA_PATH}refined/prophet_df_{PERIOD_LABEL}.pkl')

## Define tuning function

In [None]:
def optimize_prophet(store_id, dept_id, n_trials=100, horizon=28, seed=None):
    """Tune Prophet hyper-parameters for one (store, department) series with Optuna.

    The series is selected from the module-level ``prophet_df``. Every trial
    builds a Prophet model with custom yearly, monthly and weekly
    seasonalities, US holidays and the ``snap`` and ``price`` regressors, then
    scores it with the RMSSE measured on the last ``horizon`` days (a single
    cross-validation cutoff).

    Parameters
    ----------
    store_id, dept_id
        Values matched against the ``store_id`` and ``dept_id`` columns of
        ``prophet_df``.
    n_trials : int, default 100
        Number of Optuna trials to run.
    horizon : int, default 28
        Length, in days, of the hold-out window. Must be positive.
    seed : int or None, default None
        Seed for Optuna's TPE sampler so that the search can be repeated.
        ``None`` keeps Optuna's default, unseeded sampler.

    Returns
    -------
    tuple
        ``(best_value, best_params)`` of the Optuna study, i.e. the lowest
        RMSSE found and the parameter dict that produced it.

    Raises
    ------
    ValueError
        If no complete row exists for the requested series.
    """
    # Keep only the requested series. Rows with missing values (the prediction
    # period) carry no target and are useless for tuning.
    is_requested = (prophet_df['store_id'] == store_id) & (prophet_df['dept_id'] == dept_id)
    df = prophet_df.loc[is_requested].dropna()

    if df.empty:
        raise ValueError(
            'No usable rows for store_id=%r, dept_id=%r' % (store_id, dept_id)
        )

    # --- Quantities that do not depend on the trial: compute them once. ---
    window = str(horizon) + ' days'
    # `initial` is sized so that cross_validation produces exactly one cutoff,
    # located `horizon` days before the end of the history.
    # NOTE(review): assumes `d` is an integer day counter starting at 1 without
    # gaps -- confirm against the code that builds prophet_df.
    initial = str(df['d'].max() - horizon - 1) + ' days'
    # RMSSE denominator: mean squared day-to-day change of `y` over the
    # training part of the history (everything except the last `horizon` rows).
    # NOTE(review): relies on the rows being in chronological order -- confirm.
    scale = np.mean(np.square(np.diff(df['y'].values[:-horizon]).astype(np.float32)))

    def objective(trial):
        """Fit one candidate configuration and return its hold-out RMSSE."""
        # Search space.
        # suggest_discrete_uniform / suggest_loguniform are deprecated in recent
        # Optuna releases in favour of suggest_float; they are kept here to stay
        # compatible with the Optuna version this notebook was written for.
        params = {
            'changepoint_range': trial.suggest_discrete_uniform('changepoint_range', 0.7, 1.0, 0.1),
            'seasonality_mode': trial.suggest_categorical('seasonality_mode', ['additive', 'multiplicative']),
            'seasonality_prior_scale': trial.suggest_loguniform('seasonality_prior_scale', 0.1, 20),
            'holidays_prior_scale': trial.suggest_loguniform('holidays_prior_scale', 0.1, 20),
            'changepoint_prior_scale': trial.suggest_loguniform('changepoint_prior_scale', 0.001, 1),
            'yearly_order': trial.suggest_int('yearly_order', 1, 20),
            'monthly_order': trial.suggest_int('monthly_order', 1, 20),
            'weekly_order': trial.suggest_int('weekly_order', 1, 20),
            'snap_mode': trial.suggest_categorical('snap_mode', ['additive', 'multiplicative'])
        }

        # Built-in seasonalities are switched off so that the Fourier order of
        # each one can be tuned through add_seasonality below.
        m = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=False,
            daily_seasonality=False,
            uncertainty_samples=False,
            changepoint_range=params['changepoint_range'],
            seasonality_mode=params['seasonality_mode'],
            seasonality_prior_scale=params['seasonality_prior_scale'],
            holidays_prior_scale=params['holidays_prior_scale'],
            changepoint_prior_scale=params['changepoint_prior_scale'],
        )

        m.add_seasonality(
            name='yearly',
            period=365.25,
            fourier_order=params['yearly_order'],
        )

        m.add_seasonality(
            name='monthly',
            period=365.25/12,
            fourier_order=params['monthly_order'],
        )

        m.add_seasonality(
            name='weekly',
            period=7,
            fourier_order=params['weekly_order'],
        )

        # Holidays and external regressors
        m.add_country_holidays(country_name='US')
        m.add_regressor('snap', mode=params['snap_mode'])
        m.add_regressor('price')

        # cross_validation needs an already fitted model
        m.fit(df)

        # Time-series validation on the single cutoff defined by `initial`
        df_cv = cross_validation(m, horizon=window, period=window, initial=initial)

        # Mean squared forecast error on the hold-out window
        score = np.mean(np.square(df_cv['y'] - df_cv['yhat']))

        if scale == 0:
            # Constant training history: the RMSSE ratio is undefined. 0/0
            # would give NaN, which Optuna records as a failed trial; if every
            # trial fails, `study.best_value` raises. Return a well-defined
            # value instead.
            return 0.0 if score == 0 else float('inf')

        return np.sqrt(score / scale)

    # sampler=None lets Optuna fall back to its default sampler, so the
    # behaviour is unchanged unless a seed is supplied.
    sampler = None if seed is None else optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    return study.best_value, study.best_params

## Tune all series in parallel

In [None]:
# One row per distinct (store_id, dept_id) pair, i.e. one row per series to tune.
ts_ids = (
    prophet_df
    .loc[:, ['store_id', 'dept_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)
# Number of series, shown as the cell output.
ts_ids.shape[0]

In [None]:
# Run one tuning job per row of `ts_ids`; n_jobs=-1 asks joblib for all CPUs.
# `opt` ends up as a list of (best value, best params) pairs, one per series.
opt = Parallel(n_jobs=-1, verbose=1)(
    delayed(optimize_prophet)(series_key['store_id'], series_key['dept_id'])
    for _, series_key in ts_ids.iterrows()
)

## Format & save outputs

In [None]:
# Put each series' identifiers next to its tuning result. Both frames carry a
# default 0..n-1 index, so the column-wise concat pairs them row by row.
prophet_params = pd.concat(
    [
        ts_ids,
        pd.DataFrame(opt).rename(columns={0 : 'rmsse', 1 : 'params'}),
    ],
    axis=1,
)

In [None]:
# Timestamp (YYYY-MM-DD-HH-MM-SS) appended to the file name, which keeps the
# results of separate runs in separate files.
dt = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

# Write the tuned parameters, without the index column, under DATA_PATH/external.
prophet_params.to_csv(
    f'{DATA_PATH}external/params_prophet_store_dpt_{PERIOD_LABEL}_{dt}.csv',
    index=False,
)