In [None]:
import pandas as pd
import numpy as np
import pickle

import plotly.offline as py
import optuna
import json
import datetime

from joblib import Parallel, delayed
from fbprophet import Prophet
from fbprophet.plot import *
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics

## Params

In [None]:
# First day index to keep: when START_DAY > 0, rows whose 'd' is below it are dropped
# from grid_df; 0 keeps every row.
START_DAY = 0
# Last day index whose sales are kept as the target: rows with 'd' greater than this
# get their 'y' set to NaN when the Prophet frame is built.
END_SALES = 1913

## Load interim data

In [None]:
# Load the interim grid DataFrame. Later cells read its 'd', 'date', 'sales', 'sell_price',
# 'state_id', 'store_id', 'dept_id' and 'snap_CA' / 'snap_TX' / 'snap_WI' columns.
# NOTE(review): unpickling can execute arbitrary code — only load pickles from a trusted source.
grid_df = pd.read_pickle("data/interim/grid_df.pkl")

## Reduce on params period

In [None]:
# Only trim the history when a positive start day is configured;
# otherwise the full frame is kept untouched.
if START_DAY > 0:
    grid_df = grid_df.loc[grid_df['d'] >= START_DAY]

## Format data for Prophet

In [None]:
# Build a single 'snap' flag per row: start from the CA flag, then overwrite the rows
# belonging to each other state with that state's own flag.
grid_df['snap'] = grid_df['snap_CA']
for state in ('TX', 'WI'):
    in_state = grid_df['state_id'] == state
    grid_df.loc[in_state, 'snap'] = grid_df.loc[in_state, 'snap_' + state].values
grid_df['snap'] = grid_df['snap'].astype(int)

In [None]:
# Collapse the grid to one row per (store, department, day) using Prophet's column names:
# 'ds' is the date, 'y' the summed sales, plus the snap flag and the highest sell price.
group_keys = ['store_id', 'dept_id', 'd']
aggregations = {
    'ds': ('date', 'max'),
    'y': ('sales', 'sum'),
    'snap': ('snap', 'max'),
    'price': ('sell_price', 'max'),
}
prophet_df = grid_df.groupby(group_keys).agg(**aggregations).reset_index()

# Series identifier: "<store_id>_<dept_id>"
prophet_df['id'] = prophet_df['store_id'].astype(str).str.cat(prophet_df['dept_id'].astype(str), sep='_')

# Days after END_SALES have no observed sales, but summing NaN yields 0 in pandas,
# so the target is put back to NaN explicitly.
after_sales_end = prophet_df['d'] > END_SALES
prophet_df.loc[after_sales_end, 'y'] = np.nan

prophet_df = prophet_df[['id', 'd', 'ds', 'y', 'snap', 'price']]

# The item-level grid is no longer needed once aggregated.
del grid_df

## Define tuning function

In [None]:
def optimize_prophet(id, n_trials=1, cv_horizon='28 days', cv_period='28 days',
                     cv_initial='1548 days', seed=None):
    """Tune Prophet hyper-parameters for a single series with Optuna.

    The series is read from the module-level ``prophet_df`` (rows whose ``id``
    column equals ``id``), rows containing any NaN are dropped, and an Optuna
    study minimises the RMSE between ``y`` and ``yhat`` over the frame returned
    by Prophet's ``cross_validation``.

    Parameters
    ----------
    id : value compared against ``prophet_df['id']`` to select the series.
    n_trials : int, default 1
        Number of Optuna trials to run.
    cv_horizon, cv_period, cv_initial : str
        Passed to ``cross_validation`` as ``horizon``, ``period`` and
        ``initial``. The defaults reproduce the previously hard-coded windows.
    seed : int or None, default None
        When given, the study uses a ``TPESampler`` seeded with this value so
        the sequence of suggested parameters can be repeated. ``None`` keeps
        Optuna's default sampler.

    Returns
    -------
    tuple
        ``(study.best_value, study.best_params)``: the lowest RMSE found and
        the parameter dict that produced it.

    Raises
    ------
    ValueError
        If no rows remain for ``id`` after dropping NaNs.
    """
    # Reduce df on current series. Boolean indexing already yields a new frame,
    # and dropna() returns another one, so prophet_df itself is never modified.
    df = prophet_df[prophet_df['id'] == id].dropna()
    if df.empty:
        raise ValueError("no usable rows in prophet_df for id {!r}".format(id))

    def objective(trial):
        """Fit one candidate Prophet model and return its cross-validated RMSE."""
        # Define Space
        # NOTE(review): recent Optuna releases deprecate suggest_loguniform in favour of
        # suggest_float(..., log=True); kept as-is for the installed version — confirm.
        params = {
            'n_changepoints': trial.suggest_int('n_changepoints', 1, 100),
            'changepoint_range': trial.suggest_loguniform('changepoint_range', 0.5, 1.0),
            'seasonality_mode': trial.suggest_categorical('seasonality_mode', ['additive', 'multiplicative']),
            'seasonality_prior_scale': trial.suggest_loguniform('seasonality_prior_scale', 0.01, 30),
            'holidays_prior_scale': trial.suggest_loguniform('holidays_prior_scale', 0.01, 30),
            'changepoint_prior_scale': trial.suggest_loguniform('changepoint_prior_scale', 0.01, 30),
            # ---------
            'yearly_order': trial.suggest_int('yearly_order', 1, 50),
            'monthly_order': trial.suggest_int('monthly_order', 1, 50),
            'weekly_order': trial.suggest_int('weekly_order', 1, 50),
            # ---------
            'add_holidays': trial.suggest_categorical('add_holidays', [True, False]),
            'add_snap': trial.suggest_categorical('add_snap', [True, False]),
            'add_price': trial.suggest_categorical('add_price', [True, False])
        }

        # Define model. The built-in seasonalities are switched off so they can be
        # re-added below with a tunable Fourier order.
        m = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=False,
            daily_seasonality=False,
            uncertainty_samples=False,
            n_changepoints=params['n_changepoints'],
            changepoint_range=params['changepoint_range'],
            seasonality_mode=params['seasonality_mode'],
            seasonality_prior_scale=params['seasonality_prior_scale'],
            holidays_prior_scale=params['holidays_prior_scale'],
            changepoint_prior_scale=params['changepoint_prior_scale'],
        )

        # Custom seasonalities: (name, period in days); the order comes from '<name>_order'.
        for name, period_days in (('yearly', 365.25), ('monthly', 365.25 / 12), ('weekly', 7)):
            m.add_seasonality(
                name=name,
                period=period_days,
                fourier_order=params[name + '_order'],
            )

        # Add holidays/regressor
        if params['add_holidays']:
            m.add_country_holidays(country_name='US')

        if params['add_snap']:
            # The snap mode is only sampled when the snap regressor is actually used.
            params['snap_mode'] = trial.suggest_categorical('snap_mode', ['additive', 'multiplicative'])
            m.add_regressor('snap', mode=params['snap_mode'])

        if params['add_price']:
            m.add_regressor('price')

        # Fit
        m.fit(df)

        # TS validation
        df_cv = cross_validation(m, horizon=cv_horizon, period=cv_period, initial=cv_initial)

        # Calculate Error
        return np.sqrt(np.mean(np.square(df_cv['y'] - df_cv['yhat'])))

    # Only override the sampler when a seed is requested, so the default call behaves as before.
    sampler = optuna.samplers.TPESampler(seed=seed) if seed is not None else None
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    return study.best_value, study.best_params

## Tune all series in parallel

In [None]:
# Distinct series identifiers found in prophet_df, and how many there are.
l_id = pd.unique(prophet_df['id'])

print(len(l_id))

In [None]:
# One tuning job per series id, spread over all available cores.
# NOTE: only the first two ids (l_id[:2]) are submitted here.
tuning_jobs = (delayed(optimize_prophet)(series_id) for series_id in l_id[:2])
opt = Parallel(n_jobs=-1, verbose=1)(tuning_jobs)

## Format & save outputs

In [None]:
# Map each tuned series id to its best RMSE and parameter set.
# Assumes opt is ordered like the leading entries of l_id (result k belongs to l_id[k]).
prophet_params = {
    l_id[pos]: {'rmse': best_rmse, 'params': best_params}
    for pos, (best_rmse, best_params) in enumerate(opt)
}

In [None]:
# Timestamp as YYYY-MM-DD-HH-MM-SS (no colons or fractional seconds) for use in the file name.
dt = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

output_path = 'prophet_params_{}.json'.format(dt)
with open(output_path, 'w') as json_file:
    json.dump(prophet_params, json_file)