In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import pandas as pd
import numpy as np
import os
import metrics

In [4]:
MP = '../../' # MP means Main Path
DP = MP + 'data/'
def load_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)

## Load predictions

In [5]:
point_predictions = dict()
sampled_ids = np.load(f'{DP}interim/sampled_ids.npy')
ste = pd.read_pickle(DP + 'processed/sales_train_evaluation.pkl')

### Point forecasts

#### deepAR

In [6]:
DEEPAR_DP = '../../models/'
DEEPAR_NAME = 'deepar_305'
deepar_forecasts = load_pickle(f'{DEEPAR_DP}{DEEPAR_NAME}_predictions/forecasts.pkl')
point_predictions['deepar'] = np.array(
    [f.mean for f in deepar_forecasts])

#### GP

In [7]:
m_name = 'GP_305'
model_path = f'{MP}models/{m_name}/'
point_predictions['gp'] = np.load(f'{model_path}means.npy').transpose()

#### Croston

In [8]:
point_predictions['croston'] = pd.read_csv(MP + 'models/croston/predictions.csv', index_col='id') \
                            .drop('alpha', axis=1) \
                            .loc[sampled_ids] \
                            .to_numpy()

#### CrostonTSB

In [9]:
point_predictions['crostontsb'] = pd.read_csv(MP + 'models/croston_tsb/predictions.csv', index_col='id') \
                            .drop(['alpha', 'beta'], axis=1) \
                            .loc[sampled_ids] \
                            .to_numpy()

#### Dummy prediction

In [10]:
m_name = 'dummy'

In [11]:
dummy = np.zeros(point_predictions['croston'].shape)
point_predictions[m_name] = dummy

## Find metrics

### Point forecasts

In [12]:
from metrics import create_mase

In [13]:
sampled_tss = ste \
                     .loc[sampled_ids] \
                     .to_numpy()

In [14]:
sampled_tss.shape

(305, 1941)

In [15]:
y_trains, y_tests = sampled_tss[:, :-28], sampled_tss[:, -28:]

In [16]:
result_df = pd.DataFrame(columns=['MASE','WRMSSE'],
                         index=point_predictions.keys(),
                         dtype='float64')
result_df.index = result_df.index.rename('models')

#### MASE

In [17]:
mase = create_mase(y_tests, y_trains)

In [18]:
for model in point_predictions:
    y_pred = point_predictions[model]
    result_df.loc[model, 'MASE'] = mase(y_pred)

In [19]:
result_df

Unnamed: 0_level_0,MASE,WRMSSE
models,Unnamed: 1_level_1,Unnamed: 2_level_1
deepar,1.69484,
gp,1.829102,
croston,1.726859,
crostontsb,1.679214,
dummy,1.613721,


#### RMSSE

In [20]:
from sktime.performance_metrics.forecasting import mean_squared_scaled_error

In [21]:
def create_rmsse(y_tests, y_trains):
    def metric(y_preds):
        metrics = np.zeros(y_preds.shape[0])

        metric_ = np.mean(mean_squared_scaled_error(y_train=y_trains.transpose(),
                                                    y_true=y_tests.transpose(),
                                                    y_pred=y_pred.transpose(),
                                                    square_root=True,
                                                    multioutput='raw_values'),
                          axis=0)
        return metric_
    return metric

In [22]:
big_rmsse = create_rmsse(y_tests, y_trains)

In [23]:
metric = 'RMSSE'
result_df[metric] = -1

In [24]:
for model in point_predictions:
    y_pred = point_predictions[model]
    result_df.loc[model, metric] = big_rmsse(y_pred)

In [25]:
result_df

Unnamed: 0_level_0,MASE,WRMSSE,RMSSE
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
deepar,1.69484,,0.914777
gp,1.829102,,0.933273
croston,1.726859,,0.943407
crostontsb,1.679214,,0.954094
dummy,1.613721,,1.156502


#### WRMSSE

In [26]:
from utils.evaluation import calc_eval_metric, WRMSSEEvaluator

In [27]:
df_train_eval = pd.read_csv(DP + 'external/sales_train_evaluation.csv')
calendar = pd.read_csv(DP + 'external/calendar.csv')
selling_prices = pd.read_csv(DP + 'external/sell_prices.csv')

In [28]:
key_names = ["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
all_ids = df_train_eval["id"].unique()
date_names = ["d_" + str(i) for i in range(1, 1942)]
dates = calendar["date"].unique()
test_steps = 28

In [29]:
df_sampled = (
    df_train_eval.set_index("id").loc[sampled_ids].reset_index()
)
df_train_sampled = df_sampled.loc[:, key_names + date_names[:-test_steps]]
df_test_sampled = df_sampled.loc[:, date_names[-test_steps:]]

wrmsse_evaluator = WRMSSEEvaluator(
    df_train_sampled, df_test_sampled, calendar, selling_prices, test_steps
)

  0%|          | 0/12 [00:00<?, ?it/s]

In [30]:
# predictions = [forecast.mean for forecast in forecasts]
def find_wrmsse(point_predictions, _result_df, col='WRMSSE'):
    result_df = _result_df.copy()
    for model in point_predictions:
        predictions = point_predictions[model]
        df_pred_sampled = pd.DataFrame(predictions, columns=df_test_sampled.columns)
        eval_metrics = calc_eval_metric(df_test_sampled, df_pred_sampled)
        wrmsse = wrmsse_evaluator.score(df_pred_sampled)
        result_df.loc[model, col] = wrmsse
    return result_df

In [31]:
result_df = find_wrmsse(point_predictions, result_df)

  return np.nanmean(2 * np.abs((y_true - y_pred) / (y_true + y_pred)))
  return np.nanmean(2 * np.abs((y_true - y_pred) / (y_true + y_pred)))
  return np.nanmean(2 * np.abs((y_true - y_pred) / (y_true + y_pred)))


In [32]:
result_df.transpose().loc[['MASE','RMSSE','WRMSSE'], ['dummy','croston', 'crostontsb', 'deepar', 'gp']].round(3)

models,dummy,croston,crostontsb,deepar,gp
MASE,1.614,1.727,1.679,1.695,1.829
RMSSE,1.157,0.943,0.954,0.915,0.933
WRMSSE,3.119,0.898,0.901,0.748,0.924


### Probabilistic forecasts

#### Load in quantile forecasts (used for WSPL)

In [33]:
quantile_forecasts = dict()

##### DeepAR

In [34]:
DEEPAR_DP = '../../models/'
DEEPAR_NAME = 'deepar_305'
forecasts_folder = f'{DEEPAR_DP}{DEEPAR_NAME}_predictions/'
quantile_forecasts['deepar'] = pd.read_pickle(f'{forecasts_folder}quantile_forecasts.pkl')

##### GP

In [35]:
m_name = 'GP_305'
model_path = f'{MP}models/{m_name}/'
forecasts_folder = model_path
quantile_forecasts['gp'] = pd.read_pickle(f'{forecasts_folder}quantile_forecasts.pkl')

#### Continuous-Ranked Probability Score (CRPS)

##### DeepAR

In [36]:
%%time
forecasts = deepar_forecasts
prob_preds = np.array([forecast.samples for forecast in forecasts])

dar_crpss = metrics.CRPSs(y_tests, prob_preds, use_ps=True)
dar_crpss = np.mean(dar_crpss)

result_df['CRPS'] = pd.NA
result_df.loc['deepar', 'CRPS'] = dar_crpss

Wall time: 229 ms


In [37]:
result_df

Unnamed: 0_level_0,MASE,WRMSSE,RMSSE,CRPS
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
deepar,1.69484,0.747801,0.914777,0.575653
gp,1.829102,0.924087,0.933273,
croston,1.726859,0.898053,0.943407,
crostontsb,1.679214,0.90134,0.954094,
dummy,1.613721,3.119003,1.156502,


##### GP

In [38]:
prob_preds.shape

(305, 1000, 28)

In [39]:
# means = point_predictions['GP']
prob_preds = np.load(f'{forecasts_folder}np_predictions.npy')
gp_crpss = metrics.CRPSs(y_tests, prob_preds, use_ps=True)
gp_crpss = np.mean(gp_crpss)

# result_df['CRPS'] = pd.NA
result_df.loc['gp', 'CRPS'] = gp_crpss

In [40]:
result_df

Unnamed: 0_level_0,MASE,WRMSSE,RMSSE,CRPS
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
deepar,1.69484,0.747801,0.914777,0.575653
gp,1.829102,0.924087,0.933273,0.644072
croston,1.726859,0.898053,0.943407,
crostontsb,1.679214,0.90134,0.954094,
dummy,1.613721,3.119003,1.156502,


#### WSPL

In [42]:
ste = pd.read_csv(f'{DP}external/sales_train_evaluation.csv')
cal = pd.read_csv(f'{DP}external/calendar.csv')
prices = pd.read_csv(f'{DP}external/sell_prices.csv')

ste = ste.set_index('id').loc[sampled_ids].reset_index()

In [43]:
import utils.m5_helpers
wspl_eval = utils.m5_helpers.WSPLEvaluator(ste, cal, prices, start_test=1914)
result_df['WSPL'] = pd.NA

##### DeepAR

In [44]:
wspl_eval.score_all(quantile_forecasts['deepar'].to_numpy())
result_df.loc['deepar', 'WSPL'] = wspl_eval.total_score

Total score is 0.1995079616291688


##### GP

In [45]:
wspl_eval.score_all(quantile_forecasts['gp'].to_numpy())
result_df.loc['gp', 'WSPL'] = wspl_eval.total_score

Total score is 0.2558884855148303


In [46]:
result_df

Unnamed: 0_level_0,MASE,WRMSSE,RMSSE,CRPS,WSPL
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
deepar,1.69484,0.747801,0.914777,0.575653,0.199508
gp,1.829102,0.924087,0.933273,0.644072,0.255888
croston,1.726859,0.898053,0.943407,,
crostontsb,1.679214,0.90134,0.954094,,
dummy,1.613721,3.119003,1.156502,,


In [47]:
result_df.to_csv('metrics_305.csv')

In [48]:
result_df.round(3)

Unnamed: 0_level_0,MASE,WRMSSE,RMSSE,CRPS,WSPL
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
deepar,1.695,0.748,0.915,0.575653,0.199508
gp,1.829,0.924,0.933,0.644072,0.255888
croston,1.727,0.898,0.943,,
crostontsb,1.679,0.901,0.954,,
dummy,1.614,3.119,1.157,,
