In [1]:
from utils import import_raw_data
from validation import (
    validate_ql_DeepAR, 
    validate_ql_intermittentGP, 
    validate_ql_baseline, 
    validate_ql_DLinear, 
    validate_smooth_ts,
    validate_local_fn
)
from reconciliation import temporal_recon_ts
from forecasters import gp
import numpy as np
import pandas as pd
import torch
#from statsmodels.tsa.holtwinters import ExponentialSmoothing
#from tweediegp.intermittent_gp import intermittentGP




## Validating local methods on smooth time series

In [None]:
# NOTE(review): `datasets` is not read anywhere else in this cell (each
# configuration reloads the raw data below). It is kept because other cells may
# reference it -- drop this load if nothing does.
datasets = import_raw_data()

# One pd.Series per (configuration, key of the validation results). The list is
# kept under its own name so that `experiments` only ever refers to the final
# DataFrame, never to a half-filled list.
records = []

# Grid over likelihood x kernel x masking threshold x reconciliation horizon.
for likelihood in ["negbin", "gaussian"]:
    for kernel in [None, "periodic"]:
        for threshold in [3.5, 4., 4.5, 5.]:
            for h_recon in [3, 4]:

                # Reset both RNGs so every configuration starts from the same state.
                torch.manual_seed(42)
                np.random.seed(42)

                exp_dict = {"likelihood":likelihood, "kernel":kernel, "h_recon":h_recon, "threshold":threshold}
                print(exp_dict)

                # Reload the data for every configuration, since `in_stock` is
                # modified below.
                sales, in_stock, master = import_raw_data()
                # Restrict `in_stock` to as many columns as `sales` has. The explicit
                # copy makes the assignment below act on an independent frame
                # instead of a slice of the loaded one (no SettingWithCopyWarning,
                # same result with or without pandas copy-on-write).
                in_stock = in_stock.iloc[:, :sales.shape[1]].copy()
                # Column-wise mean of `sales` over the entries where `in_stock` is truthy.
                colmeans = np.nanmean(np.where(in_stock, sales, np.nan), axis=0)
                # Columns whose mean exceeds the threshold are flagged False throughout;
                # presumably this excludes them from validation -- TODO confirm
                # against validate_local_fn.
                in_stock.iloc[:, (colmeans > threshold)] = False

                def negbin_gp(train_y, train_x, test_x):
                    """GP forecaster with the likelihood fixed to "negbin".

                    NOTE(review): `kernel` is not forwarded here, so this call uses
                    whatever default kernel `gp` has -- confirm this is intended.
                    """
                    return gp(train_y, train_x, test_x, likelihood="negbin")

                def gp_fn(train_y, train_x, test_x):
                    """GP forecaster using the likelihood and kernel of the current configuration."""
                    return gp(train_y, train_x, test_x, likelihood=likelihood, kernel=kernel)

                def recon_fn(train_y, observed, h):
                    """Reconcile with horizon `h_recon` and return the first `h` columns of the samples.

                    Uses `gp_fn` when every value of `train_y[observed]` is strictly
                    positive, and `negbin_gp` otherwise.
                    """
                    if torch.all(train_y[observed] > 0.):
                        fn = gp_fn
                    else:
                        fn = negbin_gp
                    recon_samples, _ = temporal_recon_ts(fn, train_y, observed, h_recon)
                    return torch.tensor(recon_samples[:, :h])

                validation_results = validate_local_fn((sales, in_stock, master), recon_fn)

                # One row per key of the validation results, tagged with the configuration.
                for T in validation_results.keys():
                    records.append(
                        pd.Series(exp_dict | {
                            "T":T,
                            "QL simple":validation_results[T]["quantile_loss_simple"],
                            "QL cumulative":validation_results[T]["quantile_loss_cumulative"]
                        })
                    )

experiments = pd.DataFrame(records)

{'likelihood': 'negbin', 'kernel': None, 'h_recon': 3, 'threshold': 3.5}


100%|██████████| 599/599 [04:47<00:00,  2.09it/s]
100%|██████████| 599/599 [04:54<00:00,  2.03it/s]
100%|██████████| 599/599 [05:03<00:00,  1.97it/s]
100%|██████████| 599/599 [04:58<00:00,  2.00it/s]
100%|██████████| 599/599 [04:59<00:00,  2.00it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 4, 'threshold': 3.5}


100%|██████████| 599/599 [04:51<00:00,  2.06it/s]
100%|██████████| 599/599 [04:45<00:00,  2.10it/s]
100%|██████████| 599/599 [04:52<00:00,  2.05it/s]
100%|██████████| 599/599 [04:45<00:00,  2.10it/s]
100%|██████████| 599/599 [04:46<00:00,  2.09it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 3, 'threshold': 4.0}


100%|██████████| 599/599 [05:09<00:00,  1.94it/s]
100%|██████████| 599/599 [05:06<00:00,  1.95it/s]
100%|██████████| 599/599 [05:23<00:00,  1.85it/s]
100%|██████████| 599/599 [05:08<00:00,  1.94it/s]
100%|██████████| 599/599 [05:05<00:00,  1.96it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 4, 'threshold': 4.0}


100%|██████████| 599/599 [05:03<00:00,  1.97it/s]
100%|██████████| 599/599 [05:01<00:00,  1.99it/s]
100%|██████████| 599/599 [04:53<00:00,  2.04it/s]
100%|██████████| 599/599 [05:13<00:00,  1.91it/s]
100%|██████████| 599/599 [05:08<00:00,  1.94it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 3, 'threshold': 4.5}


100%|██████████| 599/599 [05:05<00:00,  1.96it/s]
100%|██████████| 599/599 [04:58<00:00,  2.01it/s]
100%|██████████| 599/599 [05:13<00:00,  1.91it/s]
100%|██████████| 599/599 [05:17<00:00,  1.88it/s]
100%|██████████| 599/599 [05:13<00:00,  1.91it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 4, 'threshold': 4.5}


100%|██████████| 599/599 [05:15<00:00,  1.90it/s]
100%|██████████| 599/599 [05:26<00:00,  1.83it/s]
100%|██████████| 599/599 [05:13<00:00,  1.91it/s]
100%|██████████| 599/599 [05:29<00:00,  1.82it/s]
100%|██████████| 599/599 [05:15<00:00,  1.90it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 3, 'threshold': 5.0}


100%|██████████| 599/599 [05:13<00:00,  1.91it/s]
100%|██████████| 599/599 [05:11<00:00,  1.92it/s]
100%|██████████| 599/599 [05:29<00:00,  1.82it/s]
100%|██████████| 599/599 [05:29<00:00,  1.82it/s]
100%|██████████| 599/599 [05:29<00:00,  1.82it/s]


{'likelihood': 'negbin', 'kernel': None, 'h_recon': 4, 'threshold': 5.0}


100%|██████████| 599/599 [05:39<00:00,  1.76it/s]
100%|██████████| 599/599 [19:44<00:00,  1.98s/it]    
100%|██████████| 599/599 [04:59<00:00,  2.00it/s]
100%|██████████| 599/599 [05:01<00:00,  1.99it/s]
100%|██████████| 599/599 [05:08<00:00,  1.94it/s]


{'likelihood': 'negbin', 'kernel': 'periodic', 'h_recon': 3, 'threshold': 3.5}


100%|██████████| 599/599 [05:05<00:00,  1.96it/s]
100%|██████████| 599/599 [05:10<00:00,  1.93it/s]
100%|██████████| 599/599 [05:04<00:00,  1.97it/s]
 83%|████████▎ | 497/599 [04:16<00:48,  2.10it/s]

In [None]:
# Columns holding reported values; every other column of `experiments`
# identifies the configuration and is used as a grouping key. Deriving the keys
# from the frame itself avoids relying on the `exp_dict` variable left over from
# the last iteration of the experiment loop, and gives `groupby` a plain list of
# column labels.
value_cols = ["T", "QL simple", "QL cumulative"]
group_cols = [col for col in experiments.columns if col not in value_cols]

# dropna=False keeps the groups whose key is missing (e.g. kernel=None).
experiments_grouped = experiments.groupby(group_cols, dropna=False).agg(["mean", "std"])
# Show the configurations ordered by ascending mean "QL simple".
experiments_grouped.sort_values(("QL simple", "mean"))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,T,T,QL simple,QL simple,QL cumulative,QL cumulative
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
likelihood,kernel,threshold,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
negbin,,4.0,155.0,1.581139,0.968689,0.043263,1.891486,0.1954
gaussian,,3.5,155.0,1.581139,0.977666,0.021626,1.941736,0.153445
gaussian,,4.0,155.0,1.581139,0.978613,0.031331,1.880134,0.186048
negbin,,3.5,155.0,1.581139,0.978798,0.050667,2.02877,0.26239
