In [13]:
import pandas as pd
import numpy as np
# from utils.evals import *
UNIT_DICT = {"amazon-google": "H", "m5": "D", "glucose": "T", "meditation": "S"}
UNIT_NUM_DICT = {"amazon-google": 1, "m5": 1, "glucose": 5, "meditation": 1}

In [14]:
dataset = 'meditation'
model = 'nbeats'
confidence = 0.80
quantiles = [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]

unit = UNIT_DICT[dataset]
unit_num = UNIT_NUM_DICT[dataset]
results_fn = f"model_results/{dataset}/{model}/median_preds.csv"
lower_fn = f"model_results/{dataset}/{model}/quantile_{round((1-confidence)/2 * 100)}_preds.csv"
upper_fn = f"model_results/{dataset}/{model}/quantile_{round((1+confidence)/2 * 100)}_preds.csv"
data_fn = f"data/{dataset}/y_{dataset}.csv"
results_df = pd.read_csv(results_fn, index_col=0, parse_dates=['ds'])
upper_df = pd.read_csv(upper_fn, index_col=0, parse_dates=['ds'])
lower_df = pd.read_csv(lower_fn, index_col=0, parse_dates=['ds'])
data_df = pd.read_csv(data_fn, index_col=0, parse_dates=['ds'])
freq_delta = pd.Timedelta(unit_num, unit=unit)
    
quantiles_df = []
for quantile in quantiles:
    quantile_fn = f"model_results/{dataset}/{model}/quantile_{round(quantile * 100)}_preds.csv"
    quantiles_df.append(pd.read_csv(quantile_fn, index_col=0, parse_dates=['ds']))
quantiles_df.insert(len(quantiles_df)//2, results_df)
quantiles.insert(len(quantiles)//2, 0.5)
quantiles_dict = dict(zip(quantiles, quantiles_df))

In [15]:
def mae(results_df, data_df, freq_delta):
    pred_length = int(results_df.columns[-1])
    mae_arr = []
    for h in range(1,pred_length+1):
        shift_results = results_df[['ds', 'unique_id', str(h)]]
        shift_results.loc[:,'ds'] += freq_delta * h
        merged_results = pd.merge(data_df, shift_results, on=['unique_id', 'ds'], how='inner')
        mean_abs_error = np.mean(np.abs(merged_results['y'] - merged_results[str(h)]))
        mae_arr.append(mean_abs_error)
    return np.mean(mae_arr), mae_arr

print(mae(results_df, data_df, freq_delta))

(2.772543402777778, [1.8771999999999995, 1.9096166666666659, 1.531966666666668, 3.4375000000000013, 2.663766666666669, 3.166800000000002, 2.2241166666666685, 3.8612, 3.863350000000002, 3.081416666666667, 3.577416666666666, 2.2422666666666657, 2.2055666666666647, 2.727266666666665, 2.694033333333335, 2.9475333333333325, 2.967950000000002, 3.805066666666665, 1.9447500000000038, 1.9659500000000005, 2.538116666666666, 3.5265833333333325, 2.7521666666666653, 1.9840333333333338, 2.6167000000000016, 2.862616666666664, 2.5954666666666646, 1.8530000000000015, 3.0240999999999993, 1.9640000000000033, 1.6238499999999998, 1.8678166666666662, 3.451816666666668, 3.6598833333333354, 2.736116666666669, 2.7474833333333337, 2.6948666666666674, 3.611400000000001, 2.26933333333333, 0.9703666666666658, 1.5680000000000025, 2.935066666666669, 3.433100000000003, 3.899500000000001, 3.854683333333334, 4.142450000000003, 2.993933333333333, 4.210900000000001])


In [16]:
def mase(results_df, data_df, freq_delta):
    pred_length = int(results_df.columns[-1])
    mae_arr = []
    for h in range(1,pred_length+1):
        shift_results = results_df[['ds', 'unique_id', str(h)]]
        shift_results.loc[:,'ds'] += freq_delta * h
        merged_results = pd.merge(data_df, shift_results, on=['unique_id', 'ds'], how='inner')
        mean_abs_error = np.mean(np.abs(merged_results['y'] - merged_results[str(h)]))
        mae_arr.append(mean_abs_error)
    
    # naive mae
    shift_results = data_df.copy()
    shift_results.loc[:, 'ds'] -= freq_delta
    shift_results = shift_results.rename(columns={"y": "1"})
    merged_results = pd.merge(data_df, shift_results, on=['unique_id', 'ds'], how='inner')
    mae_n = np.mean(np.abs(merged_results['y'] - merged_results["1"]))
    return np.mean(mae_arr) / mae_n, np.array(mae_arr) / mae_n

print(mase(results_df, data_df, freq_delta))

(1.027350563107045, array([0.69558604, 0.70759785, 0.56766174, 1.27374654, 0.98704395,
       1.17344016, 0.82413408, 1.43074622, 1.43154289, 1.14180184,
       1.32559188, 0.83085946, 0.81726048, 1.01057352, 0.99825909,
       1.09219211, 1.09975739, 1.40994633, 0.72061631, 0.72847186,
       0.94048504, 1.30675587, 1.01980008, 0.73517254, 0.96960366,
       1.06072671, 0.96173576, 0.68661886, 1.12056346, 0.72774929,
       0.6017086 , 0.69210909, 1.2790515 , 1.35614945, 1.01385284,
       1.01806469, 0.99856788, 1.33818422, 0.84088887, 0.35956398,
       0.58101369, 1.08757266, 1.27211614, 1.44493807, 1.42833151,
       1.53496185, 1.10938537, 1.56032561]))


In [21]:
def tce(lower_df, upper_df, data_df, freq_delta, confidence):    
    pred_length = int(lower_df.columns[-1])
    outside_ratio = (1-confidence)/2
    tce_arr = []
    for h in range(1,pred_length+1):
        shift_lower = lower_df[['ds', 'unique_id', str(h)]]
        print(shift_lower.shape)
        print(shift_lower)
        shift_lower.loc[:,'ds'] += freq_delta * h
        print(shift_lower.shape)
        print(shift_lower)
        shift_upper = upper_df[['ds', 'unique_id', str(h)]]
        shift_upper.loc[:,'ds'] += freq_delta * h
        merged_upper = pd.merge(data_df, shift_upper, on=['unique_id', 'ds'], how='inner')
        merged_lower = pd.merge(data_df, shift_lower, on=['unique_id', 'ds'], how='inner')
        print(lower_df.shape, merged_upper.shape, merged_lower.shape)
        mean_upper_outside = np.mean(merged_upper['y'] > merged_upper[str(h)])
        mean_lower_outside = np.mean(merged_lower['y'] < merged_lower[str(h)])
        tce_arr.append(abs(outside_ratio - mean_upper_outside) + abs(outside_ratio - mean_lower_outside))
    return np.mean(tce_arr), np.array(tce_arr)
print(tce(lower_df, upper_df, data_df, freq_delta, confidence))

(6, 3)
                   ds  unique_id          1
0 2000-01-01 00:04:59          0  60.000000
1 2000-01-01 00:04:59          1  50.000000
2 2000-01-01 00:04:59          2  67.000000
3 2000-01-01 00:04:59          3  51.000000
4 2000-01-01 00:04:59          4  50.950001
5 2000-01-01 00:04:59          5  54.000000
(6, 3)
                   ds  unique_id          1
0 2000-01-01 00:05:00          0  60.000000
1 2000-01-01 00:05:00          1  50.000000
2 2000-01-01 00:05:00          2  67.000000
3 2000-01-01 00:05:00          3  51.000000
4 2000-01-01 00:05:00          4  50.950001
5 2000-01-01 00:05:00          5  54.000000
(6, 50) (6, 4) (6, 4)
(6, 3)
                   ds  unique_id          2
0 2000-01-01 00:04:59          0  61.950001
1 2000-01-01 00:04:59          1  47.000000
2 2000-01-01 00:04:59          2  68.000000
3 2000-01-01 00:04:59          3  53.000000
4 2000-01-01 00:04:59          4  49.000000
5 2000-01-01 00:04:59          5  54.950001
(6, 3)
                   ds  uni

In [93]:
def wql(quantiles_dict, data_df, freq_delta):
    '''
    returns: weighted quantile loss, (n_quantiles, pred_length) WQL array
    '''
    ql_arr = []
    for quantile, quantile_df in quantiles_dict.items():
        quantile_ql_arr = []
        pred_length = int(quantile_df.columns[-1])
        for h in range(1,pred_length+1):
            shift_results = results_df[['ds', 'unique_id', str(h)]]
            shift_results.loc[:,'ds'] += freq_delta * h
            merged_results = pd.merge(data_df, shift_results, on=['unique_id', 'ds'], how='inner')
            quantile_loss = np.mean((2*(1-quantile)*(merged_results[str(h)] - merged_results['y'])*(merged_results[str(h)] >= merged_results['y'])) \
                            + (2*(quantile)*(merged_results['y'] - merged_results[str(h)])*(merged_results[str(h)] < merged_results['y'])))
            quantile_ql_arr.append(quantile_loss)
        ql_arr.append(quantile_ql_arr)

    scale = np.sum(merged_results['y'])
    wql_arr = np.array(ql_arr) / scale
    return np.sum(wql_arr), wql_arr
            
wql_val, wql_arr = wql(quantiles_dict, data_df, freq_delta)
print(wql_val, wql_arr.shape, np.mean(wql_arr))

0.0038484083119834514 (9, 48) 8.908352574035768e-06


In [8]:
def msis(lower_df, upper_df, data_df, freq_delta, confidence):    
    pred_length = int(results_df.columns[-1])
    mis_arr = []
    for h in range(1,pred_length+1):
        shift_lower = lower_df[['ds', 'unique_id', str(h)]]
        shift_lower.loc[:,'ds'] += freq_delta * h
        shift_upper = upper_df[['ds', 'unique_id', str(h)]]
        shift_upper.loc[:,'ds'] += freq_delta * h
        merged_upper = pd.merge(data_df, shift_upper, on=['unique_id', 'ds'], how='inner')
        merged_lower = pd.merge(data_df, shift_lower, on=['unique_id', 'ds'], how='inner')
        mean_interval_score = np.mean( (merged_upper[str(h)] - merged_lower[str(h)]) \
                                      + confidence * (merged_lower[str(h)] - merged_lower['y']) * (merged_lower['y'] < merged_lower[str(h)]) \
                                      + confidence * (merged_upper['y'] - merged_upper[str(h)]) * (merged_upper['y'] > merged_upper[str(h)]) )
        mis_arr.append(mean_interval_score)
    
    # naive mae
    shift_results = data_df.copy()
    shift_results.loc[:, 'ds'] -= freq_delta
    shift_results = shift_results.rename(columns={"y": "1"})
    merged_results = pd.merge(data_df, shift_results, on=['unique_id', 'ds'], how='inner')
    mae_n = np.mean(np.abs(merged_results['y'] - merged_results["1"]))

    return np.mean(mis_arr) / mae_n, np.array(mis_arr) / mae_n

print(msis(lower_df, upper_df, data_df, freq_delta, confidence))

(1.9698064437374196, array([1.59909541, 1.69047787, 1.7798634 , 1.82855429, 1.83496945,
       1.86386496, 1.89763052, 1.89564952, 1.87658962, 1.88554238,
       1.88561317, 1.90186529, 1.92080159, 1.9247371 , 1.93812584,
       1.96945075, 1.97422951, 1.94015624, 1.94902403, 1.94419349,
       1.97811766, 1.96131106, 1.97714109, 1.96569066, 1.98812767,
       2.00991368, 2.01288925, 2.04183722, 2.06745672, 2.06035704,
       2.0416558 , 2.04050521, 2.03891777, 2.02324464, 2.05736086,
       2.0875128 , 2.09095392, 2.0825263 , 2.08135843, 2.08403165,
       2.04955599, 2.0415112 , 2.03088281, 2.0284644 , 2.04071957,
       2.04759876, 2.05401842, 2.06661426]))
