In [1]:
import numpy as np
import pandas as pd
import os
import sys
from scipy.stats import ttest_rel

In [2]:
# Per-fold file names used by eval_run.
# The inferred-predicate file is read from the fold's result directory; the
# truth file and both baseline files are read from the fold's data directory.
INFERRED_PREDICATE_FILE_NAME = "SERIES.txt"
TRUTH_PREDICATE_FILE_NAME = "Series_truth.txt"
AR_BASELINE_FILE_NAME = "ARBaseline_obs.txt"
# Read instead of the other prediction files when eval_run is called with cluster_baseline=True.
CLUSTER_BASELINE_NAME = "ARBaselineNaiveTD_obs.txt"

In [42]:
# Metric columns summarised by the reporting loop; each name is used to index
# a column of results_df.
METRICS = ["NRMSE", "MASE"]

In [19]:
def mase(a, f, scale):
    """Return the mean absolute error between `a` and `f`, divided by `scale`.

    `a` holds the actual values and `f` the forecasts; both must support
    element-wise subtraction (e.g. numpy arrays of equal shape).
    """
    absolute_errors = np.abs(a - f)
    mean_absolute_error = np.mean(absolute_errors)
    return mean_absolute_error / scale

def smape(a, f):
    """Return the symmetric mean absolute percentage error, in percent.

    Each term is ``2 * |f - a| / (|a| + |f|) * 100``; the terms are summed and
    divided by ``len(a)``.

    Parameters
    ----------
    a : array-like
        Actual values.
    f : array-like
        Forecast values, same shape as `a`.

    Notes
    -----
    A term whose denominator is zero (actual and forecast both exactly 0) is a
    perfect forecast and contributes 0, rather than evaluating 0/0 and turning
    the whole score into NaN.
    """
    actual = np.asarray(a, dtype=float)
    forecast = np.asarray(f, dtype=float)

    numerator = 2 * np.abs(forecast - actual)
    denominator = np.abs(actual) + np.abs(forecast)

    # Divide only where the denominator is non-zero; the remaining entries keep
    # the 0 they were initialised with.
    ratios = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return 1 / len(actual) * np.sum(ratios * 100)

In [33]:
def _read_series_scalars(path):
    """Parse a tab-separated `series_id<TAB>value` file into {series_id: float}."""
    scalars = dict()
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            tokens = line.rstrip().split("\t")
            scalars[tokens[0]] = float(tokens[1])
    return scalars


def _read_series_values(path):
    """Parse a tab-separated `series_id<TAB>timestep<TAB>value` file into {series_id: {timestep: float}}."""
    values = dict()
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            tokens = line.split("\t")
            values.setdefault(tokens[0], dict())[tokens[1]] = float(tokens[2].rstrip())
    return values


def eval_run(data_dir, result_dir, n_folds, method_name, results_df, params, eval_baseline=False, cluster_baseline=False):
    """Score one method's forecasts against the truth, per series and per fold.

    For fold indices ``0 .. n_folds - 1`` (directory names zero-padded to three
    digits), folds that have a sub-directory under `result_dir` are evaluated;
    the others are skipped. For every series in the fold's truth file, MASE and
    NRMSE (RMSE divided by the series' entry in ``stddevs.txt``) are computed
    over that series' timesteps.

    Parameters
    ----------
    data_dir : str
        Directory holding ``mase_scale.txt``, ``stddevs.txt`` and one
        sub-directory per fold with the truth and baseline files.
    result_dir : str
        Directory holding one sub-directory per fold with the inferred
        predicates. It decides which folds are evaluated, also for baselines.
    n_folds : int
        Number of fold indices to scan.
    method_name : str
        Value written to the ``Method`` column.
    results_df : object
        Unused; kept so existing call sites keep working.
    params : sequence
        ``params[0]``, ``params[1]`` and ``params[2]`` are written to the
        ``base_noise_std``, ``clus_or_std`` and ``temp_or_std`` columns.
    eval_baseline : bool
        Score the AR baseline file from the data fold directory instead of the
        inferred predicates.
    cluster_baseline : bool
        Score the cluster baseline file from the data fold directory. Takes
        precedence over `eval_baseline`.

    Returns
    -------
    pandas.DataFrame
        One row per (fold, series); empty if no fold directory was found.

    Raises
    ------
    KeyError
        If a series or timestep present in the truth file is missing from the
        predictions, or a series is missing from the scale / stddev files.
    """
    mase_scale_factors = _read_series_scalars(os.path.join(data_dir, "mase_scale.txt"))
    stddevs = _read_series_scalars(os.path.join(data_dir, "stddevs.txt"))

    result_rows = []

    # Honour the caller's fold count instead of a hard-coded 30.
    for i in range(n_folds):
        fold_dir = str(i).zfill(3)
        result_fold_dir = os.path.join(result_dir, fold_dir)
        if not os.path.isdir(result_fold_dir):
            continue

        truth_fold_dir = os.path.join(data_dir, fold_dir)

        # Pick exactly one prediction file so an unused file is never opened.
        if cluster_baseline:
            prediction_path = os.path.join(truth_fold_dir, CLUSTER_BASELINE_NAME)
        elif eval_baseline:
            prediction_path = os.path.join(truth_fold_dir, AR_BASELINE_FILE_NAME)
        else:
            prediction_path = os.path.join(result_fold_dir, INFERRED_PREDICATE_FILE_NAME)

        truth_dict = _read_series_values(os.path.join(truth_fold_dir, TRUTH_PREDICATE_FILE_NAME))
        result_dict = _read_series_values(prediction_path)

        for series, truth_by_timestep in truth_dict.items():
            predicted_by_timestep = result_dict[series]
            timesteps = sorted(truth_by_timestep)

            truth_values = np.array([truth_by_timestep[t] for t in timesteps])
            predicted_values = np.array([predicted_by_timestep[t] for t in timesteps])

            s_mase = mase(truth_values, predicted_values, mase_scale_factors[series])
            s_nrmse = np.sqrt(np.mean((truth_values - predicted_values) ** 2)) / stddevs[series]

            result_rows.append({
                "base_noise_std": params[0],
                "clus_or_std": params[1],
                "temp_or_std": params[2],
                "Series_ID": series,
                "Forecast_Window": fold_dir,
                "Method": method_name,
                "NRMSE": s_nrmse,
                "MASE": s_mase,
            })

    return pd.DataFrame(result_rows)

In [34]:
# Evaluate every PSL model and the AR baseline at each base-noise level and
# collect the per-(fold, series) scores into results_df.
models = ["cw_hard", "temporal_hard"]
base_noise_stds = [0, 0.5, 1, 1.5, 2]
n_folds = 30

# Explicit column order for results_df. "SMAPE" is never produced by eval_run
# and therefore stays NaN; it is kept so the frame's schema is unchanged.
RESULT_COLUMNS = ["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "SMAPE", "MASE"]

# This experiment only varies the base noise; the outlier noise levels are fixed at 0.
clus_or_std = 0
temp_or_std = 0

# Collect the per-run frames and concatenate once at the end: concatenating
# inside the loop is quadratic, and seeding with an empty frame whose columns
# differ from the results triggers pandas concat warnings.
result_frames = []

for base_noise_std in base_noise_stds:
    experiment_subdir = "E1_fixednoise/base_noise_" + str(base_noise_std) + "/clus_or_variance_0/cross_cov_0/temp_or_variance_0/window_size_4"
    data_dir = "data/" + experiment_subdir + "/eval"
    run_params = [base_noise_std, clus_or_std, temp_or_std]

    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std))

        result_dir = "results/Online/" + experiment_subdir + "/" + model + "/inferred-predicates"

        # eval_run ignores its results_df argument, so None is passed.
        # The AR baseline is scored once per model because the folds that get
        # evaluated are determined by that model's result directory.
        result_frames.append(eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), None, run_params))
        result_frames.append(eval_run(data_dir, result_dir, n_folds, "AR", None, run_params, eval_baseline=True))

if result_frames:
    results_df = pd.concat(result_frames, ignore_index=True).reindex(columns=RESULT_COLUMNS)
else:
    results_df = pd.DataFrame(columns=RESULT_COLUMNS)

Evaluating cw_hard with base noise 0
Evaluating temporal_hard with base noise 0
Evaluating cw_hard with base noise 0.5
Evaluating temporal_hard with base noise 0.5
Evaluating cw_hard with base noise 1
Evaluating temporal_hard with base noise 1
Evaluating cw_hard with base noise 1.5


  """


Evaluating temporal_hard with base noise 1.5
Evaluating cw_hard with base noise 2
Evaluating temporal_hard with base noise 2


In [41]:
# Print, for every metric / PSL model / base-noise level, the mean and standard
# deviation across forecast windows of the per-window average score, followed
# by the same statistics for the AR baseline.
for metric in METRICS:
    for model in models:
        psl_method = "PSL_" + str(model)

        for std in base_noise_stds:
            # Rows at this base-noise level with no cluster outlier noise.
            in_setting = (results_df["base_noise_std"] == std) & (results_df["clus_or_std"] == 0)
            psl_rows = results_df[(results_df["Method"] == psl_method) & in_setting]
            ar_rows = results_df[(results_df["Method"] == "AR") & in_setting]

            # One average per forecast window, taken over all series in that window.
            window_vals = [
                np.mean(window_rows[metric].values)
                for _, window_rows in psl_rows.groupby(by=["Forecast_Window"])
            ]
            ar_window_vals = [
                np.mean(window_rows[metric].values)
                for _, window_rows in ar_rows.groupby(by=["Forecast_Window"])
            ]

            psl_summary = str(np.mean(window_vals)) + " +- " + str(np.std(window_vals))
            ar_summary = str(np.mean(ar_window_vals)) + " +- " + str(np.std(ar_window_vals))

            print("Base_noise = " + str(std) + " PSL Model: " + model + " " + metric + " = " + psl_summary + "\n")
            print("B_noise = " + str(std) + " AR " + metric + " = " + ar_summary + "\n\n")

Base_noise = 0 PSL Model: cw_hard NRMSE = 0.7112026991711417 +- 0.036120712953674874

B_noise = 0 AR NRMSE = 0.708958330350146 +- 0.03738423023102443


Base_noise = 0.5 PSL Model: cw_hard NRMSE = 0.7274134036302561 +- 0.03666813617474336

B_noise = 0.5 AR NRMSE = 0.7258643502935178 +- 0.03785918622817364


Base_noise = 1 PSL Model: cw_hard NRMSE = 0.7761551649141907 +- 0.038112709628265226

B_noise = 1 AR NRMSE = 0.7755301642025221 +- 0.03898970257877851


Base_noise = 1.5 PSL Model: cw_hard NRMSE = 0.8502352412185642 +- 0.04338907426040914

B_noise = 1.5 AR NRMSE = 0.8487911435340538 +- 0.042937548902763675


Base_noise = 2 PSL Model: cw_hard NRMSE = 0.9403340695710869 +- 0.0513364536366314

B_noise = 2 AR NRMSE = 0.938994213549593 +- 0.04981251731621002


Base_noise = 0 PSL Model: temporal_hard NRMSE = 0.7101829806561194 +- 0.03613703803719058

B_noise = 0 AR NRMSE = 0.708958330350146 +- 0.03738423023102443


Base_noise = 0.5 PSL Model: temporal_hard NRMSE = 0.7281582056524499 +- 0.0