In [8]:
import numpy as np
import pandas as pd
import os
import sys
from scipy.stats import ttest_rel
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

In [33]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))


In [2]:
INFERRED_PREDICATE_FILE_NAME = "SERIES.txt"
TRUTH_PREDICATE_FILE_NAME = "Series_truth.txt"
AR_BASELINE_FILE_NAME = "ARBaseline_obs.txt"
CLUSTER_BASELINE_NAME = "ARBaselineNaiveTD_obs.txt"
ADJ_AR_BASELINE_FILE_NAME = "ARBaselineAdj_obs.txt"

In [3]:
METRICS = ["NRMSE", "MASE"]

In [4]:
def mase(a, f, scale):
    return np.mean(np.abs(a - f)) / scale

def smape(a, f):
    return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)

In [5]:
def eval_run(data_dir, result_dir, n_folds, method_name, results_df, params, temporal_baseline=False, eval_baseline=False, cluster_baseline=False):
    
    mase_scale_factors = dict()

    for line in open(os.path.join(data_dir, "mase_scale.txt"), "r").readlines():
        tokens = line.rstrip().split("\t")
        mase_scale_factors[tokens[0]] = float(tokens[1])

    stddevs = dict()

    for line in open(os.path.join(data_dir, "stddevs.txt"), "r").readlines():
        tokens = line.rstrip().split("\t")
        stddevs[tokens[0]] = float(tokens[1])
        
    result_rows = []    
    
    for i in range(30):
        fold_dir = str(i).zfill(3)
        if not os.path.isdir(os.path.join(result_dir, fold_dir)):
            continue

        result_fold_dir = os.path.join(result_dir, fold_dir)
        truth_fold_dir = os.path.join(data_dir, fold_dir)

        truth_lines = open(os.path.join(truth_fold_dir, TRUTH_PREDICATE_FILE_NAME), "r").readlines()

        if eval_baseline:
            result_lines = open(os.path.join(truth_fold_dir, AR_BASELINE_FILE_NAME), "r").readlines()
        else:
            result_lines = open(os.path.join(result_fold_dir, INFERRED_PREDICATE_FILE_NAME), "r").readlines()

        if cluster_baseline:
            result_lines = open(os.path.join(truth_fold_dir, CLUSTER_BASELINE_NAME), "r").readlines()
            
        if temporal_baseline:
            result_lines = open(os.path.join(truth_fold_dir, ADJ_AR_BASELINE_FILE_NAME), "r").readlines()


        truth_dict = dict()
        result_dict = dict()
        ar_baseline_dict = dict()

        for line in truth_lines:
            tokens = line.split("\t")
            series_id = tokens[0]
            timestep = tokens[1]
            val = tokens[2].rstrip()

            if series_id not in truth_dict:
                truth_dict[series_id] = dict()

            truth_dict[series_id][timestep] = float(val)

        for line in result_lines:
            tokens = line.split("\t")
            series_id = tokens[0]
            timestep = tokens[1]
            val = tokens[2].rstrip()

            if series_id not in result_dict:
                result_dict[series_id] = dict()

            result_dict[series_id][timestep] = float(val)

            
        for series in truth_dict.keys():
            abs_errors = []

            sq_errors = []

            truth_values = []
            
            predicted_values = []

            for timestep in sorted(list(truth_dict[series].keys())):
                truth_values += [truth_dict[series][timestep]]
                predicted_values += [result_dict[series][timestep]]

                ts_abs_error = np.abs(truth_dict[series][timestep] - result_dict[series][timestep])
                abs_errors += [ts_abs_error]
                sq_errors += [ts_abs_error ** 2]

            s_mase = mase(np.array(truth_values), np.array(predicted_values), mase_scale_factors[series])
            
            s_smape = smape(np.array(truth_values), np.array(predicted_values))

            s_nrmse = np.sqrt(np.mean(sq_errors)) / stddevs[series]

            r2 = r2_score(np.array(truth_values), np.array(predicted_values))

            result_rows += [{"base_noise_std": params[0], "clus_or_std": params[1], "temp_or_std": params[2], "Series_ID": series, "Forecast_Window": fold_dir, "Method": method_name, "NRMSE": s_nrmse,
                                                              "MASE": s_mase, "SMAPE": s_smape, "R2": r2}]
            
    return pd.DataFrame(result_rows)

# E1: Vary base noise, temporal constraints

In [67]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["temporal_hard"]
base_noise_stds = [0, 0.5, 1, 1.5, 2]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for base_noise_std in base_noise_stds:
    temp_or_std = 0
    clus_or_std = 0
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_or_std))
        data_dir = "data/E1/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/eval"
        result_dir = "results/Online/E1/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "temporal_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, temporal_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating temporal_hard with base noise 0 and cluster oracle noise std. 0
Evaluating temporal_hard with base noise 0.5 and cluster oracle noise std. 0
Evaluating temporal_hard with base noise 1 and cluster oracle noise std. 0
Evaluating temporal_hard with base noise 1.5 and cluster oracle noise std. 0


  return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)
  return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)


Evaluating temporal_hard with base noise 2 and cluster oracle noise std. 0


In [68]:
x = base_noise_stds
PSL = []
ARPH = []
AR = []

PSL_err = []
ARPH_err = []
AR_err = []

for metric in METRICS:
    for model in models:
        for base_noise_std in base_noise_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Base_oracle_noise_std = " + str(base_noise_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            PSL += [np.mean(window_vals)]
            PSL_err += [np.std(window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " temporal AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            ARPH += [np.mean(cluster_ar_window_vals)]
            ARPH_err += [np.std(cluster_ar_window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")
            AR += [np.mean(pure_ar_window_vals)]
            AR_err += [np.std(pure_ar_window_vals)]
            
            print("PSL-ARH vs. Pure AR t-test p value: " + str(ttest_rel(window_vals, pure_ar_window_vals).pvalue) + "\n")
            print("PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: " + str(ttest_rel(window_vals, cluster_ar_window_vals).pvalue) + "\n")
    
    plt.title("Experiment 1 Temporal Oracle " + str(metric))
    plt.xlabel("Base Noise Scale")
    plt.ylabel(metric)
    z = plt.errorbar(x, PSL, label="PSL", color='r', yerr=PSL_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, ARPH, label="AR + Post-hoc", color='g', yerr=ARPH_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, AR, label="AR", color='b', yerr=AR_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    plt.xticks(x)
    plt.ylim([0.5, 1])
    plt.legend()
    #plt.show()
    
    plt.savefig("E1_Temporal_" + str(metric) + ".png")
    plt.clf()
    
    PSL = []
    ARPH = []
    AR = []

    PSL_err = []
    ARPH_err = []
    AR_err = []
    
print(os.getcwd())

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(resu

Base_oracle_noise_std = 0 PSL Model: temporal_hard NRMSE = 0.6641570326082014 +- 0.038573628720160055
Base_oracle_noise_std = 0 temporal AR NRMSE = 0.5908137719622428 +- 0.03566182370836759
Base_oracle_noise_std = 0 pure AR NRMSE = 0.7089582447176949 +- 0.03738422285621533


PSL-ARH vs. Pure AR t-test p value: 1.6156350850863065e-09

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 1.0321786974246985e-13

Base_oracle_noise_std = 0.5 PSL Model: temporal_hard NRMSE = 0.6788800277843343 +- 0.03885317337123511
Base_oracle_noise_std = 0.5 temporal AR NRMSE = 0.6052922339006147 +- 0.03605410912218544
Base_oracle_noise_std = 0.5 pure AR NRMSE = 0.7258643355113141 +- 0.037859179302687


PSL-ARH vs. Pure AR t-test p value: 4.169807700558142e-10

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 1.4305299289987297e-13

Base_oracle_noise_std = 1 PSL Model: temporal_hard NRMSE = 0.7238206504775013 +- 0.04030382402692018
Base_oracle_noise_std = 1 temporal AR NRMSE = 0.6471236068799603 +-

<Figure size 640x480 with 0 Axes>

# E1: Vary base noise, cluster constraints

In [69]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["cw_hard"]
base_noise_stds = [0, 0.5, 1, 1.5, 2]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for base_noise_std in base_noise_stds:
    temp_or_std = 0
    clus_or_std = 0
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_or_std))
        data_dir = "data/E1/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/eval"
        result_dir = "results/Online/E1/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "cluster_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, cluster_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating cw_hard with base noise 0 and cluster oracle noise std. 0
Evaluating cw_hard with base noise 0.5 and cluster oracle noise std. 0
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0
Evaluating cw_hard with base noise 1.5 and cluster oracle noise std. 0


  return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)
  return 1/len(a) * np.sum(2 * np.abs(f-a) / (np.abs(a) + np.abs(f))*100)


Evaluating cw_hard with base noise 2 and cluster oracle noise std. 0


In [70]:
x = base_noise_stds
PSL = []
ARPH = []
AR = []

PSL_err = []
ARPH_err = []
AR_err = []

for metric in METRICS:
    for model in models:
        for base_noise_std in base_noise_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Base_oracle_noise_std = " + str(base_noise_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            PSL += [np.mean(window_vals)]
            PSL_err += [np.std(window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            ARPH += [np.mean(cluster_ar_window_vals)]
            ARPH_err += [np.std(cluster_ar_window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")
            AR += [np.mean(pure_ar_window_vals)]
            AR_err += [np.std(pure_ar_window_vals)]
            
            print("PSL-ARH vs. Pure AR t-test p value: " + str(ttest_rel(window_vals, pure_ar_window_vals).pvalue) + "\n")
            print("PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: " + str(ttest_rel(window_vals, cluster_ar_window_vals).pvalue) + "\n")
            
    plt.title("Experiment 1 Temporal Oracle " + str(metric))
    plt.xlabel("Base Noise Scale")
    plt.ylabel(metric)
    z = plt.errorbar(x, PSL, label="PSL", color='r', yerr=PSL_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, ARPH, label="AR + Post-hoc", color='g', yerr=ARPH_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, AR, label="AR", color='b', yerr=AR_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    plt.xticks(x)
    plt.ylim([0.5, 1])
    plt.legend()
    #plt.show()
    
    plt.savefig("E1_Cluster_" + str(metric) + ".png")
    plt.clf()
    
    PSL = []
    ARPH = []
    AR = []

    PSL_err = []
    ARPH_err = []
    AR_err = []

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == 0)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(result

Base_oracle_noise_std = 0 PSL Model: cw_hard NRMSE = 0.6880742039756833 +- 0.04905698369907255
Base_oracle_noise_std = 0 cluster AR NRMSE = 0.623890598501928 +- 0.03559914241444374
Base_oracle_noise_std = 0 pure AR NRMSE = 0.7089582447176949 +- 0.03738422285621533


PSL-ARH vs. Pure AR t-test p value: 0.005524869291834617

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 5.13681251688751e-09

Base_oracle_noise_std = 0.5 PSL Model: cw_hard NRMSE = 0.7047341178493934 +- 0.04674072070860771
Base_oracle_noise_std = 0.5 cluster AR NRMSE = 0.6414198666804487 +- 0.03669291339155553
Base_oracle_noise_std = 0.5 pure AR NRMSE = 0.7258643355113141 +- 0.037859179302687


PSL-ARH vs. Pure AR t-test p value: 0.005899103773437645

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 3.5886797003748526e-09

Base_oracle_noise_std = 1 PSL Model: cw_hard NRMSE = 0.7462778880039049 +- 0.050724024269122175
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6913653098176129 +- 0.03754266549620747
Base_o

<Figure size 640x480 with 0 Axes>

# E2: Base Noise 1, Vary Cluster Noise

In [76]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["cw_hard"]
clus_or_stds = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for clus_or_std in clus_or_stds:
    temp_or_std = 0
    base_noise_std = 1
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_or_std))
        data_dir = "data/E2_clus/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/eval"
        result_dir = "results/Online/E2_clus/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "cluster_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, cluster_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.1
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.2
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.3
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.4
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.5


In [77]:
x = clus_or_stds
PSL = []
ARPH = []
AR = []

PSL_err = []
ARPH_err = []
AR_err = []

for metric in METRICS:
    for model in models:
        base_noise_std = 1
        
        for clus_or_std in clus_or_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []
            

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Base_oracle_noise_std = " + str(base_noise_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            PSL += [np.mean(window_vals)]
            PSL_err += [np.std(window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            ARPH += [np.mean(cluster_ar_window_vals)]
            ARPH_err += [np.std(cluster_ar_window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")
            AR += [np.mean(pure_ar_window_vals)]
            AR_err += [np.std(pure_ar_window_vals)]
            
            print("PSL-ARH vs. Pure AR t-test p value: " + str(ttest_rel(window_vals, pure_ar_window_vals).pvalue) + "\n")
            print("PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: " + str(ttest_rel(window_vals, cluster_ar_window_vals).pvalue) + "\n")
            
            
    print(PSL)
    plt.title("Experiment 2 Cluster Oracle " + str(metric))
    plt.xlabel("Oracle Noise Scale")
    plt.ylabel(metric)
    z = plt.errorbar(x, PSL, label="PSL", color='r', yerr=PSL_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, ARPH, label="AR + Post-hoc", color='g', yerr=ARPH_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, AR, label="AR", color='b', yerr=AR_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    plt.xticks(x)
    plt.ylim([0.5, 1])
    plt.legend()
    #plt.show()
    
    plt.savefig("E2_Cluster_" + str(metric) + ".png")
    plt.clf()
    
    PSL = []
    ARPH = []
    AR = []

    PSL_err = []
    ARPH_err = []
    AR_err = []

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_

Base_oracle_noise_std = 1 PSL Model: cw_hard NRMSE = 0.7456534542174588 +- 0.049286635714301856
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6913653098176129 +- 0.03754266549620747
Base_oracle_noise_std = 1 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


PSL-ARH vs. Pure AR t-test p value: 0.0004586414189684209

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 1.3164429828351218e-07

Base_oracle_noise_std = 1 PSL Model: cw_hard NRMSE = 0.7521427751340125 +- 0.054910161967507115
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6941336102584166 +- 0.03720561078013178
Base_oracle_noise_std = 1 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


PSL-ARH vs. Pure AR t-test p value: 0.004312467549413195

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 8.22112078515971e-08

Base_oracle_noise_std = 1 PSL Model: cw_hard NRMSE = 0.7662937424109213 +- 0.06053009341917554
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.7018564462203654 +- 0.036978504119764784
Base_ora

  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):


<Figure size 640x480 with 0 Axes>

# E2: Base Noise 1, Vary Temporal Noise

In [74]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["temporal_hard"]
temp_or_stds = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for temp_or_std in temp_or_stds:
    clus_or_std = 0
    base_noise_std = 1
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and temporal oracle noise std. " + str(temp_or_std))
        data_dir = "data/E2_temp/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/eval"
        result_dir = "results/Online/E2_temp/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "temporal_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, temporal_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0
Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0.1
Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0.2
Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0.3
Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0.4
Evaluating temporal_hard with base noise 1 and temporal oracle noise std. 0.5


In [75]:
x = temp_or_stds
PSL = []
ARPH = []
AR = []

PSL_err = []
ARPH_err = []
AR_err = []


for metric in METRICS:
    for model in models:
        base_noise_std = 1
        
        for temp_or_std in temp_or_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Base_oracle_noise_std = " + str(base_noise_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            PSL += [np.mean(window_vals)]
            PSL_err += [np.std(window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            ARPH += [np.mean(cluster_ar_window_vals)]
            ARPH_err += [np.std(cluster_ar_window_vals)]
            
            print("Base_oracle_noise_std = " + str(base_noise_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")
            AR += [np.mean(pure_ar_window_vals)]
            AR_err += [np.std(pure_ar_window_vals)]
            
            
            print("PSL-ARH vs. Pure AR t-test p value: " + str(ttest_rel(window_vals, pure_ar_window_vals).pvalue) + "\n")
            print("PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: " + str(ttest_rel(window_vals, cluster_ar_window_vals).pvalue) + "\n")
            
    plt.title("Experiment 2 Temporal Oracle " + str(metric))
    plt.xlabel("Oracle Noise Scale")
    plt.ylabel(metric)
    z = plt.errorbar(x, PSL, label="PSL", color='r', yerr=PSL_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, ARPH, label="AR + Post-hoc", color='g', yerr=ARPH_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    z = plt.errorbar(x, AR, label="AR", color='b', yerr=AR_err, capsize=4, fmt="--", marker=".", markersize=10)
    #[-1][0].set_linestyle(':')
    
    plt.xticks(x)
    plt.ylim([0.5, 1])
    plt.legend()
    #plt.show()
    
    plt.savefig("E2_Temporal_" + str(metric) + ".png")
    plt.clf()
    
    PSL = []
    ARPH = []
    AR = []

    PSL_err = []
    ARPH_err = []
    AR_err = []

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecas

Base_oracle_noise_std = 1 PSL Model: temporal_hard NRMSE = 0.7241415203140128 +- 0.04034521841297586
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6471236068799603 +- 0.0363260654684243
Base_oracle_noise_std = 1 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


PSL-ARH vs. Pure AR t-test p value: 2.0103192061090427e-11

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 8.74935592113119e-14

Base_oracle_noise_std = 1 PSL Model: temporal_hard NRMSE = 0.7284212653687316 +- 0.04103211708101286
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6517172283844782 +- 0.0363701433385848
Base_oracle_noise_std = 1 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


PSL-ARH vs. Pure AR t-test p value: 1.8979598443960275e-10

PSL-ARH vs. AR w/ post-hoc adjustments t-test p value: 1.5277415680102771e-13

Base_oracle_noise_std = 1 PSL Model: temporal_hard NRMSE = 0.7406092852766828 +- 0.04106698383495202
Base_oracle_noise_std = 1 cluster AR NRMSE = 0.6626794992036475 +- 0.0365475390

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "temporal_AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == base_noise_std) & (results_df["temp_or_std"] == temp_or_std)].groupby(by=["Forecast_Window"]):


<Figure size 640x480 with 0 Axes>

In [73]:
models = ["cw_hard"]
for metric in METRICS:
    for model in models:
        for temp_or_std in temp_or_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Temporal_oracle_noise_std = " + str(temp_or_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            print("Temporal_oracle_noise_std = " + str(temp_or_std) + " temporal AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            print("Temporal_oracle_noise_std = " + str(temp_or_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")

Temporal_oracle_noise_std = 0 PSL Model: cw_hard NRMSE = nan +- nan
Temporal_oracle_noise_std = 0 temporal AR NRMSE = nan +- nan
Temporal_oracle_noise_std = 0 pure AR NRMSE = nan +- nan


Temporal_oracle_noise_std = 0.1 PSL Model: cw_hard NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.1 temporal AR NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.1 pure AR NRMSE = nan +- nan


Temporal_oracle_noise_std = 0.2 PSL Model: cw_hard NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.2 temporal AR NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.2 pure AR NRMSE = nan +- nan


Temporal_oracle_noise_std = 0.3 PSL Model: cw_hard NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.3 temporal AR NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.3 pure AR NRMSE = nan +- nan


Temporal_oracle_noise_std = 0.4 PSL Model: cw_hard NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.4 temporal AR NRMSE = nan +- nan
Temporal_oracle_noise_std = 0.4 pure AR NRMSE = nan +- nan


Temporal_oracle_noise_std = 0.5 PSL 

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]):
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["temp_or_std"] == temp_or_std) & (results_df["clus_or_std"] == 1)].groupby(by=["Forecast_Window"]

In [17]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10
models = ["cw_hard"]
base_noise_stds = [1]
clus_or_stds = [0, 0.25, 0.5, 0.75, 1]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for base_noise_std in base_noise_stds:
    for clus_or_std in clus_or_stds:
        temp_or_std = 0

        for model in models:
            print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_or_std))

            data_dir = "data/E3_clus/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/eval"
            result_dir = "results/Online/E3_clus/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_" + str(temp_or_std) + "/window_size_4/" + model + "/inferred-predicates"

            print(data_dir)
            print(os.getcwd())
            
            results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
            results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "cluster_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, cluster_baseline=True)])
            results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0
data/E3_clus/base_noise_1/clus_or_variance_0/cross_cov_0/temp_or_variance_0/window_size_4/eval
/Users/almumill/dev/SyntheticTimeSeries
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.25
data/E3_clus/base_noise_1/clus_or_variance_0.25/cross_cov_0/temp_or_variance_0/window_size_4/eval
/Users/almumill/dev/SyntheticTimeSeries
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.5
data/E3_clus/base_noise_1/clus_or_variance_0.5/cross_cov_0/temp_or_variance_0/window_size_4/eval
/Users/almumill/dev/SyntheticTimeSeries
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0.75
data/E3_clus/base_noise_1/clus_or_variance_0.75/cross_cov_0/temp_or_variance_0/window_size_4/eval
/Users/almumill/dev/SyntheticTimeSeries
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 1
data/E3_clus/base_noise_1/clus_or_variance_1/cross_cov_0/temp_or_variance_0/window_size_4/eval
/Use

In [20]:
for metric in METRICS:
    for model in models:
        for clus_or_std in clus_or_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Base_oracle_noise_std = " + str(base_noise_std) + " Cluster_noise_std = " + str(clus_or_std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            print("Base_oracle_noise_std = " + str(base_noise_std) + " Cluster_noise_std = " + str(clus_or_std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            print("Base_oracle_noise_std = " + str(base_noise_std) + " Cluster_noise_std = " + str(clus_or_std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")

  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] 

Base_oracle_noise_std = 1 Cluster_noise_std = 0 PSL Model: cw_hard NRMSE = 0.7475895595569013 +- 0.05043326880863453
Base_oracle_noise_std = 1 Cluster_noise_std = 0 cluster AR NRMSE = 0.6913653098176129 +- 0.03754266549620747
Base_oracle_noise_std = 1 Cluster_noise_std = 0 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


Base_oracle_noise_std = 1 Cluster_noise_std = 0.25 PSL Model: cw_hard NRMSE = 0.7781854165560788 +- 0.06066557173169835
Base_oracle_noise_std = 1 Cluster_noise_std = 0.25 cluster AR NRMSE = 0.7074388673213502 +- 0.036883472694919824
Base_oracle_noise_std = 1 Cluster_noise_std = 0.25 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464024


Base_oracle_noise_std = 1 Cluster_noise_std = 0.5 PSL Model: cw_hard NRMSE = 0.864369314505623 +- 0.06080258169327663
Base_oracle_noise_std = 1 Cluster_noise_std = 0.5 cluster AR NRMSE = 0.7505420321215813 +- 0.0373525929404853
Base_oracle_noise_std = 1 Cluster_noise_std = 0.5 pure AR NRMSE = 0.775530164477302 +- 0.0389897

  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):
  for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == clus_or_std)].groupby(by=["Forecast_Window"]):


In [47]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["cw_hard_meanprior0.1nsq"]
cluster_or_noise_stds = [0, 0.1, 0.2, 0.3, 0.4]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for clus_or_std in cluster_or_noise_stds:
    base_noise_std = 0
    temp_or_std = 0
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_or_std))
        
        data_dir = "data/E1_nobasenoise/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_0/window_size_4/eval"
        result_dir = "results/Online/E1_nobasenoise/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_or_std) + "/cross_cov_0/temp_or_variance_0/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_or_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "cluster_AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True, cluster_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_or_std, temp_or_std], eval_baseline=True)])

Evaluating cw_hard_meanprior0.1nsq with base noise 0 and cluster oracle noise std. 0
Evaluating cw_hard_meanprior0.1nsq with base noise 0 and cluster oracle noise std. 0.1
Evaluating cw_hard_meanprior0.1nsq with base noise 0 and cluster oracle noise std. 0.2
Evaluating cw_hard_meanprior0.1nsq with base noise 0 and cluster oracle noise std. 0.3
Evaluating cw_hard_meanprior0.1nsq with base noise 0 and cluster oracle noise std. 0.4


In [10]:
results_df

Unnamed: 0,base_noise_std,clus_or_std,temp_or_std,Series_ID,Forecast_Window,Method,NRMSE,MASE
0,1,0,0,0,000,PSL_cw_hard,0.903602,0.645074
1,1,0,0,1,000,PSL_cw_hard,0.370476,0.172899
2,1,0,0,2,000,PSL_cw_hard,0.329546,0.219751
3,1,0,0,3,000,PSL_cw_hard,0.358692,0.219198
4,1,0,0,4,000,PSL_cw_hard,0.602820,0.465741
...,...,...,...,...,...,...,...,...
1795,1,1,0,55,029,cluster_AR,0.348639,0.221274
1796,1,1,0,56,029,cluster_AR,0.710280,0.439076
1797,1,1,0,57,029,cluster_AR,1.354515,1.143602
1798,1,1,0,58,029,cluster_AR,0.380426,1.008077


In [50]:
for metric in METRICS:
    for model in models:
        for std in cluster_or_noise_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 0) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 0) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 0) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Cluster_oracle_noise_std = " + str(std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")

Cluster_oracle_noise_std = 0 PSL Model: cw_hard_meanprior0.1nsq NRMSE = 0.6739318625048952 +- 0.04861992103649635
Cluster_oracle_noise_std = 0 cluster AR NRMSE = 0.6238906202656211 +- 0.035599140590143394
Cluster_oracle_noise_std = 0 pure AR NRMSE = 0.708958242999575 +- 0.03738425007845916


Cluster_oracle_noise_std = 0.1 PSL Model: cw_hard_meanprior0.1nsq NRMSE = 0.6798019140769225 +- 0.04704543991840487
Cluster_oracle_noise_std = 0.1 cluster AR NRMSE = 0.6265974250013367 +- 0.035482410336074445
Cluster_oracle_noise_std = 0.1 pure AR NRMSE = 0.708958242999575 +- 0.03738425007845916


Cluster_oracle_noise_std = 0.2 PSL Model: cw_hard_meanprior0.1nsq NRMSE = 0.6981651994453503 +- 0.04989878962268292
Cluster_oracle_noise_std = 0.2 cluster AR NRMSE = 0.6350245008512952 +- 0.03557058687062343
Cluster_oracle_noise_std = 0.2 pure AR NRMSE = 0.708958242999575 +- 0.03738425007845916


Cluster_oracle_noise_std = 0.3 PSL Model: cw_hard_meanprior0.1nsq NRMSE = 0.7223394765708175 +- 0.051295356713

In [None]:
for metric in METRICS:
    for model in models:
        for std in cluster_or_noise_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Cluster_oracle_noise_std = " + str(std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")

In [39]:
# E1: no base noise, vary cluster oracle noise
# Hard constraint, weighted constraints of 10

models = ["cw_hard"]
clus_noise_stds = [0, 1]

n_folds = 30

results_df = pd.DataFrame(columns=["base_noise_std", "clus_or_std", "temp_or_std", "Series_ID", "Forecast_Window", "Method", "NRMSE", "MASE"])

for clus_noise_std in clus_noise_stds:
    base_noise_std = 1
    temp_or_std = 0
    
    for model in models:
        print("Evaluating " + model + " with base noise " + str(base_noise_std) + " and cluster oracle noise std. " + str(clus_noise_std))
        
        data_dir = "data/E1_fixednoise/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_noise_std) + "/cross_cov_0/temp_or_variance_0/window_size_4/eval"
        result_dir = "results/Online/E1_fixednoise/base_noise_" + str(base_noise_std) + "/clus_or_variance_" + str(clus_noise_std) + "/cross_cov_0/temp_or_variance_0/window_size_4/" + model + "/inferred-predicates"

        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "PSL_" + str(model), results_df, [base_noise_std, clus_noise_std, temp_or_std])])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "cluster_AR", results_df, [base_noise_std, clus_noise_std, temp_or_std], eval_baseline=True, cluster_baseline=True)])
        results_df = pd.concat([results_df, eval_run(data_dir, result_dir, n_folds, "AR", results_df, [base_noise_std, clus_noise_std, temp_or_std], eval_baseline=True)])

Evaluating cw_hard with base noise 1 and cluster oracle noise std. 0
Evaluating cw_hard with base noise 1 and cluster oracle noise std. 1


In [40]:
for metric in METRICS:
    for model in models:
        for std in clus_noise_stds:
            window_vals = []
            cluster_ar_window_vals = []
            pure_ar_window_vals = []

            for name, group in results_df[(results_df["Method"] == "PSL_" + str(model)) & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "cluster_AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                cluster_ar_window_vals += [np.mean(group[metric].values)]
            for name, group in results_df[(results_df["Method"] == "AR") & (results_df["base_noise_std"] == 1) & (results_df["clus_or_std"] == std)].groupby(by=["Forecast_Window"]):
                pure_ar_window_vals += [np.mean(group[metric].values)]

            print("Cluster_oracle_noise_std = " + str(std) + " PSL Model: " + model + " " + metric + " = " + str(np.mean(window_vals)) + " +- " + str(np.std(window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " cluster AR " + metric + " = " + str(np.mean(cluster_ar_window_vals)) + " +- " + str(np.std(cluster_ar_window_vals)))
            print("Cluster_oracle_noise_std = " + str(std) + " pure AR " + metric + " = " + str(np.mean(pure_ar_window_vals)) + " +- " + str(np.std(pure_ar_window_vals)) + "\n\n")

Cluster_oracle_noise_std = 0 PSL Model: cw_hard NRMSE = 0.7441394223649035 +- 0.050528099071055814
Cluster_oracle_noise_std = 0 cluster AR NRMSE = 0.6913653097727207 +- 0.03754266559021357
Cluster_oracle_noise_std = 0 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464027


Cluster_oracle_noise_std = 1 PSL Model: cw_hard NRMSE = 1.1028541010243926 +- 0.0872797098367929
Cluster_oracle_noise_std = 1 cluster AR NRMSE = 0.896661099251259 +- 0.04959188168417733
Cluster_oracle_noise_std = 1 pure AR NRMSE = 0.775530164477302 +- 0.03898970228464027


Cluster_oracle_noise_std = 0 PSL Model: cw_hard SMAPE = 12.076525915635838 +- 1.055149250883876
Cluster_oracle_noise_std = 0 cluster AR SMAPE = 11.250227850281815 +- 0.7339511019130658
Cluster_oracle_noise_std = 0 pure AR SMAPE = 12.859785414203579 +- 0.7945508565800287


Cluster_oracle_noise_std = 1 PSL Model: cw_hard SMAPE = 18.17590128599387 +- 1.3786686551975773
Cluster_oracle_noise_std = 1 cluster AR SMAPE = 14.203375770134093 +- 0.78244908