In [3]:
import pandas as pd
import sys, os
sys.path.append(os.path.abspath("../data_creation/plots_and_analysis"))
import config
# adjust paths in /data_creation/plots_and_analysis/config.py

We ran forecasts on 7 × 3 × 5 = 105 datasets (6 target sets + 1 source set, 3 OSDs, and 5 runs). Here, we average the performance metrics, similarities, and features over div steps (= OSDs) and runs to evaluate the averaged results.

In [4]:
# Collect the evaluation metrics of every run and div step into one long table,
# tag each row with its div step and run, and persist the combined table.

# NOTE(review): not used in this cell; it lists "NRMSE" while the loaded metrics
# and the aggregation cells use "RMSSE" — confirm which name is intended.
point_metrics = ["NRMSE", "MASE", "sMAPE"]
all_metrics = []

for run in ["case1_run1", "case1_run2", "case1_run3", "case1_run4", "case1_run5"]:
    path_to_forecasts = f"{config.path_to_data}/{run}/outputs/forecasts/"
    for div_step in [10, 5, 0]:
        mets = pd.read_parquet(path_to_forecasts + "evaluation/metrics_div_step" + str(div_step) + ".parquet")
        mets['div_step'] = div_step
        mets['run'] = run
        all_metrics.append(mets)

# Concatenate once, after all files are read: concatenating inside the loop
# rebuilds the whole table on every iteration for no benefit.
all_metrics_df = (
    pd.concat(all_metrics, ignore_index=True)
    .drop(columns=["intermittency", "erraticness"])
)
all_metrics_df.to_csv(f"{config.path_to_evaluation}/results.csv")
all_metrics_df.to_parquet(f"{config.path_to_evaluation}/results.parquet")
all_metrics_df

Unnamed: 0,RMSSE,MASE,sMAPE,pot target size,similarity,div_step,run
0,0.478,0.714,0.304,5000,7.05,10,case1_run1
1,0.413,0.595,0.310,12000,7.22,10,case1_run1
2,0.386,0.568,0.324,25000,8.06,10,case1_run1
3,0.404,0.605,0.328,50000,8.26,10,case1_run1
4,0.602,0.807,0.391,100000,9.36,10,case1_run1
...,...,...,...,...,...,...,...
100,0.331,0.574,0.334,25000,8.06,0,case1_run5
101,0.337,0.578,0.330,50000,8.24,0,case1_run5
102,0.599,0.814,0.394,100000,9.31,0,case1_run5
103,0.504,0.743,0.374,all,9.71,0,case1_run5


In [5]:
# Build the mean and the sd of every metric over runs and div steps,
# one row per "pot target size", formatted as "mean (sd)".
custom_order = ["5000", "12000", "25000", "50000", "100000", "all", "source"]

# drop() returns a new frame, so all_metrics_df itself stays untouched.
all_metrics_df_agg = all_metrics_df.drop(columns=["div_step"])
all_metrics_df_agg['pot target size'] = pd.Categorical(
    all_metrics_df_agg['pot target size'], categories=custom_order, ordered=True
)

agg_funcs = {
    'RMSSE': ['mean', 'std'],
    'MASE': ['mean', 'std'],
    'sMAPE': ['mean', 'std'],
    'similarity': ['mean', 'std'],
}

# observed=False is spelled out so every category in custom_order keeps a row
# regardless of the pandas default for categorical groupers.
result = all_metrics_df_agg.groupby("pot target size", observed=False).agg(agg_funcs)

# Address the statistics by their labels ("mean"/"std") instead of by position;
# positional integer access on a label-indexed Series is deprecated in pandas.
formatted_result = {
    col: [
        f"{mean:.2f} ({sd:.2f})"
        for mean, sd in zip(result[(col, "mean")], result[(col, "std")])
    ]
    for col in agg_funcs
}

formatted_df = pd.DataFrame(formatted_result, index=result.index)

# The ordered categorical index sorts according to custom_order.
formatted_df = formatted_df.sort_index().reset_index()

# Derive the label from the row itself rather than from a fixed-length list,
# so it cannot get out of sync with the number or order of rows.
formatted_df["test set"] = [
    "source" if size == "source" else "target"
    for size in formatted_df["pot target size"]
]
formatted_df = formatted_df[["test set", "pot target size", "similarity", "RMSSE", "MASE", "sMAPE"]]

print(formatted_df)
formatted_df.to_csv(f"{config.path_to_evaluation}/results_averaged.csv", index=False)

  test set pot target size   similarity        RMSSE         MASE        sMAPE
0   target            5000  7.04 (0.01)  0.47 (0.03)  0.73 (0.01)  0.31 (0.00)
1   target           12000  7.24 (0.02)  0.39 (0.04)  0.61 (0.02)  0.31 (0.00)
2   target           25000  8.08 (0.02)  0.37 (0.04)  0.58 (0.02)  0.33 (0.01)
3   target           50000  8.25 (0.03)  0.38 (0.04)  0.60 (0.01)  0.33 (0.00)
4   target          100000  9.34 (0.03)  0.61 (0.02)  0.82 (0.01)  0.39 (0.01)
5   target             all  9.73 (0.04)  0.54 (0.03)  0.75 (0.01)  0.38 (0.00)
6   source          source    nan (nan)  0.55 (0.02)  0.76 (0.01)  0.38 (0.00)


In [6]:
# Aggregate results per div step over runs: for each div step, load the metrics
# of all runs, compute mean and sd per "pot target size", and write one CSV.

# NOTE(review): not used in this cell; it lists "NRMSE" while the aggregation
# below uses "RMSSE" — confirm which name is intended.
point_metrics = ["NRMSE", "MASE", "sMAPE"]

# Loop-invariant settings, defined once instead of on every iteration.
custom_order = ["5000", "12000", "25000", "50000", "100000", "all", "source"]
agg_funcs = {
    'RMSSE': ['mean', 'std'],
    'MASE': ['mean', 'std'],
    'sMAPE': ['mean', 'std'],
    'similarity': ['mean', 'std'],
}

for div_step in [10, 5, 0]:
    all_metrics = []
    for run in ["case1_run1", "case1_run2", "case1_run3", "case1_run4", "case1_run5"]:
        path_to_forecasts = f"{config.path_to_data}/{run}/outputs/forecasts/" #adjust
        mets = pd.read_parquet(path_to_forecasts + "evaluation/metrics_div_step" + str(div_step) + ".parquet")
        mets['div_step'] = div_step
        mets['run'] = run
        all_metrics.append(mets)

    all_metrics_df_div = pd.concat(all_metrics, ignore_index=True).drop(columns=["div_step"])
    all_metrics_df_div['pot target size'] = pd.Categorical(
        all_metrics_df_div['pot target size'], categories=custom_order, ordered=True
    )

    # Build the mean and the sd over runs (the div step is fixed in this iteration).
    # observed=False is spelled out so every category in custom_order keeps a row
    # regardless of the pandas default for categorical groupers.
    result = all_metrics_df_div.groupby("pot target size", observed=False).agg(agg_funcs)

    # Address the statistics by their labels ("mean"/"std") instead of by position;
    # positional integer access on a label-indexed Series is deprecated in pandas.
    formatted_result = {
        col: [
            f"{mean:.2f} ({sd:.2f})"
            for mean, sd in zip(result[(col, "mean")], result[(col, "std")])
        ]
        for col in agg_funcs
    }

    formatted_df = pd.DataFrame(formatted_result, index=result.index)

    # Sort by 'pot target size' (ordered categorical, so custom_order applies).
    formatted_df = formatted_df.sort_index().reset_index()

    # Derive the label from the row itself rather than from a fixed-length list,
    # so it cannot get out of sync with the number or order of rows.
    formatted_df["test set"] = [
        "source" if size == "source" else "target"
        for size in formatted_df["pot target size"]
    ]
    formatted_df = formatted_df[["test set", "pot target size", "similarity", "RMSSE", "MASE", "sMAPE"]]

    # NOTE(review): this CSV is written with the row index, whereas
    # results_averaged.csv is written with index=False — confirm whether the
    # extra index column is intended before aligning the two.
    formatted_df.to_csv(f"{config.path_to_evaluation}/results_div_step{div_step}.csv")
    print(f"div_step {div_step}", formatted_df)

div_step 10   test set pot target size   similarity        RMSSE         MASE        sMAPE
0   target            5000  7.04 (0.01)  0.48 (0.05)  0.74 (0.02)  0.31 (0.00)
1   target           12000  7.24 (0.01)  0.41 (0.05)  0.62 (0.02)  0.31 (0.00)
2   target           25000  8.09 (0.02)  0.38 (0.05)  0.59 (0.02)  0.33 (0.01)
3   target           50000  8.26 (0.02)  0.39 (0.05)  0.60 (0.01)  0.34 (0.00)
4   target          100000  9.33 (0.03)  0.61 (0.02)  0.82 (0.01)  0.40 (0.01)
5   target             all  9.73 (0.05)  0.54 (0.03)  0.75 (0.01)  0.37 (0.00)
6   source          source    nan (nan)  0.56 (0.03)  0.77 (0.00)  0.38 (0.00)
div_step 5   test set pot target size   similarity        RMSSE         MASE        sMAPE
0   target            5000  7.04 (0.00)  0.46 (0.01)  0.73 (0.01)  0.31 (0.00)
1   target           12000  7.24 (0.02)  0.38 (0.01)  0.61 (0.00)  0.31 (0.00)
2   target           25000  8.08 (0.03)  0.36 (0.03)  0.58 (0.02)  0.33 (0.00)
3   target           50000  8