In [1]:
import ast
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
%%capture

# Make sure the project root directory is on PYTHONPATH, e.g. from the project root run:
# export PYTHONPATH="${PYTHONPATH}:$(pwd)"

# Change directory to project root directory

%cd ..

In [3]:
# Summarise the cost metrics (wall time, selected-feature ratio, energy) of
# every feature-selection algorithm, separately for each feature extractor.
# Produces `results_df` with one row per (algo, extractor, metric) holding the
# mean, standard deviation and inter-quartile range of that metric.
results = []

# Denominators used to express `n_selected_features` as a fraction of the
# extractor's feature set (values kept from the original analysis).
TOTAL_FEATURES = {"catch22": 28, "tsfel": 117}
# Only random-search runs with this `num_samples` setting are reported.
RANDOM_SEARCH_NUM_SAMPLES = 1500

for algo in [
    "lower_bound",
    "upper_bound",
    "filter_based",
    "random_search",
    "genetic_algorithm",
    "vanilla_woa",
    "balanced_woa",
]:
    df = pd.read_csv(f"data/results/merged/{algo}.csv", index_col=0)
    if algo in ["random_search", "vanilla_woa", "balanced_woa"]:
        # The axis=1 concat below aligns rows on index labels. The CSV index
        # is whatever was stored in the file, so reset it to 0..n-1 first;
        # otherwise rows could be mis-paired with their hyper-parameters.
        df = df.reset_index(drop=True)
        expanded_hparams = pd.json_normalize(df["fs_method_kwargs"].apply(ast.literal_eval))
        df = pd.concat([df, expanded_hparams], axis=1)
        df = df.drop(columns=["fs_method_kwargs"])

        # Keep only the lowest-score run per (vm, extractor, target_col).
        df = df.loc[
            df.groupby(["vm", "extractor", "target_col"])["score"].idxmin()
        ].reset_index(drop=True)

    for extractor in [
        "catch22",
        "tsfel"
    ]:
        extractor_mask = df["extractor"] == extractor
        if algo == "random_search":
            # NOTE(review): this filter runs after the best-run selection
            # above, so groups whose best run used a different `num_samples`
            # are dropped entirely. Confirm this is intended.
            extractor_mask = extractor_mask & (df["num_samples"] == RANDOM_SEARCH_NUM_SAMPLES)
        extractor_rows = df[extractor_mask]

        for metric in [
            "time",
            "n_selected_features",
            "energy_consumed [kWh]"
        ]:
            subset = extractor_rows[metric]

            mean = subset.mean()
            std = subset.std()
            iqr = subset.quantile(0.75) - subset.quantile(0.25)

            if metric == "n_selected_features":
                # Report feature counts as a rounded fraction of the total.
                total_features = TOTAL_FEATURES[extractor]
                mean = round(mean / total_features, 2)
                std = round(std / total_features, 2)
                iqr = round(iqr / total_features, 2)

            results.append({
                "algo": algo,
                "extractor": extractor,
                "metric": metric,
                "mean": mean,
                "std": std,
                "iqr": iqr
            })

results_df = pd.DataFrame(results)

In [4]:
# Reshape the long summary into one row per (algo, extractor) with one
# "<metric>_<stat>" column for every metric/statistic combination.
wide = results_df.pivot_table(
    values=["mean", "std", "iqr"],
    index=["algo", "extractor"],
    columns="metric",
)

# pivot_table yields (stat, metric) column tuples; flatten them metric-first.
wide.columns = ["_".join((metric, stat)) for stat, metric in wide.columns]

# Sort the statistic columns by name, then move the two index levels back to
# the front as ordinary columns.
pivot_df = wide[sorted(wide.columns)].reset_index()

In [5]:
# Cost summary (time, selected-feature ratio, energy) for the catch22 extractor.
pivot_df.loc[pivot_df["extractor"] == "catch22"]

Unnamed: 0,algo,extractor,energy_consumed [kWh]_iqr,energy_consumed [kWh]_mean,energy_consumed [kWh]_std,n_selected_features_iqr,n_selected_features_mean,n_selected_features_std,time_iqr,time_mean,time_std
0,balanced_woa,catch22,0.003644851,0.003266,0.002821477,0.21,0.29,0.19,130.710693,117.111656,101.181268
2,filter_based,catch22,1.404799e-06,6e-06,1.224881e-06,0.34,0.44,0.24,0.050247,0.199865,0.04392
4,genetic_algorithm,catch22,0.002850921,0.006004,0.001564418,0.15,0.34,0.1,226.1076,476.248218,124.082095
6,lower_bound,catch22,2.650108e-07,3e-06,2.85953e-07,0.0,0.04,0.0,0.009538,0.109169,0.010268
8,random_search,catch22,0.0004020072,0.00903,0.001328793,0.22,0.35,0.22,14.411663,323.815378,47.650761
10,upper_bound,catch22,3.957163e-06,8e-06,2.572335e-06,0.0,1.0,0.0,0.14194,0.273758,0.092264
12,vanilla_woa,catch22,0.004978096,0.00406,0.003475566,0.18,0.6,0.12,178.522127,145.590064,124.63862


In [6]:
# Cost summary (time, selected-feature ratio, energy) for the tsfel extractor.
pivot_df.loc[pivot_df["extractor"] == "tsfel"]

Unnamed: 0,algo,extractor,energy_consumed [kWh]_iqr,energy_consumed [kWh]_mean,energy_consumed [kWh]_std,n_selected_features_iqr,n_selected_features_mean,n_selected_features_std,time_iqr,time_mean,time_std
1,balanced_woa,tsfel,0.01111618,0.008917,0.009399463,0.13,0.19,0.14,398.636911,319.789106,337.074636
3,filter_based,tsfel,1.48577e-05,2.2e-05,8.649935e-06,0.16,0.64,0.14,0.53286,0.781794,0.310206
5,genetic_algorithm,tsfel,0.01534819,0.018776,0.008542238,0.07,0.44,0.05,1217.324662,1489.317563,677.528538
7,lower_bound,tsfel,1.303529e-07,3e-06,2.850074e-07,0.0,0.01,0.0,0.004673,0.108745,0.010218
9,random_search,tsfel,0.01691663,0.024863,0.01156234,0.05,0.09,0.1,606.646029,891.622732,414.635563
11,upper_bound,tsfel,2.493854e-05,2.8e-05,1.486069e-05,0.0,1.0,0.0,0.894387,1.001116,0.532919
13,vanilla_woa,tsfel,0.01835246,0.014453,0.0150608,0.11,0.65,0.1,658.144869,518.316126,540.102606


In [7]:
# Load every merged result file except the codecarbon / aggregated-results
# exports. The paths are sorted because Path.glob yields files in arbitrary
# order, and the row order decides which run wins a tie in idxmin below.
frames = [
    pd.read_csv(f)
    for f in sorted(Path("data/results/merged").glob("*.csv"))
    if not f.name.endswith(("codecarbon.csv", "results.csv"))
]
# Reset to a 0..n-1 index so the axis=1 concat below pairs rows correctly.
dfs = pd.concat(frames, axis=0).reset_index(drop=True).drop(columns="Unnamed: 0")

# Expand the stringified hyper-parameter dicts into one column per key.
expanded_hparams = pd.json_normalize(dfs["fs_method_kwargs"].apply(ast.literal_eval))
dfs = pd.concat([dfs, expanded_hparams], axis=1).drop(columns=["fs_method_kwargs"])

# Keep the lowest-score run of every method for each (vm, extractor, target).
dfs = dfs.loc[
    dfs.groupby(["vm", "extractor", "target_col", "fs_method"])["score"].idxmin()
]

# Min-max normalise the scores across methods within each
# (extractor, vm, target) task.
task_keys = ["extractor", "vm", "target_col"]
min_max = (
    dfs.groupby(task_keys)["score"]
    .agg(["min", "max"])
    .reset_index()
)
dfs = dfs.merge(min_max, on=task_keys, how="left")
dfs["normalized_score"] = (dfs["score"] - dfs["min"]) / (dfs["max"] - dfs["min"])
# A task where max == min divides 0 by 0 and gives NaN; report it as 0.
dfs["normalized_score"] = dfs["normalized_score"].fillna(0.0)

results = []

# Maps the `fs_method` labels found in the result files to the algorithm
# names used in the summary tables.
algo_lut = {
    "LowerBound": "lower_bound",
    "UpperBound": "upper_bound",
    "tsfresh_select_features": "filter_based",
    "GA": "genetic_algorithm",
    "RS": "random_search",
    "VanillaWOA": "vanilla_woa",
    "BalancedWOA": "balanced_woa",
}

# Reported metric name -> column of `dfs` it is computed from.
metric_columns = {"norm_score": "normalized_score", "score": "score"}

for method, algo in algo_lut.items():
    for extractor in ["catch22", "tsfel"]:
        # Select the rows once and reuse them for both metrics.
        selected = dfs.loc[(dfs["fs_method"] == method) & (dfs["extractor"] == extractor)]
        for metric, column in metric_columns.items():
            values = selected[column]
            results.append(
                {
                    "algo": algo,
                    "extractor": extractor,
                    "metric": metric,
                    "mean": values.mean(),
                    "std": values.std(),
                    "iqr": values.quantile(0.75) - values.quantile(0.25),
                }
            )

In [8]:
# One row per (algo, extractor, metric). "metric" is part of the sort key so
# that norm_score and score rows, which are on different scales, are each
# ranked by mean within their own group instead of being interleaved.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(["extractor", "metric", "mean"])

In [9]:
# Score and normalised-score statistics for the catch22 extractor.
results_df.loc[results_df["extractor"] == "catch22"]

Unnamed: 0,algo,extractor,metric,mean,std,iqr
12,genetic_algorithm,catch22,norm_score,0.009389,0.027243,0.0
24,balanced_woa,catch22,norm_score,0.044476,0.059166,0.055814
16,random_search,catch22,norm_score,0.116886,0.110202,0.190736
20,vanilla_woa,catch22,norm_score,0.237704,0.196424,0.256486
0,lower_bound,catch22,norm_score,0.719012,0.379334,0.62366
4,upper_bound,catch22,norm_score,0.739391,0.351351,0.609439
8,filter_based,catch22,norm_score,0.755593,0.324505,0.288502
13,genetic_algorithm,catch22,score,3.079503,2.271114,3.078494
25,balanced_woa,catch22,score,3.112106,2.303022,3.114903
17,random_search,catch22,score,3.175126,2.381149,3.093682


In [10]:
# Score and normalised-score statistics for the tsfel extractor.
results_df.loc[results_df["extractor"] == "tsfel"]

Unnamed: 0,algo,extractor,metric,mean,std,iqr
26,balanced_woa,tsfel,norm_score,0.05098,0.077272,0.069546
18,random_search,tsfel,norm_score,0.079939,0.111009,0.114351
14,genetic_algorithm,tsfel,norm_score,0.118868,0.191104,0.14484
22,vanilla_woa,tsfel,norm_score,0.407959,0.27807,0.482663
2,lower_bound,tsfel,norm_score,0.70727,0.418728,0.473271
6,upper_bound,tsfel,norm_score,0.725665,0.330697,0.444496
10,filter_based,tsfel,norm_score,0.765008,0.330239,0.437804
27,balanced_woa,tsfel,score,3.220646,2.393237,3.123852
19,random_search,tsfel,score,3.265476,2.451248,3.103814
15,genetic_algorithm,tsfel,score,3.2685,2.435965,3.157233
