In [28]:
import os
import pickle
from pathlib import Path
from typing import Generator

import numpy as np
import pandas as pd
import scikit_posthocs as sp
from IPython.display import HTML, display
from scipy import stats
from yaml import safe_load
from jmetal.lab.statistical_test.functions import (
    friedman_aligned_rank_test,
    friedman_aligned_ph_test,
)


pd.set_option("display.max_columns", 100)


def friedmann_nemenyi_test(
    unravelled_detailed_results: dict[str : dict[tuple[str, str], list[float]]],
) -> None:
    for metric in unravelled_detailed_results.keys():
        metric_df = pd.DataFrame(unravelled_detailed_results[metric])
        metric_vals = [metric_df[col].values for col in metric_df.columns]
        out = stats.friedmanchisquare(*metric_vals)
        print(
            f"Friedman Test {metric}: statistic={out.statistic:.3f}, pvalue={out.pvalue:.3f}"
        )
        metric_vals = np.array(metric_vals).T
        out = sp.posthoc_nemenyi_friedman(metric_vals)
        print("Nemenyi post-hoc test")
        feature_names = metric_df.columns
        out.index = feature_names
        out.columns = feature_names
        out = out.sort_index(axis=0).sort_index(axis=1)
        display(out)


def aligned_friedmann_holm_test(
    unravelled_detailed_results: dict[str : dict[tuple[str, str], list[float]]],
) -> None:
    for metric in unravelled_detailed_results.keys():
        metric_df = pd.DataFrame(unravelled_detailed_results[metric])
        # metric_vals = np.asarray([metric_df[col].values for col in metric_df.columns])
        metric_vals = metric_df.values

        out = friedman_aligned_rank_test(metric_vals)
        chi2_stat = out.loc['Aligned Rank stat'].iloc[0]
        p_value = out.loc['p-value'].iloc[0]
        print(f"Aligned-rank Friedman χ² {metric} = {chi2_stat:.3f}, p = {p_value:.3f}")

        z_vals, out, _ = friedman_aligned_ph_test(
            metric_vals, apv_procedure="Holm"  # Holm step-down correction
        )
        print("Holm post-hoc test")
        feature_names = metric_df.columns
        out.index = feature_names
        out.columns = feature_names
        out = out.sort_index(axis=0).sort_index(axis=1)
        display(out)


def present_results(
    paths: Generator,
    val_method: str = "lopo",
    remove_xgboost: bool = False,
    remove_chronos_small_from_test: bool = False,
    which_test: str = "friedmann-nemenyi",
    test_args: dict = {},
) -> None:
    results = []
    for reports_path in paths:
        report = pd.read_csv(reports_path, index_col=0)
        conf = safe_load(open(reports_path.parent / ".hydra/config.yaml"))
        if conf["validation_method"]["_target_"].split(".")[-1].lower() != val_method:
            continue
        model_name: str = conf["model"]["model"]["_target_"].split(".")[-1]
        features_name = (
            conf["feature_extractor"]["_target_"].split(".")[-1]
            if "model_name" not in conf["feature_extractor"]
            else conf["feature_extractor"]["model_name"]
        )
        validation_method = conf["validation_method"]["_target_"].split(".")[-1]
        if "aggregator" not in conf:
            aggregator = "MeanTimeAggregator"
        else:
            aggregator = (
                conf["aggregator"]["_target_"].split(".")[-1]
                if "_target_" in conf["aggregator"]
                else None
            )
        report_results = {}
        for col in report.columns:
            report_results[f"{col} avg"] = report[col].mean()
            report_results[f"{col} sem"] = report[col].sem() * 1.98  # 95% CI

        dataset = conf["dataset"]
        side = conf["side"]
        resampling = (
            conf["resampling"]["_target_"].split(".")[-1]
            if "resampling" in conf
            else "None"
        )
        resampling = resampling if resampling != "NoUnderSampler" else "None"
        # Collect results in a list of dicts
        if remove_xgboost and model_name == "XGBClassifier":
            continue
        results.append(
            {
                "Dataset": dataset,
                "Side": side,
                "Model": model_name,
                "Resampling": resampling,
                "Features": features_name,
                "Aggregator": aggregator,
                "Validation": validation_method,
                "Detailed Report": report,
                **report_results,
            }
        )

    # After the loop, display as a table
    df_results = pd.DataFrame(results)
    for (dataset, side, resampling), group in df_results.groupby(
        ["Dataset", "Side", "Resampling"]
    ):
        display(
            HTML(
                f"""
            <div style='background-color:#ffe6e6; padding:18px; margin:10px 0; border-radius:8px;'>
                <h2 style='color:#b30000; margin:0; font-size:2em;'>
                Results for Dataset: <i>{dataset}</i>, Side: <i>{side}</i>, resampling: <i>{resampling}</i>
                </h2>
            </div>
            """
            )
        )
        display(
            group.sort_values(by=["Model", "Features", "Aggregator"])
            .drop(columns=["Detailed Report"])
            .drop_duplicates()
        )
        if "aggregator" in test_args.keys() and group["Aggregator"].nunique() > 2:
            group = group[
                (group["Aggregator"] == test_args["aggregator"])
                | (group["Aggregator"].isnull())
            ]

        unravelled_detailed_results = {
            metric: {} for metric in group["Detailed Report"].iloc[0].columns
        }
        for i, model_results in group.iterrows():
            for metric in model_results["Detailed Report"].columns:
                cv_results = model_results["Detailed Report"][metric]
                unravelled_detailed_results[metric][
                    (model_results["Model"], model_results["Features"])
                ] = cv_results

        if which_test == "friedmann-nemenyi":
            friedmann_nemenyi_test(unravelled_detailed_results)
        elif which_test == "alignedfriedmann-holm":
            aligned_friedmann_holm_test(unravelled_detailed_results)
        else:
            raise ValueError(f"Unknown test: {which_test}")

    return results

In [30]:
results_path: str = "../outputs/"
all_results = list(Path(results_path).glob("*/*/*/reports.csv"))
results_path_adula: str = "../outputs_adula/"
all_results += list(Path(results_path_adula).glob("*/*/*/reports.csv"))


results_lopo = present_results(
    all_results,
    val_method="lopo",
    remove_xgboost=True,
    remove_chronos_small_from_test=True,
    which_test="friedmann-nemenyi",
    test_args={
        "aggregator": "MeanChanAggregator"}
)

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
4,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.421389,0.092253,0.518713,0.027086,0.034524,0.051993,0.318314,0.17451,0.272775,0.169406,0.457165,0.26818,0.518713,0.027086
6,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.484316,0.153246,0.5,0.0,0.0,0.0,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.627536,0.107493,0.641607,0.09203,0.282201,0.15794,0.535047,0.163484,0.623977,0.168456,0.580068,0.249043,0.641607,0.09203
2,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.482812,0.153343,0.498413,0.003143,-0.015532,0.030753,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.498413,0.003143
5,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.499654,0.086121,0.500339,0.050637,-0.003384,0.100121,0.297661,0.185441,0.311768,0.213402,0.328211,0.214678,0.500339,0.050637
1,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.425518,0.088017,0.492814,0.032105,-0.009583,0.084659,0.288127,0.098984,0.466334,0.196568,0.360991,0.23421,0.492814,0.032105


Friedman Test accuracy_score: statistic=10.223, pvalue=0.069
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.939262,0.123979,0.970025,0.522199,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.939262,1.0,0.617519,0.999992,0.970025,0.956477
LogisticRegression,HandcraftedFeatureExtractor,0.123979,0.617519,1.0,0.522199,0.970025,0.14635
LogisticRegression,amazon/chronos-t5-large,0.970025,0.999992,0.522199,1.0,0.939262,0.980292
LogisticRegression,amazon/chronos-t5-small,0.522199,0.970025,0.970025,0.939262,1.0,0.569924
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.956477,0.14635,0.980292,0.569924,1.0


Friedman Test balanced_accuracy_score: statistic=12.557, pvalue=0.028
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998171,0.171582,0.992869,0.999745,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.998171,1.0,0.059732,0.999992,0.980292,0.996187
LogisticRegression,HandcraftedFeatureExtractor,0.171582,0.059732,1.0,0.039878,0.302328,0.19978
LogisticRegression,amazon/chronos-t5-large,0.992869,0.999992,0.039878,1.0,0.956477,0.98774
LogisticRegression,amazon/chronos-t5-small,0.999745,0.980292,0.302328,0.956477,1.0,0.999938
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.996187,0.19978,0.98774,0.999938,1.0


Friedman Test matthews_corrcoef: statistic=13.959, pvalue=0.016
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.992869,0.171582,0.980292,0.999992,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.992869,1.0,0.039878,0.999992,0.998171,0.996187
LogisticRegression,HandcraftedFeatureExtractor,0.171582,0.039878,1.0,0.025963,0.123979,0.14635
LogisticRegression,amazon/chronos-t5-large,0.980292,0.999992,0.025963,1.0,0.992869,0.98774
LogisticRegression,amazon/chronos-t5-small,0.999992,0.998171,0.123979,0.992869,1.0,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.996187,0.14635,0.98774,1.0,1.0


Friedman Test f1_score: statistic=12.818, pvalue=0.025
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.384497,0.970025,0.384497,0.980292,0.996187
LogisticRegression,AutonLab/MOMENT-1-large,0.384497,1.0,0.072404,1.0,0.830094,0.709576
LogisticRegression,HandcraftedFeatureExtractor,0.970025,0.072404,1.0,0.072404,0.664298,0.793037
LogisticRegression,amazon/chronos-t5-large,0.384497,1.0,0.072404,1.0,0.830094,0.709576
LogisticRegression,amazon/chronos-t5-small,0.980292,0.830094,0.664298,0.830094,1.0,0.999938
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.996187,0.709576,0.793037,0.709576,0.999938,1.0


Friedman Test precision_score: statistic=25.091, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.664298,0.171582,0.664298,0.998171,0.892833
LogisticRegression,AutonLab/MOMENT-1-large,0.664298,1.0,0.001601,1.0,0.384497,0.104321
LogisticRegression,HandcraftedFeatureExtractor,0.171582,0.001601,1.0,0.001601,0.384497,0.793037
LogisticRegression,amazon/chronos-t5-large,0.664298,1.0,0.001601,1.0,0.384497,0.104321
LogisticRegression,amazon/chronos-t5-small,0.998171,0.384497,0.384497,0.384497,1.0,0.98774
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.892833,0.104321,0.793037,0.104321,0.98774,1.0


Friedman Test recall_score: statistic=12.818, pvalue=0.025
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.384497,0.970025,0.384497,0.980292,0.996187
LogisticRegression,AutonLab/MOMENT-1-large,0.384497,1.0,0.072404,1.0,0.830094,0.709576
LogisticRegression,HandcraftedFeatureExtractor,0.970025,0.072404,1.0,0.072404,0.664298,0.793037
LogisticRegression,amazon/chronos-t5-large,0.384497,1.0,0.072404,1.0,0.830094,0.709576
LogisticRegression,amazon/chronos-t5-small,0.980292,0.830094,0.664298,0.830094,1.0,0.999938
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.996187,0.709576,0.793037,0.709576,0.999938,1.0


Friedman Test roc_auc_score: statistic=12.557, pvalue=0.028
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998171,0.171582,0.992869,0.999745,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.998171,1.0,0.059732,0.999992,0.980292,0.996187
LogisticRegression,HandcraftedFeatureExtractor,0.171582,0.059732,1.0,0.039878,0.302328,0.19978
LogisticRegression,amazon/chronos-t5-large,0.992869,0.999992,0.039878,1.0,0.956477,0.98774
LogisticRegression,amazon/chronos-t5-small,0.999745,0.980292,0.302328,0.956477,1.0,0.999938
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.996187,0.19978,0.98774,0.999938,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
42,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.424948,0.142739,0.440485,0.146541,0.012393,0.032708,0.401787,0.182682,0.461874,0.27004,0.441729,0.151303,0.510965,0.025945
44,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.253624,0.149921,0.338205,0.178744,-0.015014,0.071629,0.198312,0.162606,0.357748,0.291745,0.315289,0.277528,0.473488,0.065684
41,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.261139,0.131941,0.331712,0.143075,-0.081566,0.096574,0.274058,0.176519,0.413604,0.258524,0.43388,0.31093,0.438179,0.062473
40,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.306481,0.141948,0.35797,0.164798,-0.017755,0.132807,0.290548,0.145073,0.475471,0.301033,0.334796,0.241437,0.464015,0.122785
43,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.30748,0.164373,0.318506,0.169606,-0.056672,0.080626,0.26962,0.186584,0.300543,0.2387,0.310951,0.167046,0.445908,0.068776


Friedman Test accuracy_score: statistic=8.224, pvalue=0.084
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.066759,0.388334,0.761079,0.493589
LogisticRegression,AutonLab/MOMENT-1-large,0.066759,1.0,0.916448,0.603784,0.848974
LogisticRegression,HandcraftedFeatureExtractor,0.388334,0.916448,1.0,0.976379,0.999817
LogisticRegression,amazon/chronos-t5-large,0.761079,0.603784,0.976379,1.0,0.993349
LogisticRegression,amazon/chronos-t5-small,0.493589,0.848974,0.999817,0.993349,1.0


Friedman Test balanced_accuracy_score: statistic=2.094, pvalue=0.719
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.848974,0.885479,0.976379,0.711104
LogisticRegression,AutonLab/MOMENT-1-large,0.848974,1.0,0.999988,0.993349,0.999091
LogisticRegression,HandcraftedFeatureExtractor,0.885479,0.999988,1.0,0.997194,0.997194
LogisticRegression,amazon/chronos-t5-large,0.976379,0.993349,0.997194,1.0,0.961612
LogisticRegression,amazon/chronos-t5-small,0.711104,0.999091,0.997194,0.961612,1.0


Friedman Test matthews_corrcoef: statistic=3.192, pvalue=0.526
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999091,0.658328,0.999091,0.961612
LogisticRegression,AutonLab/MOMENT-1-large,0.999091,1.0,0.807304,1.0,0.993349
LogisticRegression,HandcraftedFeatureExtractor,0.658328,0.807304,1.0,0.807304,0.961612
LogisticRegression,amazon/chronos-t5-large,0.999091,1.0,0.807304,1.0,0.993349
LogisticRegression,amazon/chronos-t5-small,0.961612,0.993349,0.961612,0.993349,1.0


Friedman Test f1_score: statistic=5.864, pvalue=0.210
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.252294,0.916448,0.493589,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.252294,1.0,0.761079,0.993349,0.976379
LogisticRegression,HandcraftedFeatureExtractor,0.916448,0.761079,1.0,0.941773,0.976379
LogisticRegression,amazon/chronos-t5-large,0.493589,0.993349,0.941773,1.0,0.999817
LogisticRegression,amazon/chronos-t5-small,0.603784,0.976379,0.976379,0.999817,1.0


Friedman Test precision_score: statistic=2.772, pvalue=0.597
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.961612,0.916448,0.993349,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.961612,1.0,0.999817,0.999091,0.941773
LogisticRegression,HandcraftedFeatureExtractor,0.916448,0.999817,1.0,0.993349,0.976379
LogisticRegression,amazon/chronos-t5-large,0.993349,0.999091,0.993349,1.0,0.848974
LogisticRegression,amazon/chronos-t5-small,0.603784,0.941773,0.976379,0.848974,1.0


Friedman Test recall_score: statistic=4.545, pvalue=0.337
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.493589,0.961612,0.658328,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.493589,1.0,0.885479,0.999091,0.999817
LogisticRegression,HandcraftedFeatureExtractor,0.961612,0.885479,1.0,0.961612,0.941773
LogisticRegression,amazon/chronos-t5-large,0.658328,0.999091,0.961612,1.0,0.999988
LogisticRegression,amazon/chronos-t5-small,0.603784,0.999817,0.941773,0.999988,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.987386,0.751424,0.987386,0.930677
LogisticRegression,AutonLab/MOMENT-1-large,0.987386,1.0,0.956544,1.0,0.99824
LogisticRegression,HandcraftedFeatureExtractor,0.751424,0.956544,1.0,0.956544,0.994615
LogisticRegression,amazon/chronos-t5-large,0.987386,1.0,0.956544,1.0,0.99824
LogisticRegression,amazon/chronos-t5-small,0.930677,0.99824,0.994615,0.99824,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
47,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.383712,0.131498,0.475504,0.038458,-0.030541,0.053279,0.428377,0.220202,0.548873,0.292851,0.356154,0.182575,0.475504,0.038458
49,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.67624,0.045756,0.624431,0.049235,0.201433,0.100414,0.769869,0.050766,0.876599,0.048957,0.695483,0.083177,0.624431,0.049235
46,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.720179,0.051051,0.752718,0.074657,0.374824,0.124084,0.804301,0.033096,0.927746,0.050036,0.713747,0.046953,0.752718,0.074657
45,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.714566,0.034155,0.748198,0.048663,0.368776,0.08969,0.798172,0.025449,0.92476,0.04556,0.706176,0.044744,0.748198,0.048663
48,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660845,0.014725,0.686496,0.039834,0.269149,0.046403,0.756432,0.022548,0.892093,0.069324,0.661942,0.029569,0.686496,0.039834
0,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.602818,0.026664,0.580985,0.024202,0.120241,0.040113,0.706916,0.046263,0.851138,0.061871,0.608197,0.053613,0.580985,0.024202


Friedman Test accuracy_score: statistic=26.020, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.032277,0.000263,0.001601,0.104321,0.793037
LogisticRegression,AutonLab/MOMENT-1-large,0.032277,1.0,0.793037,0.956477,0.998171,0.522199
LogisticRegression,HandcraftedFeatureExtractor,0.000263,0.793037,1.0,0.998171,0.522199,0.032277
LogisticRegression,amazon/chronos-t5-large,0.001601,0.956477,0.998171,1.0,0.793037,0.104321
LogisticRegression,amazon/chronos-t5-small,0.104321,0.998171,0.522199,0.793037,1.0,0.793037
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.793037,0.522199,0.032277,0.104321,0.793037,1.0


Friedman Test balanced_accuracy_score: statistic=29.776, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265207,0.000138,0.000263,0.048961,0.793037
LogisticRegression,AutonLab/MOMENT-1-large,0.265207,1.0,0.19978,0.265207,0.980292,0.956477
LogisticRegression,HandcraftedFeatureExtractor,0.000138,0.19978,1.0,0.999992,0.617519,0.020756
LogisticRegression,amazon/chronos-t5-large,0.000263,0.265207,0.999992,1.0,0.709576,0.032277
LogisticRegression,amazon/chronos-t5-small,0.048961,0.980292,0.617519,0.709576,1.0,0.617519
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.793037,0.956477,0.020756,0.032277,0.617519,1.0


Friedman Test matthews_corrcoef: statistic=29.939, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265207,7.1e-05,0.000491,0.048961,0.793037
LogisticRegression,AutonLab/MOMENT-1-large,0.265207,1.0,0.14635,0.34218,0.980292,0.956477
LogisticRegression,HandcraftedFeatureExtractor,7.1e-05,0.14635,1.0,0.998171,0.522199,0.013027
LogisticRegression,amazon/chronos-t5-large,0.000491,0.34218,0.998171,1.0,0.793037,0.048961
LogisticRegression,amazon/chronos-t5-small,0.048961,0.980292,0.522199,0.793037,1.0,0.617519
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.793037,0.956477,0.013027,0.048961,0.617519,1.0


Friedman Test f1_score: statistic=25.776, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.020756,0.000263,0.001601,0.19978,0.709576
LogisticRegression,AutonLab/MOMENT-1-large,0.020756,1.0,0.863453,0.980292,0.956477,0.522199
LogisticRegression,HandcraftedFeatureExtractor,0.000263,0.863453,1.0,0.998171,0.34218,0.048961
LogisticRegression,amazon/chronos-t5-large,0.001601,0.980292,0.998171,1.0,0.617519,0.14635
LogisticRegression,amazon/chronos-t5-small,0.19978,0.956477,0.34218,0.617519,1.0,0.956477
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.709576,0.522199,0.048961,0.14635,0.956477,1.0


Friedman Test precision_score: statistic=28.551, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.34218,0.000263,0.000263,0.032277,0.709576
LogisticRegression,AutonLab/MOMENT-1-large,0.34218,1.0,0.19978,0.19978,0.918099,0.992869
LogisticRegression,HandcraftedFeatureExtractor,0.000263,0.19978,1.0,1.0,0.793037,0.048961
LogisticRegression,amazon/chronos-t5-large,0.000263,0.19978,1.0,1.0,0.793037,0.048961
LogisticRegression,amazon/chronos-t5-small,0.032277,0.918099,0.793037,0.793037,1.0,0.617519
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.709576,0.992869,0.048961,0.048961,0.617519,1.0


Friedman Test recall_score: statistic=20.738, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.004781,0.013027,0.003668,0.265207,0.664298
LogisticRegression,AutonLab/MOMENT-1-large,0.004781,1.0,0.999745,1.0,0.709576,0.302328
LogisticRegression,HandcraftedFeatureExtractor,0.013027,0.999745,1.0,0.999242,0.863453,0.474996
LogisticRegression,amazon/chronos-t5-large,0.003668,1.0,0.999242,1.0,0.664298,0.265207
LogisticRegression,amazon/chronos-t5-small,0.265207,0.709576,0.863453,0.664298,1.0,0.98774
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.664298,0.302328,0.474996,0.265207,0.98774,1.0


Friedman Test roc_auc_score: statistic=29.776, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265207,0.000138,0.000263,0.048961,0.793037
LogisticRegression,AutonLab/MOMENT-1-large,0.265207,1.0,0.19978,0.265207,0.980292,0.956477
LogisticRegression,HandcraftedFeatureExtractor,0.000138,0.19978,1.0,0.999992,0.617519,0.020756
LogisticRegression,amazon/chronos-t5-large,0.000263,0.265207,0.999992,1.0,0.709576,0.032277
LogisticRegression,amazon/chronos-t5-small,0.048961,0.980292,0.617519,0.709576,1.0,0.617519
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.793037,0.956477,0.020756,0.032277,0.617519,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
9,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.566454,0.069834,0.563785,0.064325,0.014386,0.013761,0.607081,0.101947,0.662085,0.133996,0.638808,0.116072,0.512763,0.01087
11,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.65779,0.132067,0.630016,0.126509,-0.008993,0.008466,0.733725,0.123666,0.655589,0.134423,0.895782,0.117773,0.495507,0.003905
8,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.644175,0.129965,0.61771,0.123183,-0.007219,0.017336,0.730887,0.120988,0.656422,0.134193,0.897988,0.118505,0.498472,0.004105
7,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.651427,0.131188,0.625663,0.12549,-0.003951,0.01794,0.731138,0.122383,0.655591,0.134032,0.893233,0.118862,0.496869,0.01015
10,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660957,0.127395,0.631197,0.12239,0.003315,0.006441,0.734903,0.122734,0.656956,0.134046,0.897834,0.118219,0.500452,0.002127
12,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.655841,0.133618,0.629387,0.127295,0.0,0.0,0.738007,0.122733,0.656888,0.134081,0.911996,0.118828,0.5,0.0


Friedman Test accuracy_score: statistic=6.065, pvalue=0.300
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.845573,0.999997,0.826478,0.71574,0.909884
LogisticRegression,AutonLab/MOMENT-1-large,0.845573,1.0,0.895673,1.0,0.9999,0.999987
LogisticRegression,HandcraftedFeatureExtractor,0.999997,0.895673,1.0,0.880205,0.785004,0.945036
LogisticRegression,amazon/chronos-t5-large,0.826478,1.0,0.880205,1.0,0.999959,0.999959
LogisticRegression,amazon/chronos-t5-small,0.71574,0.9999,0.785004,0.999959,1.0,0.998779
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.909884,0.999987,0.945036,0.999959,0.998779,1.0


Friedman Test balanced_accuracy_score: statistic=1.341, pvalue=0.931
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.997073,0.993947,0.999959,1.0,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.997073,1.0,1.0,0.985221,0.998779,0.998068
LogisticRegression,HandcraftedFeatureExtractor,0.993947,1.0,1.0,0.975726,0.997073,0.995725
LogisticRegression,amazon/chronos-t5-large,0.999959,0.985221,0.975726,1.0,0.999786,0.9999
LogisticRegression,amazon/chronos-t5-small,1.0,0.998779,0.997073,0.999786,1.0,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.998068,0.995725,0.9999,1.0,1.0


Friedman Test matthews_corrcoef: statistic=6.882, pvalue=0.230
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.665971,0.954344,0.954344,0.997073,0.969626
LogisticRegression,AutonLab/MOMENT-1-large,0.665971,1.0,0.98878,0.98878,0.909884,0.980898
LogisticRegression,HandcraftedFeatureExtractor,0.954344,0.98878,1.0,1.0,0.998779,1.0
LogisticRegression,amazon/chronos-t5-large,0.954344,0.98878,1.0,1.0,0.998779,1.0
LogisticRegression,amazon/chronos-t5-small,0.997073,0.909884,0.998779,0.998779,1.0,0.999587
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.969626,0.980898,1.0,1.0,0.999587,1.0


Friedman Test f1_score: statistic=36.495, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.015603,0.036859,0.002909,0.01774,0.001144
LogisticRegression,AutonLab/MOMENT-1-large,0.015603,1.0,0.999786,0.997073,1.0,0.980898
LogisticRegression,HandcraftedFeatureExtractor,0.036859,0.999786,1.0,0.975726,0.9999,0.922837
LogisticRegression,amazon/chronos-t5-large,0.002909,0.997073,0.975726,1.0,0.995725,0.9999
LogisticRegression,amazon/chronos-t5-small,0.01774,1.0,0.9999,0.995725,1.0,0.975726
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001144,0.980898,0.922837,0.9999,0.975726,1.0


Friedman Test precision_score: statistic=8.693, pvalue=0.122
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.509196,0.954344,0.954344,0.98878,0.954344
LogisticRegression,AutonLab/MOMENT-1-large,0.509196,1.0,0.954344,0.954344,0.880205,0.954344
LogisticRegression,HandcraftedFeatureExtractor,0.954344,0.954344,1.0,1.0,0.9999,1.0
LogisticRegression,amazon/chronos-t5-large,0.954344,0.954344,1.0,1.0,0.9999,1.0
LogisticRegression,amazon/chronos-t5-small,0.98878,0.880205,0.9999,0.9999,1.0,0.9999
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.954344,0.954344,1.0,1.0,0.9999,1.0


Friedman Test recall_score: statistic=39.138, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.010501,0.032783,0.004533,0.029102,0.000702
LogisticRegression,AutonLab/MOMENT-1-large,0.010501,1.0,0.999267,0.9999,0.999587,0.980898
LogisticRegression,HandcraftedFeatureExtractor,0.032783,0.999267,1.0,0.99166,1.0,0.895673
LogisticRegression,amazon/chronos-t5-large,0.004533,0.9999,0.99166,1.0,0.993947,0.997073
LogisticRegression,amazon/chronos-t5-small,0.029102,0.999587,1.0,0.993947,1.0,0.909884
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000702,0.980898,0.895673,0.997073,0.909884,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.191374,0.858903,0.858903,0.960327,0.858903
LogisticRegression,AutonLab/MOMENT-1-large,0.191374,1.0,0.858903,0.858903,0.685064,0.858903
LogisticRegression,HandcraftedFeatureExtractor,0.858903,0.858903,1.0,1.0,0.999592,1.0
LogisticRegression,amazon/chronos-t5-large,0.858903,0.858903,1.0,1.0,0.999592,1.0
LogisticRegression,amazon/chronos-t5-small,0.960327,0.685064,0.999592,0.999592,1.0,0.999592
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.858903,0.858903,1.0,1.0,0.999592,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
16,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.488889,0.027326,0.471389,0.023477,-0.057482,0.047797,0.307668,0.06837,0.312222,0.068741,0.313889,0.074501,0.471389,0.023477
18,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.544444,0.03886,0.529722,0.037839,0.06787,0.079153,0.426971,0.052136,0.48812,0.063227,0.397222,0.055763,0.529722,0.037839
15,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.639506,0.072138,0.633889,0.069685,0.29942,0.151118,0.581475,0.082914,0.672775,0.109169,0.583333,0.096243,0.633889,0.069685
14,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.711111,0.063328,0.7025,0.06392,0.433858,0.134917,0.63382,0.093704,0.716975,0.11233,0.625,0.105474,0.7025,0.06392
17,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.695062,0.066031,0.686389,0.066281,0.400102,0.137941,0.612699,0.097435,0.683939,0.110048,0.608333,0.108515,0.686389,0.066281


Friedman Test accuracy_score: statistic=46.851, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.286002,0.016439,2.512613e-08,1e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.2860024,1.0,0.783543,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01643881,0.783543,1.0,0.03464118,0.375652
LogisticRegression,amazon/chronos-t5-large,2.512613e-08,0.000604,0.034641,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,9.53622e-06,0.02723,0.375652,0.8262746,1.0


Friedman Test balanced_accuracy_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Friedman Test matthews_corrcoef: statistic=45.064, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.193592,0.00632,3.222757e-08,4e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.1935917,1.0,0.736935,0.001620493,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.006319986,0.736935,1.0,0.08382349,0.450246
LogisticRegression,amazon/chronos-t5-large,3.222757e-08,0.00162,0.083823,1.0,0.912338
LogisticRegression,amazon/chronos-t5-small,4.111551e-06,0.030743,0.450246,0.9123379,1.0


Friedman Test f1_score: statistic=41.696, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.897689,0.011036,8.85139e-07,0.000253
LogisticRegression,AutonLab/MOMENT-1-large,0.8976892,1.0,0.136288,8.404476e-05,0.008383
LogisticRegression,HandcraftedFeatureExtractor,0.0110361,0.136288,1.0,0.2102181,0.864477
LogisticRegression,amazon/chronos-t5-large,8.85139e-07,8.4e-05,0.210218,1.0,0.783543
LogisticRegression,amazon/chronos-t5-small,0.0002527819,0.008383,0.864477,0.7835433,1.0


Friedman Test precision_score: statistic=49.703, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.04898,7e-05,9.133075e-09,5e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.04898045,1.0,0.399924,0.006319986,0.149262
LogisticRegression,HandcraftedFeatureExtractor,6.95391e-05,0.399924,1.0,0.4761338,0.984218
LogisticRegression,amazon/chronos-t5-large,9.133075e-09,0.00632,0.476134,1.0,0.805438
LogisticRegression,amazon/chronos-t5-small,5.086584e-06,0.149262,0.984218,0.8054383,1.0


Friedman Test recall_score: statistic=37.333, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.760677,0.004081,1e-05,0.000428
LogisticRegression,AutonLab/MOMENT-1-large,0.760677,1.0,0.136288,0.001899,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.004081,0.136288,1.0,0.63538,0.979204
LogisticRegression,amazon/chronos-t5-large,1e-05,0.001899,0.63538,1.0,0.925665
LogisticRegression,amazon/chronos-t5-small,0.000428,0.030743,0.979204,0.925665,1.0


Friedman Test roc_auc_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
36,usilaughs,right,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.537037,0.031791,0.528889,0.030555,0.057068,0.061686,0.416111,0.081522,0.3892,0.0778,0.455556,0.0908,0.528889,0.030555
35,usilaughs,right,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.730864,0.103107,0.743611,0.096125,0.479467,0.205783,0.777788,0.07931,0.751362,0.106554,0.858333,0.05658,0.743611,0.096125
34,usilaughs,right,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.737037,0.072515,0.7375,0.070212,0.494875,0.144505,0.707187,0.084681,0.744827,0.097383,0.741667,0.097647,0.7375,0.070212


Friedman Test accuracy_score: statistic=13.611, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.004454,0.006822
LogisticRegression,HandcraftedFeatureExtractor,0.004454,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.006822,0.990854,1.0


Friedman Test balanced_accuracy_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Friedman Test matthews_corrcoef: statistic=14.966, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.002276,0.003576
LogisticRegression,HandcraftedFeatureExtractor,0.002276,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.003576,0.990854,1.0


Friedman Test f1_score: statistic=25.737, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,4e-06,0.002859
LogisticRegression,HandcraftedFeatureExtractor,4e-06,1.0,0.268023
LogisticRegression,amazon/chronos-t5-large,0.002859,0.268023,1.0


Friedman Test precision_score: statistic=23.138, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.000243,0.000185
LogisticRegression,HandcraftedFeatureExtractor,0.000243,1.0,0.997705
LogisticRegression,amazon/chronos-t5-large,0.000185,0.997705,1.0


Friedman Test recall_score: statistic=32.849, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,2.525956e-07,0.004454
LogisticRegression,HandcraftedFeatureExtractor,2.525956e-07,1.0,0.072064
LogisticRegression,amazon/chronos-t5-large,0.004453623,0.07206404,1.0


Friedman Test roc_auc_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
29,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.516049,0.033713,0.505278,0.034151,0.010152,0.068488,0.40094,0.064931,0.402968,0.060762,0.408333,0.073087,0.505278,0.034151
23,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,CatAggregator,LOPO,0.553086,0.04597,0.539722,0.046472,0.080817,0.097864,0.434615,0.069687,0.48263,0.072581,0.419444,0.080478,0.539722,0.046472
31,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.571605,0.043871,0.548333,0.043829,0.121783,0.105163,0.397259,0.068851,0.572211,0.107839,0.338889,0.069839,0.548333,0.043829
24,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanTimeAggregator,LOPO,0.553086,0.04597,0.539722,0.046472,0.080817,0.097864,0.434615,0.069687,0.48263,0.072581,0.419444,0.080478,0.539722,0.046472
28,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.724691,0.099802,0.73,0.094771,0.465681,0.197678,0.743902,0.085226,0.752967,0.108212,0.777778,0.076837,0.73,0.094771
19,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,CatAggregator,LOPO,0.735802,0.069235,0.729444,0.068598,0.482407,0.145309,0.675669,0.094845,0.736628,0.112852,0.672222,0.101907,0.729444,0.068598
27,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.730864,0.071708,0.723611,0.071174,0.478667,0.146334,0.664974,0.094681,0.771658,0.101693,0.658333,0.10683,0.723611,0.071174
21,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanTimeAggregator,LOPO,0.759259,0.07126,0.756944,0.070498,0.534357,0.145929,0.716721,0.092405,0.774558,0.099563,0.736111,0.10226,0.756944,0.070498
20,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,CatAggregator,LOPO,0.745679,0.084439,0.740278,0.084238,0.497599,0.176246,0.689754,0.106699,0.76913,0.112825,0.691667,0.114957,0.740278,0.084238
30,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.730864,0.079087,0.722778,0.079596,0.463984,0.165255,0.655712,0.109457,0.726762,0.118299,0.65,0.114753,0.722778,0.079596


Friedman Test accuracy_score: statistic=27.843, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.696733,0.002672,0.006542,0.001784,0.001556
LogisticRegression,AutonLab/MOMENT-1-large,0.696733,1.0,0.203335,0.321939,0.162927,0.150842
LogisticRegression,HandcraftedFeatureExtractor,0.002672,0.203335,1.0,0.999889,0.999998,0.999993
LogisticRegression,amazon/chronos-t5-large,0.006542,0.321939,0.999889,1.0,0.999359,0.998982
LogisticRegression,amazon/chronos-t5-small,0.001784,0.162927,0.999998,0.999359,1.0,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001556,0.150842,0.999993,0.998982,1.0,1.0


Friedman Test balanced_accuracy_score: statistic=29.114, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.779036,0.000665,0.007396,0.003048,0.001784
LogisticRegression,AutonLab/MOMENT-1-large,0.779036,1.0,0.064018,0.267036,0.162927,0.118566
LogisticRegression,HandcraftedFeatureExtractor,0.000665,0.064018,1.0,0.989528,0.998982,0.999889
LogisticRegression,amazon/chronos-t5-large,0.007396,0.267036,0.989528,1.0,0.999889,0.998982
LogisticRegression,amazon/chronos-t5-small,0.003048,0.162927,0.998982,0.999889,1.0,0.999993
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001784,0.118566,0.999889,0.998982,0.999993,1.0


Friedman Test matthews_corrcoef: statistic=29.965, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.674825,0.000888,0.005099,0.002044,0.000496
LogisticRegression,AutonLab/MOMENT-1-large,0.674825,1.0,0.118566,0.302993,0.189166,0.084143
LogisticRegression,HandcraftedFeatureExtractor,0.000888,0.118566,1.0,0.997725,0.999948,0.999993
LogisticRegression,amazon/chronos-t5-large,0.005099,0.302993,0.997725,1.0,0.999889,0.991951
LogisticRegression,amazon/chronos-t5-small,0.002044,0.189166,0.999948,0.999889,1.0,0.999359
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000496,0.084143,0.999993,0.991951,0.999359,1.0


Friedman Test f1_score: statistic=40.664, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.995513,1.7e-05,0.01679,0.007396,0.000575
LogisticRegression,AutonLab/MOMENT-1-large,0.995513,1.0,0.0002,0.076932,0.039391,0.004493
LogisticRegression,HandcraftedFeatureExtractor,1.7e-05,0.0002,1.0,0.561028,0.718178,0.974215
LogisticRegression,amazon/chronos-t5-large,0.01679,0.076932,0.561028,1.0,0.999889,0.947323
LogisticRegression,amazon/chronos-t5-small,0.007396,0.039391,0.718178,0.999889,1.0,0.986599
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000575,0.004493,0.974215,0.947323,0.986599,1.0


Friedman Test precision_score: statistic=36.849, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.084143,0.0002,7.7e-05,0.000317,0.000106
LogisticRegression,AutonLab/MOMENT-1-large,0.084143,1.0,0.537957,0.403474,0.607089,0.447113
LogisticRegression,HandcraftedFeatureExtractor,0.0002,0.537957,1.0,0.999948,0.999998,0.999993
LogisticRegression,amazon/chronos-t5-large,7.7e-05,0.403474,0.999948,1.0,0.999617,1.0
LogisticRegression,amazon/chronos-t5-small,0.000317,0.607089,0.999998,0.999617,1.0,0.999889
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000106,0.447113,0.999993,1.0,0.999889,1.0


Friedman Test recall_score: statistic=51.116, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.9624121,3.35744e-05,0.128677,0.150842,0.001784
LogisticRegression,AutonLab/MOMENT-1-large,0.962412,1.0,4.147301e-07,0.011914,0.014997,4.7e-05
LogisticRegression,HandcraftedFeatureExtractor,3.4e-05,4.147301e-07,1.0,0.218209,0.189166,0.947323
LogisticRegression,amazon/chronos-t5-large,0.128677,0.01191366,0.2182088,1.0,1.0,0.759397
LogisticRegression,amazon/chronos-t5-small,0.150842,0.01499661,0.1891656,1.0,1.0,0.718178
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001784,4.694174e-05,0.9473232,0.759397,0.718178,1.0


Friedman Test roc_auc_score: statistic=29.114, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.779036,0.000665,0.007396,0.003048,0.001784
LogisticRegression,AutonLab/MOMENT-1-large,0.779036,1.0,0.064018,0.267036,0.162927,0.118566
LogisticRegression,HandcraftedFeatureExtractor,0.000665,0.064018,1.0,0.989528,0.998982,0.999889
LogisticRegression,amazon/chronos-t5-large,0.007396,0.267036,0.989528,1.0,0.999889,0.998982
LogisticRegression,amazon/chronos-t5-small,0.003048,0.162927,0.998982,0.999889,1.0,0.999993
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001784,0.118566,0.999889,0.998982,0.999993,1.0


In [30]:
results_lopo = present_results(
    all_results,
    val_method="tacv",
    remove_xgboost=True,
    remove_chronos_small_from_test=True,
)

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
4,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.478237,0.051894,0.500512,0.031328,0.001778,0.061588,0.38797,0.202726,0.357265,0.200925,0.443405,0.221716,0.500512,0.031328
6,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.6405,0.131275,0.615714,0.136109,0.241794,0.286743,0.462302,0.302731,0.625714,0.362842,0.466578,0.334841,0.615714,0.136109
2,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
5,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.508916,0.058305,0.523843,0.049311,0.041462,0.110617,0.297648,0.210281,0.36411,0.201832,0.312121,0.266114,0.523843,0.049311
1,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.504563,0.053762,0.519786,0.050948,0.021321,0.133771,0.361768,0.19162,0.405507,0.242627,0.368939,0.21053,0.519786,0.050948


Friedman Test: statistic=21.023, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)","(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,0.603784,0.124583,0.603784,0.986698
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.603784,1.0,0.001346,1.0,0.29407
"(LogisticRegression, HandcraftedFeatureExtractor)",0.124583,0.001346,1.0,0.001346,0.339541
"(LogisticRegression, amazon/chronos-t5-large)",0.603784,1.0,0.001346,1.0,0.29407
"(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)",0.986698,0.29407,0.339541,0.29407,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
32,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.545503,0.08005,0.54293,0.059188,0.071216,0.103792,0.583344,0.163628,0.57177,0.209098,0.705254,0.180745,0.54293,0.059188
34,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.488051,0.208738,0.526773,0.036324,0.085154,0.108571,0.533131,0.271428,0.559069,0.208522,0.749482,0.350237,0.526773,0.036324
31,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.311145,0.142635,0.328524,0.131626,-0.355593,0.26508,0.304784,0.219214,0.323593,0.197606,0.375569,0.262681,0.328524,0.131626
30,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.500288,0.199609,0.51477,0.050704,0.0323,0.159239,0.608019,0.213006,0.547907,0.208574,0.854995,0.235896,0.51477,0.050704
33,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.452856,0.174762,0.511186,0.051142,0.028248,0.10461,0.533535,0.178035,0.542154,0.203816,0.67971,0.234199,0.511186,0.051142


Friedman Test: statistic=13.800, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,0.841171,0.002442,0.728805
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.841171,1.0,0.035843,0.996864
"(LogisticRegression, HandcraftedFeatureExtractor)",0.002442,0.035843,1.0,0.062368
"(LogisticRegression, amazon/chronos-t5-large)",0.728805,0.996864,0.062368,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
37,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.409179,0.126033,0.495931,0.005103,-0.005947,0.008245,0.380454,0.307846,0.507808,0.414703,0.305448,0.246981,0.495931,0.005103
39,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.685536,0.047774,0.671579,0.044308,0.277698,0.111877,0.775661,0.038262,0.898969,0.045172,0.688387,0.074978,0.671579,0.044308
36,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.776265,0.040743,0.830071,0.041804,0.512755,0.090253,0.84241,0.025621,0.961691,0.031821,0.750841,0.037521,0.830071,0.041804
35,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.733597,0.045006,0.785686,0.031861,0.441448,0.104907,0.808792,0.026878,0.952221,0.029885,0.704134,0.037858,0.785686,0.031861
38,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.673411,0.036678,0.730974,0.057816,0.339848,0.051974,0.76041,0.035016,0.913251,0.075118,0.655163,0.041796,0.730974,0.057816
0,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.618506,0.037796,0.614872,0.046506,0.181688,0.090396,0.717323,0.041927,0.871466,0.052669,0.613656,0.063585,0.614872,0.046506


Friedman Test: statistic=28.000, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)","(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,0.124583,2.2e-05,0.003542,0.761079
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.124583,1.0,0.124583,0.761079,0.761079
"(LogisticRegression, HandcraftedFeatureExtractor)",2.2e-05,0.124583,1.0,0.761079,0.003542
"(LogisticRegression, amazon/chronos-t5-large)",0.003542,0.761079,0.761079,1.0,0.124583
"(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)",0.761079,0.761079,0.003542,0.124583,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
9,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.555644,0.096513,0.48866,0.028251,-0.023202,0.051982,0.654688,0.126983,0.663317,0.091176,0.692261,0.250659,0.48866,0.028251
11,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.648025,0.086939,0.488468,0.011939,-0.042485,0.03812,0.776995,0.068974,0.669713,0.078364,0.929057,0.055451,0.488468,0.011939
8,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.684997,0.088681,0.522659,0.046252,0.079399,0.177631,0.808665,0.059924,0.683752,0.085957,0.998889,0.0022,0.522659,0.046252
7,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.668558,0.083023,0.498674,0.002839,-0.003172,0.007836,0.792218,0.069807,0.674811,0.074602,0.962115,0.06585,0.498674,0.002839
10,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.658135,0.072629,0.495278,0.00935,-0.011676,0.023118,0.789412,0.052666,0.673304,0.072463,0.965556,0.0682,0.495278,0.00935
12,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.675595,0.073512,0.5,0.0,0.0,0.0,0.804037,0.052678,0.675595,0.073512,1.0,0.0,0.5,0.0


Friedman Test: statistic=25.943, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)","(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,0.999817,0.000478,0.339541,0.011544
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.999817,1.0,0.00096,0.439913,0.019867
"(LogisticRegression, HandcraftedFeatureExtractor)",0.000478,0.00096,1.0,0.180575,0.916448
"(LogisticRegression, amazon/chronos-t5-large)",0.339541,0.439913,0.180575,1.0,0.658328
"(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)",0.011544,0.019867,0.916448,0.658328,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
16,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.47037,0.056328,0.456667,0.041791,-0.086661,0.083587,0.321277,0.163389,0.311333,0.158477,0.333333,0.171076,0.456667,0.041791
18,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.574074,0.060249,0.5525,0.05802,0.117881,0.135204,0.422028,0.098378,0.540887,0.120513,0.358333,0.099685,0.5525,0.05802
15,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.648148,0.098387,0.640833,0.09397,0.290383,0.195244,0.596089,0.090262,0.629527,0.131358,0.575,0.079987,0.640833,0.09397
14,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.740741,0.032796,0.733333,0.02846,0.479257,0.066262,0.695449,0.030717,0.740366,0.077616,0.666667,0.069024,0.733333,0.02846
17,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.718519,0.029333,0.71,0.028292,0.429669,0.058617,0.665698,0.036453,0.709698,0.052691,0.633333,0.066,0.71,0.028292


Friedman Test: statistic=21.000, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,0.468608,0.019635,8.1e-05
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.468608,1.0,0.468608,0.019635
"(LogisticRegression, HandcraftedFeatureExtractor)",0.019635,0.468608,1.0,0.468608
"(LogisticRegression, amazon/chronos-t5-large)",8.1e-05,0.019635,0.468608,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
21,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.5,0.065591,0.4975,0.064466,-0.004713,0.129485,0.456869,0.067827,0.444018,0.067048,0.475,0.084939,0.4975,0.064466
26,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.522222,0.063931,0.500833,0.053619,0.00206,0.107809,0.287647,0.235803,0.272747,0.22747,0.308333,0.252401,0.500833,0.053619
23,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.6,0.044305,0.579167,0.041085,0.177598,0.099592,0.462654,0.057802,0.588405,0.098253,0.391667,0.080833,0.579167,0.041085
20,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.740741,0.073333,0.754167,0.065638,0.517757,0.12078,0.753849,0.051885,0.667946,0.085374,0.875,0.026089,0.754167,0.065638
19,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.777778,0.091299,0.773333,0.093433,0.561735,0.181537,0.740824,0.104231,0.775075,0.10686,0.733333,0.162087,0.773333,0.093433
22,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.785185,0.062009,0.783333,0.069221,0.579,0.126937,0.751664,0.088778,0.765517,0.069522,0.766667,0.168268,0.783333,0.069221
29,usilaughs,right,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.744444,0.090856,0.754167,0.088702,0.509859,0.172999,0.747267,0.083511,0.674165,0.087644,0.841667,0.084134,0.754167,0.088702


Friedman Test: statistic=30.021, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0,"(DummyClassifier, HandcraftedFeatureExtractor)","(DummyClassifier, HandcraftedFeatureExtractor).1","(LogisticRegression, AutonLab/MOMENT-1-large)","(LogisticRegression, HandcraftedFeatureExtractor)","(LogisticRegression, amazon/chronos-t5-large)","(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)"
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,1.0,0.793037,0.013027,0.001601,0.020756
"(DummyClassifier, HandcraftedFeatureExtractor)",1.0,1.0,0.793037,0.013027,0.001601,0.020756
"(LogisticRegression, AutonLab/MOMENT-1-large)",0.793037,0.793037,1.0,0.34218,0.104321,0.428918
"(LogisticRegression, HandcraftedFeatureExtractor)",0.013027,0.013027,0.34218,1.0,0.992869,0.999992
"(LogisticRegression, amazon/chronos-t5-large)",0.001601,0.001601,0.104321,0.992869,1.0,0.980292
"(LogisticRegression, ibm-granite/granite-timeseries-patchtsmixer)",0.020756,0.020756,0.428918,0.999992,0.980292,1.0
