In [4]:
import os
import pickle
from pathlib import Path
from typing import Generator

import numpy as np
import pandas as pd
import scikit_posthocs as sp
from IPython.display import HTML, display
from scipy import stats
from yaml import safe_load
from jmetal.lab.statistical_test.functions import (
    friedman_aligned_rank_test,
    friedman_aligned_ph_test,
)


pd.set_option("display.max_columns", 100)


def friedmann_nemenyi_test(
    unravelled_detailed_results: dict[str : dict[tuple[str, str], list[float]]],
) -> None:
    for metric in unravelled_detailed_results.keys():
        metric_df = pd.DataFrame(unravelled_detailed_results[metric])
        metric_vals = [metric_df[col].values for col in metric_df.columns]
        out = stats.friedmanchisquare(*metric_vals)
        print(
            f"Friedman Test {metric}: statistic={out.statistic:.3f}, pvalue={out.pvalue:.3f}"
        )
        metric_vals = np.array(metric_vals).T
        out = sp.posthoc_nemenyi_friedman(metric_vals)
        print("Nemenyi post-hoc test")
        feature_names = metric_df.columns
        out.index = feature_names
        out.columns = feature_names
        out = out.sort_index(axis=0).sort_index(axis=1)
        display(out)


def aligned_friedmann_holm_test(
    unravelled_detailed_results: dict[str : dict[tuple[str, str], list[float]]],
) -> None:
    for metric in unravelled_detailed_results.keys():
        metric_df = pd.DataFrame(unravelled_detailed_results[metric])
        # metric_vals = np.asarray([metric_df[col].values for col in metric_df.columns])
        metric_vals = metric_df.values

        out = friedman_aligned_rank_test(metric_vals)
        chi2_stat = out.loc['Aligned Rank stat'].iloc[0]
        p_value = out.loc['p-value'].iloc[0]
        print(f"Aligned-rank Friedman χ² {metric} = {chi2_stat:.3f}, p = {p_value:.3f}")

        z_vals, out, _ = friedman_aligned_ph_test(
            metric_vals, apv_procedure="Holm"  # Holm step-down correction
        )
        print("Holm post-hoc test")
        feature_names = metric_df.columns
        out.index = feature_names
        out.columns = feature_names
        out = out.sort_index(axis=0).sort_index(axis=1)
        display(out)


def present_results(
    paths: Generator,
    val_method: str = "lopo",
    remove_xgboost: bool = False,
    remove_chronos_small_from_test: bool = False,
    which_test: str = "friedmann-nemenyi",
    test_args: dict = {},
) -> None:
    results = []
    for reports_path in paths:
        report = pd.read_csv(reports_path, index_col=0)
        conf = safe_load(open(reports_path.parent / ".hydra/config.yaml"))
        if conf["validation_method"]["_target_"].split(".")[-1].lower() != val_method:
            continue
        model_name: str = conf["model"]["model"]["_target_"].split(".")[-1]
        features_name = (
            conf["feature_extractor"]["_target_"].split(".")[-1]
            if "model_name" not in conf["feature_extractor"]
            else conf["feature_extractor"]["model_name"]
        )
        validation_method = conf["validation_method"]["_target_"].split(".")[-1]
        if "aggregator" not in conf:
            aggregator = "MeanTimeAggregator"
        else:
            aggregator = (
                conf["aggregator"]["_target_"].split(".")[-1]
                if "_target_" in conf["aggregator"]
                else None
            )
        report_results = {}
        for col in report.columns:
            report_results[f"{col} avg"] = report[col].mean()
            report_results[f"{col} sem"] = report[col].sem() * 1.98  # 95% CI

        dataset = conf["dataset"]
        side = conf["side"]
        resampling = (
            conf["resampling"]["_target_"].split(".")[-1]
            if "resampling" in conf
            else "None"
        )
        resampling = resampling if resampling != "NoUnderSampler" else "None"
        # Collect results in a list of dicts
        if remove_xgboost and model_name == "XGBClassifier":
            continue
        results.append(
            {
                "Dataset": dataset,
                "Side": side,
                "Model": model_name,
                "Resampling": resampling,
                "Features": features_name,
                "Aggregator": aggregator,
                "Validation": validation_method,
                "Detailed Report": report,
                **report_results,
            }
        )

    # After the loop, display as a table
    df_results = pd.DataFrame(results)
    for (dataset, side, resampling), group in df_results.groupby(
        ["Dataset", "Side", "Resampling"]
    ):
        display(
            HTML(
                f"""
            <div style='background-color:#ffe6e6; padding:18px; margin:10px 0; border-radius:8px;'>
                <h2 style='color:#b30000; margin:0; font-size:2em;'>
                Results for Dataset: <i>{dataset}</i>, Side: <i>{side}</i>, resampling: <i>{resampling}</i>
                </h2>
            </div>
            """
            )
        )
        display(
            group.sort_values(by=["Model", "Features", "Aggregator"])
            .drop(columns=["Detailed Report"])
            .drop_duplicates()
        )
        if "aggregator" in test_args.keys() and group["Aggregator"].nunique() > 2:
            group = group[
                (group["Aggregator"] == test_args["aggregator"])
                | (group["Aggregator"].isnull())
            ]

        unravelled_detailed_results = {
            metric: {} for metric in group["Detailed Report"].iloc[0].columns
        }
        for i, model_results in group.iterrows():
            for metric in model_results["Detailed Report"].columns:
                cv_results = model_results["Detailed Report"][metric]
                unravelled_detailed_results[metric][
                    (model_results["Model"], model_results["Features"])
                ] = cv_results

        if which_test == "friedmann-nemenyi":
            friedmann_nemenyi_test(unravelled_detailed_results)
        elif which_test == "alignedfriedmann-holm":
            aligned_friedmann_holm_test(unravelled_detailed_results)
        else:
            raise ValueError(f"Unknown test: {which_test}")

    return results

In [5]:
results_path: str = "../outputs/"
all_results = list(Path(results_path).glob("*/*/*/reports.csv"))
results_path_adula: str = "../outputs_adula/"
all_results += list(Path(results_path_adula).glob("*/*/*/reports.csv"))


results_lopo = present_results(
    all_results,
    val_method="lopo",
    remove_xgboost=True,
    remove_chronos_small_from_test=True,
    which_test="friedmann-nemenyi",
    test_args={
        "aggregator": "MeanChanAggregator"}
)

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
6,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.421389,0.092253,0.518713,0.027086,0.034524,0.051993,0.318314,0.17451,0.272775,0.169406,0.457165,0.26818,0.518713,0.027086
8,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.484316,0.153246,0.5,0.0,0.0,0.0,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.5,0.0
5,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.627536,0.107493,0.641607,0.09203,0.282201,0.15794,0.535047,0.163484,0.623977,0.168456,0.580068,0.249043,0.641607,0.09203
4,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.482812,0.153343,0.498413,0.003143,-0.015532,0.030753,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.498413,0.003143
7,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.499654,0.086121,0.500339,0.050637,-0.003384,0.100121,0.297661,0.185441,0.311768,0.213402,0.328211,0.214678,0.500339,0.050637
3,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.425518,0.088017,0.492814,0.032105,-0.009583,0.084659,0.288127,0.098984,0.466334,0.196568,0.360991,0.23421,0.492814,0.032105
9,apsync,engagement_10sec,LogisticRegression,,paris-noah/Mantis-8M,,LOPO,0.560433,0.079193,0.564281,0.072435,0.140943,0.140576,0.507337,0.157066,0.529815,0.212513,0.573615,0.174038,0.564281,0.072435


Friedman Test accuracy_score: statistic=10.146, pvalue=0.119
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.989917,0.220024,0.99625,0.676979,1.0,0.676979
LogisticRegression,AutonLab/MOMENT-1-large,0.989917,1.0,0.676979,1.0,0.977501,0.989917,0.977501
LogisticRegression,HandcraftedFeatureExtractor,0.220024,0.676979,1.0,0.594339,0.989917,0.220024,0.989917
LogisticRegression,amazon/chronos-t5-large,0.99625,1.0,0.594339,1.0,0.956377,0.99625,0.956377
LogisticRegression,amazon/chronos-t5-small,0.676979,0.977501,0.989917,0.956377,1.0,0.676979,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.989917,0.220024,0.99625,0.676979,1.0,0.676979
LogisticRegression,paris-noah/Mantis-8M,0.676979,0.977501,0.989917,0.956377,1.0,0.676979,1.0


Friedman Test balanced_accuracy_score: statistic=12.946, pvalue=0.044
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998928,0.24929,0.99625,1.0,1.0,0.968186
LogisticRegression,AutonLab/MOMENT-1-large,0.998928,1.0,0.078926,1.0,0.99625,0.999501,0.78969
LogisticRegression,HandcraftedFeatureExtractor,0.24929,0.078926,1.0,0.056196,0.314698,0.220024,0.822613
LogisticRegression,amazon/chronos-t5-large,0.99625,1.0,0.056196,1.0,0.989917,0.997914,0.716494
LogisticRegression,amazon/chronos-t5-small,1.0,0.99625,0.314698,0.989917,1.0,0.999997,0.984635
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.999501,0.220024,0.997914,0.999997,1.0,0.956377
LogisticRegression,paris-noah/Mantis-8M,0.968186,0.78969,0.822613,0.716494,0.984635,0.956377,1.0


Friedman Test matthews_corrcoef: statistic=15.065, pvalue=0.020
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.99625,0.193127,0.989917,0.999981,0.999997,0.993682
LogisticRegression,AutonLab/MOMENT-1-large,0.99625,1.0,0.039225,1.0,0.999796,0.999501,0.852688
LogisticRegression,HandcraftedFeatureExtractor,0.193127,0.039225,1.0,0.026853,0.108614,0.126427,0.594339
LogisticRegression,amazon/chronos-t5-large,0.989917,1.0,0.026853,1.0,0.998928,0.997914,0.78969
LogisticRegression,amazon/chronos-t5-small,0.999981,0.999796,0.108614,0.998928,1.0,1.0,0.968186
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999997,0.999501,0.126427,0.997914,1.0,1.0,0.977501
LogisticRegression,paris-noah/Mantis-8M,0.993682,0.852688,0.594339,0.78969,0.968186,0.977501,1.0


Friedman Test f1_score: statistic=14.567, pvalue=0.024
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.468444,0.993682,0.468444,0.989917,0.997914,0.997914
LogisticRegression,AutonLab/MOMENT-1-large,0.468444,1.0,0.126427,1.0,0.903577,0.822613,0.168595
LogisticRegression,HandcraftedFeatureExtractor,0.993682,0.126427,1.0,0.126427,0.78969,0.879714,1.0
LogisticRegression,amazon/chronos-t5-large,0.468444,1.0,0.126427,1.0,0.903577,0.822613,0.168595
LogisticRegression,amazon/chronos-t5-small,0.989917,0.903577,0.78969,0.903577,1.0,0.999997,0.852688
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.997914,0.822613,0.879714,0.822613,0.999997,1.0,0.924253
LogisticRegression,paris-noah/Mantis-8M,0.997914,0.168595,1.0,0.168595,0.852688,0.924253,1.0


Friedman Test precision_score: statistic=25.095, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.78969,0.24929,0.78969,0.999981,0.903577,0.852688
LogisticRegression,AutonLab/MOMENT-1-large,0.78969,1.0,0.003881,1.0,0.636104,0.126427,0.092826
LogisticRegression,HandcraftedFeatureExtractor,0.24929,0.003881,1.0,0.003881,0.388375,0.924253,0.956377
LogisticRegression,amazon/chronos-t5-large,0.78969,1.0,0.003881,1.0,0.636104,0.126427,0.092826
LogisticRegression,amazon/chronos-t5-small,0.999981,0.636104,0.388375,0.636104,1.0,0.968186,0.941805
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.903577,0.126427,0.924253,0.126427,0.968186,1.0,1.0
LogisticRegression,paris-noah/Mantis-8M,0.852688,0.092826,0.956377,0.092826,0.941805,1.0,1.0


Friedman Test recall_score: statistic=15.016, pvalue=0.020
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.468444,0.984635,0.468444,0.989917,0.99625,0.998928
LogisticRegression,AutonLab/MOMENT-1-large,0.468444,1.0,0.092826,1.0,0.903577,0.852688,0.193127
LogisticRegression,HandcraftedFeatureExtractor,0.984635,0.092826,1.0,0.092826,0.716494,0.78969,0.99993
LogisticRegression,amazon/chronos-t5-large,0.468444,1.0,0.092826,1.0,0.903577,0.852688,0.193127
LogisticRegression,amazon/chronos-t5-small,0.989917,0.903577,0.716494,0.903577,1.0,1.0,0.879714
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.99625,0.852688,0.78969,0.852688,1.0,1.0,0.924253
LogisticRegression,paris-noah/Mantis-8M,0.998928,0.193127,0.99993,0.193127,0.879714,0.924253,1.0


Friedman Test roc_auc_score: statistic=12.946, pvalue=0.044
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998928,0.24929,0.99625,1.0,1.0,0.968186
LogisticRegression,AutonLab/MOMENT-1-large,0.998928,1.0,0.078926,1.0,0.99625,0.999501,0.78969
LogisticRegression,HandcraftedFeatureExtractor,0.24929,0.078926,1.0,0.056196,0.314698,0.220024,0.822613
LogisticRegression,amazon/chronos-t5-large,0.99625,1.0,0.056196,1.0,0.989917,0.997914,0.716494
LogisticRegression,amazon/chronos-t5-small,1.0,0.99625,0.314698,0.989917,1.0,0.999997,0.984635
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,1.0,0.999501,0.220024,0.997914,0.999997,1.0,0.956377
LogisticRegression,paris-noah/Mantis-8M,0.968186,0.78969,0.822613,0.716494,0.984635,0.956377,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
47,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.424948,0.142739,0.440485,0.146541,0.012393,0.032708,0.401787,0.182682,0.461874,0.27004,0.441729,0.151303,0.510965,0.025945
49,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.253624,0.149921,0.338205,0.178744,-0.015014,0.071629,0.198312,0.162606,0.357748,0.291745,0.315289,0.277528,0.473488,0.065684
46,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.261139,0.131941,0.331712,0.143075,-0.081566,0.096574,0.274058,0.176519,0.413604,0.258524,0.43388,0.31093,0.438179,0.062473
45,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.306481,0.141948,0.35797,0.164798,-0.017755,0.132807,0.290548,0.145073,0.475471,0.301033,0.334796,0.241437,0.464015,0.122785
48,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.30748,0.164373,0.318506,0.169606,-0.056672,0.080626,0.26962,0.186584,0.300543,0.2387,0.310951,0.167046,0.445908,0.068776


Friedman Test accuracy_score: statistic=8.224, pvalue=0.084
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.066759,0.388334,0.761079,0.493589
LogisticRegression,AutonLab/MOMENT-1-large,0.066759,1.0,0.916448,0.603784,0.848974
LogisticRegression,HandcraftedFeatureExtractor,0.388334,0.916448,1.0,0.976379,0.999817
LogisticRegression,amazon/chronos-t5-large,0.761079,0.603784,0.976379,1.0,0.993349
LogisticRegression,amazon/chronos-t5-small,0.493589,0.848974,0.999817,0.993349,1.0


Friedman Test balanced_accuracy_score: statistic=2.094, pvalue=0.719
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.848974,0.885479,0.976379,0.711104
LogisticRegression,AutonLab/MOMENT-1-large,0.848974,1.0,0.999988,0.993349,0.999091
LogisticRegression,HandcraftedFeatureExtractor,0.885479,0.999988,1.0,0.997194,0.997194
LogisticRegression,amazon/chronos-t5-large,0.976379,0.993349,0.997194,1.0,0.961612
LogisticRegression,amazon/chronos-t5-small,0.711104,0.999091,0.997194,0.961612,1.0


Friedman Test matthews_corrcoef: statistic=3.192, pvalue=0.526
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999091,0.658328,0.999091,0.961612
LogisticRegression,AutonLab/MOMENT-1-large,0.999091,1.0,0.807304,1.0,0.993349
LogisticRegression,HandcraftedFeatureExtractor,0.658328,0.807304,1.0,0.807304,0.961612
LogisticRegression,amazon/chronos-t5-large,0.999091,1.0,0.807304,1.0,0.993349
LogisticRegression,amazon/chronos-t5-small,0.961612,0.993349,0.961612,0.993349,1.0


Friedman Test f1_score: statistic=5.864, pvalue=0.210
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.252294,0.916448,0.493589,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.252294,1.0,0.761079,0.993349,0.976379
LogisticRegression,HandcraftedFeatureExtractor,0.916448,0.761079,1.0,0.941773,0.976379
LogisticRegression,amazon/chronos-t5-large,0.493589,0.993349,0.941773,1.0,0.999817
LogisticRegression,amazon/chronos-t5-small,0.603784,0.976379,0.976379,0.999817,1.0


Friedman Test precision_score: statistic=2.772, pvalue=0.597
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.961612,0.916448,0.993349,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.961612,1.0,0.999817,0.999091,0.941773
LogisticRegression,HandcraftedFeatureExtractor,0.916448,0.999817,1.0,0.993349,0.976379
LogisticRegression,amazon/chronos-t5-large,0.993349,0.999091,0.993349,1.0,0.848974
LogisticRegression,amazon/chronos-t5-small,0.603784,0.941773,0.976379,0.848974,1.0


Friedman Test recall_score: statistic=4.545, pvalue=0.337
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.493589,0.961612,0.658328,0.603784
LogisticRegression,AutonLab/MOMENT-1-large,0.493589,1.0,0.885479,0.999091,0.999817
LogisticRegression,HandcraftedFeatureExtractor,0.961612,0.885479,1.0,0.961612,0.941773
LogisticRegression,amazon/chronos-t5-large,0.658328,0.999091,0.961612,1.0,0.999988
LogisticRegression,amazon/chronos-t5-small,0.603784,0.999817,0.941773,0.999988,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.987386,0.751424,0.987386,0.930677
LogisticRegression,AutonLab/MOMENT-1-large,0.987386,1.0,0.956544,1.0,0.99824
LogisticRegression,HandcraftedFeatureExtractor,0.751424,0.956544,1.0,0.956544,0.994615
LogisticRegression,amazon/chronos-t5-large,0.987386,1.0,0.956544,1.0,0.99824
LogisticRegression,amazon/chronos-t5-small,0.930677,0.99824,0.994615,0.99824,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
52,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.383712,0.131498,0.475504,0.038458,-0.030541,0.053279,0.428377,0.220202,0.548873,0.292851,0.356154,0.182575,0.475504,0.038458
54,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.67624,0.045756,0.624431,0.049235,0.201433,0.100414,0.769869,0.050766,0.876599,0.048957,0.695483,0.083177,0.624431,0.049235
51,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.720179,0.051051,0.752718,0.074657,0.374824,0.124084,0.804301,0.033096,0.927746,0.050036,0.713747,0.046953,0.752718,0.074657
50,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.714566,0.034155,0.748198,0.048663,0.368776,0.08969,0.798172,0.025449,0.92476,0.04556,0.706176,0.044744,0.748198,0.048663
53,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660845,0.014725,0.686496,0.039834,0.269149,0.046403,0.756432,0.022548,0.892093,0.069324,0.661942,0.029569,0.686496,0.039834
1,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.602818,0.026664,0.580985,0.024202,0.120241,0.040113,0.706916,0.046263,0.851138,0.061871,0.608197,0.053613,0.580985,0.024202
0,bihearts,left,LogisticRegression,GroupUnderSampler,paris-noah/Mantis-8M,,LOPO,0.770045,0.032608,0.77566,0.074388,0.424481,0.111175,0.842419,0.024265,0.933836,0.050791,0.771931,0.042321,0.77566,0.074388


Friedman Test accuracy_score: statistic=32.265, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.092826,0.001465,0.009592,0.280879,0.924253,5.3e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.092826,1.0,0.879714,0.989917,0.998928,0.676979,0.427763
LogisticRegression,HandcraftedFeatureExtractor,0.001465,0.879714,1.0,0.998928,0.594339,0.066766,0.989917
LogisticRegression,amazon/chronos-t5-large,0.009592,0.989917,0.998928,1.0,0.879714,0.220024,0.879714
LogisticRegression,amazon/chronos-t5-small,0.280879,0.998928,0.594339,0.879714,1.0,0.924253,0.168595
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.924253,0.676979,0.066766,0.220024,0.924253,1.0,0.006155
LogisticRegression,paris-noah/Mantis-8M,5.3e-05,0.427763,0.989917,0.879714,0.168595,0.006155,1.0


Friedman Test balanced_accuracy_score: statistic=34.776, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.510049,0.000517,0.001465,0.126427,0.924253,0.000171
LogisticRegression,AutonLab/MOMENT-1-large,0.510049,1.0,0.220024,0.350599,0.989917,0.989917,0.126427
LogisticRegression,HandcraftedFeatureExtractor,0.000517,0.220024,1.0,0.999981,0.676979,0.032532,0.999981
LogisticRegression,amazon/chronos-t5-large,0.001465,0.350599,0.999981,1.0,0.822613,0.066766,0.998928
LogisticRegression,amazon/chronos-t5-small,0.126427,0.989917,0.676979,0.822613,1.0,0.754199,0.510049
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.924253,0.989917,0.032532,0.066766,0.754199,1.0,0.01468
LogisticRegression,paris-noah/Mantis-8M,0.000171,0.126427,0.999981,0.998928,0.510049,0.01468,1.0


Friedman Test matthews_corrcoef: statistic=35.939, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.510049,0.0003,0.002405,0.168595,0.924253,9.6e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.510049,1.0,0.168595,0.427763,0.99625,0.989917,0.092826
LogisticRegression,HandcraftedFeatureExtractor,0.0003,0.168595,1.0,0.998928,0.510049,0.02206,0.999981
LogisticRegression,amazon/chronos-t5-large,0.002405,0.427763,0.998928,1.0,0.822613,0.092826,0.989917
LogisticRegression,amazon/chronos-t5-small,0.168595,0.99625,0.510049,0.822613,1.0,0.822613,0.350599
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.924253,0.989917,0.02206,0.092826,0.822613,1.0,0.009592
LogisticRegression,paris-noah/Mantis-8M,9.6e-05,0.092826,0.999981,0.989917,0.350599,0.009592,1.0


Friedman Test f1_score: statistic=31.592, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.066766,0.001465,0.006155,0.427763,0.879714,9.6e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.066766,1.0,0.924253,0.989917,0.977501,0.676979,0.594339
LogisticRegression,HandcraftedFeatureExtractor,0.001465,0.924253,1.0,0.999796,0.427763,0.092826,0.99625
LogisticRegression,amazon/chronos-t5-large,0.006155,0.989917,0.999796,1.0,0.676979,0.220024,0.956377
LogisticRegression,amazon/chronos-t5-small,0.427763,0.977501,0.427763,0.676979,1.0,0.989917,0.126427
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.879714,0.676979,0.092826,0.220024,0.989917,1.0,0.01468
LogisticRegression,paris-noah/Mantis-8M,9.6e-05,0.594339,0.99625,0.956377,0.126427,0.01468,1.0


Friedman Test precision_score: statistic=33.612, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.594339,0.000517,0.001465,0.092826,0.879714,0.0003
LogisticRegression,AutonLab/MOMENT-1-large,0.594339,1.0,0.168595,0.280879,0.956377,0.998928,0.126427
LogisticRegression,HandcraftedFeatureExtractor,0.000517,0.168595,1.0,0.999981,0.754199,0.047065,1.0
LogisticRegression,amazon/chronos-t5-large,0.001465,0.280879,0.999981,1.0,0.879714,0.092826,0.999796
LogisticRegression,amazon/chronos-t5-small,0.092826,0.956377,0.754199,0.879714,1.0,0.754199,0.676979
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.879714,0.998928,0.047065,0.092826,0.754199,1.0,0.032532
LogisticRegression,paris-noah/Mantis-8M,0.0003,0.126427,1.0,0.999796,0.676979,0.032532,1.0


Friedman Test recall_score: statistic=26.041, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.02206,0.032532,0.011893,0.427763,0.852688,0.0003
LogisticRegression,AutonLab/MOMENT-1-large,0.02206,1.0,1.0,0.999997,0.879714,0.468444,0.924253
LogisticRegression,HandcraftedFeatureExtractor,0.032532,1.0,1.0,0.99993,0.924253,0.552164,0.879714
LogisticRegression,amazon/chronos-t5-large,0.011893,0.999997,0.99993,1.0,0.78969,0.350599,0.968186
LogisticRegression,amazon/chronos-t5-small,0.427763,0.879714,0.924253,0.78969,1.0,0.993682,0.220024
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.852688,0.468444,0.552164,0.350599,0.993682,1.0,0.039225
LogisticRegression,paris-noah/Mantis-8M,0.0003,0.924253,0.879714,0.968186,0.220024,0.039225,1.0


Friedman Test roc_auc_score: statistic=34.776, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.510049,0.000517,0.001465,0.126427,0.924253,0.000171
LogisticRegression,AutonLab/MOMENT-1-large,0.510049,1.0,0.220024,0.350599,0.989917,0.989917,0.126427
LogisticRegression,HandcraftedFeatureExtractor,0.000517,0.220024,1.0,0.999981,0.676979,0.032532,0.999981
LogisticRegression,amazon/chronos-t5-large,0.001465,0.350599,0.999981,1.0,0.822613,0.066766,0.998928
LogisticRegression,amazon/chronos-t5-small,0.126427,0.989917,0.676979,0.822613,1.0,0.754199,0.510049
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.924253,0.989917,0.032532,0.066766,0.754199,1.0,0.01468
LogisticRegression,paris-noah/Mantis-8M,0.000171,0.126427,0.999981,0.998928,0.510049,0.01468,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
13,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.566454,0.069834,0.563785,0.064325,0.014386,0.013761,0.607081,0.101947,0.662085,0.133996,0.638808,0.116072,0.512763,0.01087
15,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.65779,0.132067,0.630016,0.126509,-0.008993,0.008466,0.733725,0.123666,0.655589,0.134423,0.895782,0.117773,0.495507,0.003905
12,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.644175,0.129965,0.61771,0.123183,-0.007219,0.017336,0.730887,0.120988,0.656422,0.134193,0.897988,0.118505,0.498472,0.004105
11,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.651427,0.131188,0.625663,0.12549,-0.003951,0.01794,0.731138,0.122383,0.655591,0.134032,0.893233,0.118862,0.496869,0.01015
14,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660957,0.127395,0.631197,0.12239,0.003315,0.006441,0.734903,0.122734,0.656956,0.134046,0.897834,0.118219,0.500452,0.002127
16,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.655841,0.133618,0.629387,0.127295,0.0,0.0,0.738007,0.122733,0.656888,0.134081,0.911996,0.118828,0.5,0.0
10,seed,engagement_10sec_35thresh,LogisticRegression,,paris-noah/Mantis-8M,,LOPO,0.598837,0.11542,0.583989,0.11178,-0.009641,0.039338,0.676068,0.115847,0.661471,0.13359,0.763153,0.119179,0.499248,0.026052


Friedman Test accuracy_score: statistic=10.157, pvalue=0.118
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.74408,0.999123,0.74408,0.634771,0.853723,0.999985
LogisticRegression,AutonLab/MOMENT-1-large,0.74408,1.0,0.948715,1.0,0.999998,0.999994,0.588636
LogisticRegression,HandcraftedFeatureExtractor,0.999123,0.948715,1.0,0.948715,0.896552,0.983103,0.991644
LogisticRegression,amazon/chronos-t5-large,0.74408,1.0,0.948715,1.0,0.999998,0.999994,0.588636
LogisticRegression,amazon/chronos-t5-small,0.634771,0.999998,0.896552,0.999998,1.0,0.999782,0.472821
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.853723,0.999994,0.983103,0.999994,0.999782,1.0,0.723217
LogisticRegression,paris-noah/Mantis-8M,0.999985,0.588636,0.991644,0.588636,0.472821,0.723217,1.0


Friedman Test balanced_accuracy_score: statistic=5.667, pvalue=0.462
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.999933,0.999875,0.999998,0.999998,0.783813
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.999998,0.999123,0.999933,0.999933,0.853723
LogisticRegression,HandcraftedFeatureExtractor,0.999933,0.999998,1.0,0.995142,0.999123,0.999123,0.920265
LogisticRegression,amazon/chronos-t5-large,0.999875,0.999123,0.995142,1.0,0.999998,0.999998,0.565371
LogisticRegression,amazon/chronos-t5-small,0.999998,0.999933,0.999123,0.999998,1.0,1.0,0.679842
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999998,0.999933,0.999123,0.999998,1.0,1.0,0.679842
LogisticRegression,paris-noah/Mantis-8M,0.783813,0.853723,0.920265,0.565371,0.679842,0.679842,1.0


Friedman Test matthews_corrcoef: statistic=5.914, pvalue=0.433
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.853723,0.989291,0.983103,0.999638,0.993576,0.853723
LogisticRegression,AutonLab/MOMENT-1-large,0.853723,1.0,0.99813,0.999123,0.974563,0.996392,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.989291,0.99813,1.0,1.0,0.999875,1.0,0.99813
LogisticRegression,amazon/chronos-t5-large,0.983103,0.999123,1.0,1.0,0.999638,0.999999,0.999123
LogisticRegression,amazon/chronos-t5-small,0.999638,0.974563,0.999875,0.999638,1.0,0.999966,0.974563
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.993576,0.996392,1.0,0.999999,0.999966,1.0,0.996392
LogisticRegression,paris-noah/Mantis-8M,0.853723,1.0,0.99813,0.999123,0.974563,0.996392,1.0


Friedman Test f1_score: statistic=50.311, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.005522,0.011476,0.001108,0.005522,0.000343,0.979155
LogisticRegression,AutonLab/MOMENT-1-large,0.005522,1.0,0.999994,0.999638,1.0,0.993576,0.083098
LogisticRegression,HandcraftedFeatureExtractor,0.011476,0.999994,1.0,0.996392,0.999994,0.974563,0.138627
LogisticRegression,amazon/chronos-t5-large,0.001108,0.999638,0.996392,1.0,0.999638,0.999966,0.025298
LogisticRegression,amazon/chronos-t5-small,0.005522,1.0,0.999994,0.999638,1.0,0.993576,0.083098
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000343,0.993576,0.974563,0.999966,0.993576,1.0,0.010193
LogisticRegression,paris-noah/Mantis-8M,0.979155,0.083098,0.138627,0.025298,0.083098,0.010193,1.0


Friedman Test precision_score: statistic=6.986, pvalue=0.322
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.74408,0.989291,0.983103,0.99813,0.989291,0.82049
LogisticRegression,AutonLab/MOMENT-1-large,0.74408,1.0,0.989291,0.993576,0.963233,0.989291,0.999999
LogisticRegression,HandcraftedFeatureExtractor,0.989291,0.989291,1.0,1.0,0.999994,1.0,0.996392
LogisticRegression,amazon/chronos-t5-large,0.983103,0.993576,1.0,1.0,0.999966,1.0,0.99813
LogisticRegression,amazon/chronos-t5-small,0.99813,0.963233,0.999994,0.999966,1.0,0.999994,0.983103
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.989291,0.989291,1.0,1.0,0.999994,1.0,0.996392
LogisticRegression,paris-noah/Mantis-8M,0.82049,0.999999,0.996392,0.99813,0.983103,0.996392,1.0


Friedman Test recall_score: statistic=61.420, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.004865,0.010193,0.002213,0.011476,0.000216,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.004865,1.0,0.999994,0.999994,0.999985,0.989291,0.007085
LogisticRegression,HandcraftedFeatureExtractor,0.010193,0.999994,1.0,0.999638,1.0,0.963233,0.014485
LogisticRegression,amazon/chronos-t5-large,0.002213,0.999994,0.999638,1.0,0.999426,0.99813,0.003301
LogisticRegression,amazon/chronos-t5-small,0.011476,0.999985,1.0,0.999426,1.0,0.956395,0.016239
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000216,0.989291,0.963233,0.99813,0.956395,1.0,0.000343
LogisticRegression,paris-noah/Mantis-8M,1.0,0.007085,0.014485,0.003301,0.016239,0.000343,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.416304,0.95446,0.931442,0.991025,0.95446,0.53731
LogisticRegression,AutonLab/MOMENT-1-large,0.416304,1.0,0.95446,0.971463,0.865335,0.95446,0.999997
LogisticRegression,HandcraftedFeatureExtractor,0.95446,0.95446,1.0,1.0,0.999967,1.0,0.983312
LogisticRegression,amazon/chronos-t5-large,0.931442,0.971463,1.0,1.0,0.99982,1.0,0.991025
LogisticRegression,amazon/chronos-t5-small,0.991025,0.865335,0.999967,0.99982,1.0,0.999967,0.931442
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.95446,0.95446,1.0,1.0,0.999967,1.0,0.983312
LogisticRegression,paris-noah/Mantis-8M,0.53731,0.999997,0.983312,0.991025,0.931442,0.983312,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
20,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.488889,0.027326,0.471389,0.023477,-0.057482,0.047797,0.307668,0.06837,0.312222,0.068741,0.313889,0.074501,0.471389,0.023477
22,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.544444,0.03886,0.529722,0.037839,0.06787,0.079153,0.426971,0.052136,0.48812,0.063227,0.397222,0.055763,0.529722,0.037839
19,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.639506,0.072138,0.633889,0.069685,0.29942,0.151118,0.581475,0.082914,0.672775,0.109169,0.583333,0.096243,0.633889,0.069685
18,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.711111,0.063328,0.7025,0.06392,0.433858,0.134917,0.63382,0.093704,0.716975,0.11233,0.625,0.105474,0.7025,0.06392
21,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.695062,0.066031,0.686389,0.066281,0.400102,0.137941,0.612699,0.097435,0.683939,0.110048,0.608333,0.108515,0.686389,0.066281


Friedman Test accuracy_score: statistic=46.851, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.286002,0.016439,2.512613e-08,1e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.2860024,1.0,0.783543,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01643881,0.783543,1.0,0.03464118,0.375652
LogisticRegression,amazon/chronos-t5-large,2.512613e-08,0.000604,0.034641,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,9.53622e-06,0.02723,0.375652,0.8262746,1.0


Friedman Test balanced_accuracy_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Friedman Test matthews_corrcoef: statistic=45.064, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.193592,0.00632,3.222757e-08,4e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.1935917,1.0,0.736935,0.001620493,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.006319986,0.736935,1.0,0.08382349,0.450246
LogisticRegression,amazon/chronos-t5-large,3.222757e-08,0.00162,0.083823,1.0,0.912338
LogisticRegression,amazon/chronos-t5-small,4.111551e-06,0.030743,0.450246,0.9123379,1.0


Friedman Test f1_score: statistic=41.696, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.897689,0.011036,8.85139e-07,0.000253
LogisticRegression,AutonLab/MOMENT-1-large,0.8976892,1.0,0.136288,8.404476e-05,0.008383
LogisticRegression,HandcraftedFeatureExtractor,0.0110361,0.136288,1.0,0.2102181,0.864477
LogisticRegression,amazon/chronos-t5-large,8.85139e-07,8.4e-05,0.210218,1.0,0.783543
LogisticRegression,amazon/chronos-t5-small,0.0002527819,0.008383,0.864477,0.7835433,1.0


Friedman Test precision_score: statistic=49.703, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.04898,7e-05,9.133075e-09,5e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.04898045,1.0,0.399924,0.006319986,0.149262
LogisticRegression,HandcraftedFeatureExtractor,6.95391e-05,0.399924,1.0,0.4761338,0.984218
LogisticRegression,amazon/chronos-t5-large,9.133075e-09,0.00632,0.476134,1.0,0.805438
LogisticRegression,amazon/chronos-t5-small,5.086584e-06,0.149262,0.984218,0.8054383,1.0


Friedman Test recall_score: statistic=37.333, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.760677,0.004081,1e-05,0.000428
LogisticRegression,AutonLab/MOMENT-1-large,0.760677,1.0,0.136288,0.001899,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.004081,0.136288,1.0,0.63538,0.979204
LogisticRegression,amazon/chronos-t5-large,1e-05,0.001899,0.63538,1.0,0.925665
LogisticRegression,amazon/chronos-t5-small,0.000428,0.030743,0.979204,0.925665,1.0


Friedman Test roc_auc_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
41,usilaughs,right,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.537037,0.031791,0.528889,0.030555,0.057068,0.061686,0.416111,0.081522,0.3892,0.0778,0.455556,0.0908,0.528889,0.030555
40,usilaughs,right,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.730864,0.103107,0.743611,0.096125,0.479467,0.205783,0.777788,0.07931,0.751362,0.106554,0.858333,0.05658,0.743611,0.096125
39,usilaughs,right,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.737037,0.072515,0.7375,0.070212,0.494875,0.144505,0.707187,0.084681,0.744827,0.097383,0.741667,0.097647,0.7375,0.070212


Friedman Test accuracy_score: statistic=13.611, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.004454,0.006822
LogisticRegression,HandcraftedFeatureExtractor,0.004454,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.006822,0.990854,1.0


Friedman Test balanced_accuracy_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Friedman Test matthews_corrcoef: statistic=14.966, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.002276,0.003576
LogisticRegression,HandcraftedFeatureExtractor,0.002276,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.003576,0.990854,1.0


Friedman Test f1_score: statistic=25.737, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,4e-06,0.002859
LogisticRegression,HandcraftedFeatureExtractor,4e-06,1.0,0.268023
LogisticRegression,amazon/chronos-t5-large,0.002859,0.268023,1.0


Friedman Test precision_score: statistic=23.138, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.000243,0.000185
LogisticRegression,HandcraftedFeatureExtractor,0.000243,1.0,0.997705
LogisticRegression,amazon/chronos-t5-large,0.000185,0.997705,1.0


Friedman Test recall_score: statistic=32.849, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,2.525956e-07,0.004454
LogisticRegression,HandcraftedFeatureExtractor,2.525956e-07,1.0,0.072064
LogisticRegression,amazon/chronos-t5-large,0.004453623,0.07206404,1.0


Friedman Test roc_auc_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
34,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.516049,0.033713,0.505278,0.034151,0.010152,0.068488,0.40094,0.064931,0.402968,0.060762,0.408333,0.073087,0.505278,0.034151
28,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,CatAggregator,LOPO,0.553086,0.04597,0.539722,0.046472,0.080817,0.097864,0.434615,0.069687,0.48263,0.072581,0.419444,0.080478,0.539722,0.046472
36,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.571605,0.043871,0.548333,0.043829,0.121783,0.105163,0.397259,0.068851,0.572211,0.107839,0.338889,0.069839,0.548333,0.043829
29,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanTimeAggregator,LOPO,0.553086,0.04597,0.539722,0.046472,0.080817,0.097864,0.434615,0.069687,0.48263,0.072581,0.419444,0.080478,0.539722,0.046472
33,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.724691,0.099802,0.73,0.094771,0.465681,0.197678,0.743902,0.085226,0.752967,0.108212,0.777778,0.076837,0.73,0.094771
24,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,CatAggregator,LOPO,0.735802,0.069235,0.729444,0.068598,0.482407,0.145309,0.675669,0.094845,0.736628,0.112852,0.672222,0.101907,0.729444,0.068598
32,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.730864,0.071708,0.723611,0.071174,0.478667,0.146334,0.664974,0.094681,0.771658,0.101693,0.658333,0.10683,0.723611,0.071174
26,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanTimeAggregator,LOPO,0.759259,0.07126,0.756944,0.070498,0.534357,0.145929,0.716721,0.092405,0.774558,0.099563,0.736111,0.10226,0.756944,0.070498
25,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,CatAggregator,LOPO,0.745679,0.084439,0.740278,0.084238,0.497599,0.176246,0.689754,0.106699,0.76913,0.112825,0.691667,0.114957,0.740278,0.084238
35,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.730864,0.079087,0.722778,0.079596,0.463984,0.165255,0.655712,0.109457,0.726762,0.118299,0.65,0.114753,0.722778,0.079596


Friedman Test accuracy_score: statistic=29.690, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.815602,0.002817,0.011724,0.001555,0.001555,0.023477
LogisticRegression,AutonLab/MOMENT-1-large,0.815602,1.0,0.189888,0.394299,0.135296,0.135296,0.532511
LogisticRegression,HandcraftedFeatureExtractor,0.002817,0.189888,1.0,0.999734,0.999999,0.999999,0.996902
LogisticRegression,amazon/chronos-t5-large,0.011724,0.394299,0.999734,1.0,0.998282,0.998282,0.999993
LogisticRegression,amazon/chronos-t5-small,0.001555,0.135296,0.999999,0.998282,1.0,1.0,0.989572
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001555,0.135296,0.999999,0.998282,1.0,1.0,0.989572
LogisticRegression,paris-noah/Mantis-8M,0.023477,0.532511,0.996902,0.999993,0.989572,0.989572,1.0


Friedman Test balanced_accuracy_score: statistic=33.144, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.845331,0.000441,0.008568,0.001555,0.001076,0.009522
LogisticRegression,AutonLab/MOMENT-1-large,0.845331,1.0,0.052952,0.305617,0.117116,0.093463,0.322449
LogisticRegression,HandcraftedFeatureExtractor,0.000441,0.052952,1.0,0.989572,0.999943,0.999993,0.987171
LogisticRegression,amazon/chronos-t5-large,0.008568,0.305617,0.989572,1.0,0.999391,0.998282,1.0
LogisticRegression,amazon/chronos-t5-small,0.001555,0.117116,0.999943,0.999391,1.0,1.0,0.999119
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001076,0.093463,0.999993,0.998282,1.0,1.0,0.997672
LogisticRegression,paris-noah/Mantis-8M,0.009522,0.322449,0.987171,1.0,0.999119,0.997672,1.0


Friedman Test matthews_corrcoef: statistic=33.250, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.815602,0.000738,0.007702,0.001218,0.000441,0.010572
LogisticRegression,AutonLab/MOMENT-1-large,0.815602,1.0,0.086483,0.322449,0.117116,0.062652,0.37571
LogisticRegression,HandcraftedFeatureExtractor,0.000738,0.086483,1.0,0.996902,1.0,1.0,0.993331
LogisticRegression,amazon/chronos-t5-large,0.007702,0.322449,0.996902,1.0,0.999119,0.991613,1.0
LogisticRegression,amazon/chronos-t5-small,0.001218,0.117116,1.0,0.999119,1.0,0.999985,0.997672
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000441,0.062652,1.0,0.991613,0.999985,1.0,0.984373
LogisticRegression,paris-noah/Mantis-8M,0.010572,0.37571,0.993331,1.0,0.997672,0.984373,1.0


Friedman Test f1_score: statistic=46.832, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999833,8e-06,0.012989,0.005559,0.000441,0.011724
LogisticRegression,AutonLab/MOMENT-1-large,0.999833,1.0,4.8e-05,0.040789,0.019357,0.001978,0.037306
LogisticRegression,HandcraftedFeatureExtractor,8e-06,4.8e-05,1.0,0.613898,0.766202,0.98114,0.634009
LogisticRegression,amazon/chronos-t5-large,0.012989,0.040789,0.613898,1.0,0.999985,0.977433,1.0
LogisticRegression,amazon/chronos-t5-small,0.005559,0.019357,0.766202,0.999985,1.0,0.995942,0.999993
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000441,0.001978,0.98114,0.977433,0.995942,1.0,0.98114
LogisticRegression,paris-noah/Mantis-8M,0.011724,0.037306,0.634009,1.0,0.999993,0.98114,1.0


Friedman Test precision_score: statistic=40.074, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.093463,0.00015,7.4e-05,0.000259,6.4e-05,0.001076
LogisticRegression,AutonLab/MOMENT-1-large,0.093463,1.0,0.573285,0.471953,0.653919,0.452122,0.845331
LogisticRegression,HandcraftedFeatureExtractor,0.00015,0.573285,1.0,0.999999,1.0,0.999997,0.999391
LogisticRegression,amazon/chronos-t5-large,7.4e-05,0.471953,0.999999,1.0,0.999969,1.0,0.996902
LogisticRegression,amazon/chronos-t5-small,0.000259,0.653919,1.0,0.999969,1.0,0.999943,0.9999
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,6.4e-05,0.452122,0.999997,1.0,0.999943,1.0,0.995942
LogisticRegression,paris-noah/Mantis-8M,0.001076,0.845331,0.999391,0.996902,0.9999,0.995942,1.0


Friedman Test recall_score: statistic=56.845, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.9571545,3.096421e-05,0.145149,0.177888,0.001555,0.052952
LogisticRegression,AutonLab/MOMENT-1-large,0.957155,1.0,1.414725e-07,0.007702,0.010572,1.7e-05,0.001755
LogisticRegression,HandcraftedFeatureExtractor,3.1e-05,1.414725e-07,1.0,0.258168,0.215544,0.977433,0.491993
LogisticRegression,amazon/chronos-t5-large,0.145149,0.007701812,0.2581681,1.0,1.0,0.799745,0.999734
LogisticRegression,amazon/chronos-t5-small,0.177888,0.01057172,0.2155438,1.0,1.0,0.748591,0.999119
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001555,1.697175e-05,0.9774333,0.799745,0.748591,1.0,0.950556
LogisticRegression,paris-noah/Mantis-8M,0.052952,0.001754997,0.4919929,0.999734,0.999119,0.950556,1.0


Friedman Test roc_auc_score: statistic=31.970, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.859153,0.000572,0.010572,0.001978,0.001377,0.01589
LogisticRegression,AutonLab/MOMENT-1-large,0.859153,1.0,0.057631,0.322449,0.125956,0.100885,0.394299
LogisticRegression,HandcraftedFeatureExtractor,0.000572,0.057631,1.0,0.989572,0.999943,0.999993,0.977433
LogisticRegression,amazon/chronos-t5-large,0.010572,0.322449,0.989572,1.0,0.999391,0.998282,1.0
LogisticRegression,amazon/chronos-t5-small,0.001978,0.125956,0.999943,0.999391,1.0,1.0,0.997672
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001377,0.100885,0.999993,0.998282,1.0,1.0,0.994763
LogisticRegression,paris-noah/Mantis-8M,0.01589,0.394299,0.977433,1.0,0.997672,0.994763,1.0


In [3]:
results_lopo = present_results(
    all_results,
    val_method="tacv",
    remove_xgboost=True,
    remove_chronos_small_from_test=True,
)

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
4,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.478237,0.051894,0.500512,0.031328,0.001778,0.061588,0.38797,0.202726,0.357265,0.200925,0.443405,0.221716,0.500512,0.031328
6,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.6405,0.131275,0.615714,0.136109,0.241794,0.286743,0.462302,0.302731,0.625714,0.362842,0.466578,0.334841,0.615714,0.136109
2,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
5,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.508916,0.058305,0.523843,0.049311,0.041462,0.110617,0.297648,0.210281,0.36411,0.201832,0.312121,0.266114,0.523843,0.049311
1,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.504563,0.053762,0.519786,0.050948,0.021321,0.133771,0.361768,0.19162,0.405507,0.242627,0.368939,0.21053,0.519786,0.050948


Friedman Test accuracy_score: statistic=2.898, pvalue=0.716
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.97398,0.59468,0.97398,0.984591,0.97398
LogisticRegression,AutonLab/MOMENT-1-large,0.97398,1.0,0.958997,1.0,0.999999,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.59468,0.958997,1.0,0.958997,0.938967,0.958997
LogisticRegression,amazon/chronos-t5-large,0.97398,1.0,0.958997,1.0,0.999999,1.0
LogisticRegression,amazon/chronos-t5-small,0.984591,0.999999,0.938967,0.999999,1.0,0.999999
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.97398,1.0,0.958997,1.0,0.999999,1.0


Friedman Test balanced_accuracy_score: statistic=2.500, pvalue=0.776
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.845079,0.999999,0.958997,0.99942
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.882097,1.0,0.97398,0.999858
LogisticRegression,HandcraftedFeatureExtractor,0.845079,0.882097,1.0,0.882097,0.99942,0.958997
LogisticRegression,amazon/chronos-t5-large,0.999999,1.0,0.882097,1.0,0.97398,0.999858
LogisticRegression,amazon/chronos-t5-small,0.958997,0.97398,0.99942,0.97398,1.0,0.995925
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.99942,0.999858,0.958997,0.999858,0.995925,1.0


Friedman Test matthews_corrcoef: statistic=1.402, pvalue=0.924
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.984591,0.999999,0.984591,0.99942
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.97398,1.0,0.97398,0.99829
LogisticRegression,HandcraftedFeatureExtractor,0.984591,0.97398,1.0,0.97398,1.0,0.99942
LogisticRegression,amazon/chronos-t5-large,0.999999,1.0,0.97398,1.0,0.97398,0.99829
LogisticRegression,amazon/chronos-t5-small,0.984591,0.97398,1.0,0.97398,1.0,0.99942
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.99942,0.99829,0.99942,0.99829,0.99942,1.0


Friedman Test f1_score: statistic=12.548, pvalue=0.028
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.375252,0.999981,0.375252,0.999858,0.999999
LogisticRegression,AutonLab/MOMENT-1-large,0.375252,1.0,0.280437,1.0,0.238812,0.427525
LogisticRegression,HandcraftedFeatureExtractor,0.999981,0.280437,1.0,0.280437,0.999999,0.999858
LogisticRegression,amazon/chronos-t5-large,0.375252,1.0,0.280437,1.0,0.238812,0.427525
LogisticRegression,amazon/chronos-t5-small,0.999858,0.238812,0.999999,0.238812,1.0,0.99942
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999999,0.427525,0.999858,0.427525,0.99942,1.0


Friedman Test precision_score: statistic=13.000, pvalue=0.023
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.59468,0.984591,0.59468,0.991626,0.99829
LogisticRegression,AutonLab/MOMENT-1-large,0.59468,1.0,0.201363,1.0,0.238812,0.32604
LogisticRegression,HandcraftedFeatureExtractor,0.984591,0.201363,1.0,0.201363,0.999999,0.999858
LogisticRegression,amazon/chronos-t5-large,0.59468,1.0,0.201363,1.0,0.238812,0.32604
LogisticRegression,amazon/chronos-t5-small,0.991626,0.238812,0.999999,0.238812,1.0,0.999981
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.99829,0.32604,0.999858,0.32604,0.999981,1.0


Friedman Test recall_score: statistic=12.792, pvalue=0.025
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.280437,0.999981,0.280437,0.99942,0.999981
LogisticRegression,AutonLab/MOMENT-1-large,0.280437,1.0,0.201363,1.0,0.482135,0.375252
LogisticRegression,HandcraftedFeatureExtractor,0.999981,0.201363,1.0,0.201363,0.995925,0.99942
LogisticRegression,amazon/chronos-t5-large,0.280437,1.0,0.201363,1.0,0.482135,0.375252
LogisticRegression,amazon/chronos-t5-small,0.99942,0.482135,0.995925,0.482135,1.0,0.999981
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999981,0.375252,0.99942,0.375252,0.999981,1.0


Friedman Test roc_auc_score: statistic=2.500, pvalue=0.776
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.845079,0.999999,0.958997,0.99942
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.882097,1.0,0.97398,0.999858
LogisticRegression,HandcraftedFeatureExtractor,0.845079,0.882097,1.0,0.882097,0.99942,0.958997
LogisticRegression,amazon/chronos-t5-large,0.999999,1.0,0.882097,1.0,0.97398,0.999858
LogisticRegression,amazon/chronos-t5-small,0.958997,0.97398,0.99942,0.97398,1.0,0.995925
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.99942,0.999858,0.958997,0.999858,0.995925,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
41,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.545503,0.08005,0.54293,0.059188,0.071216,0.103792,0.583344,0.163628,0.57177,0.209098,0.705254,0.180745,0.54293,0.059188
43,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.488051,0.208738,0.526773,0.036324,0.085154,0.108571,0.533131,0.271428,0.559069,0.208522,0.749482,0.350237,0.526773,0.036324
40,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.311145,0.142635,0.328524,0.131626,-0.355593,0.26508,0.304784,0.219214,0.323593,0.197606,0.375569,0.262681,0.328524,0.131626
39,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.500288,0.199609,0.51477,0.050704,0.0323,0.159239,0.608019,0.213006,0.547907,0.208574,0.854995,0.235896,0.51477,0.050704
42,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.452856,0.174762,0.511186,0.051142,0.028248,0.10461,0.533535,0.178035,0.542154,0.203816,0.67971,0.234199,0.511186,0.051142


Friedman Test accuracy_score: statistic=5.660, pvalue=0.226
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999644,0.433878,0.99824,0.99824
LogisticRegression,AutonLab/MOMENT-1-large,0.999644,1.0,0.317223,0.999977,0.987386
LogisticRegression,HandcraftedFeatureExtractor,0.433878,0.317223,1.0,0.265889,0.627659
LogisticRegression,amazon/chronos-t5-large,0.99824,0.999977,0.265889,1.0,0.975123
LogisticRegression,amazon/chronos-t5-small,0.99824,0.987386,0.627659,0.975123,1.0


Friedman Test balanced_accuracy_score: statistic=12.000, pvalue=0.017
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.930677,0.011994,0.751424,0.930677
LogisticRegression,AutonLab/MOMENT-1-large,0.930677,1.0,0.115233,0.994615,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.011994,0.115233,1.0,0.265889,0.115233
LogisticRegression,amazon/chronos-t5-large,0.751424,0.994615,0.265889,1.0,0.994615
LogisticRegression,amazon/chronos-t5-small,0.930677,1.0,0.115233,0.994615,1.0


Friedman Test matthews_corrcoef: statistic=11.167, pvalue=0.025
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.040876,0.975123,0.975123
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.040876,0.975123,0.975123
LogisticRegression,HandcraftedFeatureExtractor,0.040876,0.040876,1.0,0.179597,0.179597
LogisticRegression,amazon/chronos-t5-large,0.975123,0.975123,0.179597,1.0,1.0
LogisticRegression,amazon/chronos-t5-small,0.975123,0.975123,0.179597,1.0,1.0


Friedman Test f1_score: statistic=10.167, pvalue=0.038
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999644,0.265889,0.855475,0.999644
LogisticRegression,AutonLab/MOMENT-1-large,0.999644,1.0,0.179597,0.930677,0.994615
LogisticRegression,HandcraftedFeatureExtractor,0.265889,0.179597,1.0,0.022659,0.373481
LogisticRegression,amazon/chronos-t5-large,0.855475,0.930677,0.022659,1.0,0.751424
LogisticRegression,amazon/chronos-t5-small,0.999644,0.994615,0.373481,0.751424,1.0


Friedman Test precision_score: statistic=12.000, pvalue=0.017
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.896993,0.011994,0.806626,0.930677
LogisticRegression,AutonLab/MOMENT-1-large,0.896993,1.0,0.144775,0.999644,0.999977
LogisticRegression,HandcraftedFeatureExtractor,0.011994,0.144775,1.0,0.21995,0.115233
LogisticRegression,amazon/chronos-t5-large,0.806626,0.999644,0.21995,1.0,0.99824
LogisticRegression,amazon/chronos-t5-small,0.930677,0.999977,0.115233,0.99824,1.0


Friedman Test recall_score: statistic=8.128, pvalue=0.087
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.896993,0.855475,0.497325,0.987386
LogisticRegression,AutonLab/MOMENT-1-large,0.896993,1.0,0.317223,0.956544,0.994615
LogisticRegression,HandcraftedFeatureExtractor,0.855475,0.317223,1.0,0.070324,0.562449
LogisticRegression,amazon/chronos-t5-large,0.497325,0.956544,0.070324,1.0,0.806626
LogisticRegression,amazon/chronos-t5-small,0.987386,0.994615,0.562449,0.806626,1.0


Friedman Test roc_auc_score: statistic=12.000, pvalue=0.017
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.930677,0.011994,0.751424,0.930677
LogisticRegression,AutonLab/MOMENT-1-large,0.930677,1.0,0.115233,0.994615,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.011994,0.115233,1.0,0.265889,0.115233
LogisticRegression,amazon/chronos-t5-large,0.751424,0.994615,0.265889,1.0,0.994615
LogisticRegression,amazon/chronos-t5-small,0.930677,1.0,0.115233,0.994615,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
46,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.409179,0.126033,0.495931,0.005103,-0.005947,0.008245,0.380454,0.307846,0.507808,0.414703,0.305448,0.246981,0.495931,0.005103
48,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.685536,0.047774,0.671579,0.044308,0.277698,0.111877,0.775661,0.038262,0.898969,0.045172,0.688387,0.074978,0.671579,0.044308
45,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.776265,0.040743,0.830071,0.041804,0.512755,0.090253,0.84241,0.025621,0.961691,0.031821,0.750841,0.037521,0.830071,0.041804
44,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.733597,0.045006,0.785686,0.031861,0.441448,0.104907,0.808792,0.026878,0.952221,0.029885,0.704134,0.037858,0.785686,0.031861
47,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.673411,0.036678,0.730974,0.057816,0.339848,0.051974,0.76041,0.035016,0.913251,0.075118,0.655163,0.041796,0.730974,0.057816
0,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.618506,0.037796,0.614872,0.046506,0.181688,0.090396,0.717323,0.041927,0.871466,0.052669,0.613656,0.063585,0.614872,0.046506


Friedman Test accuracy_score: statistic=22.281, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.201363,0.000708,0.012582,0.32604,0.91341
LogisticRegression,AutonLab/MOMENT-1-large,0.201363,1.0,0.482135,0.91341,0.999858,0.802694
LogisticRegression,HandcraftedFeatureExtractor,0.000708,0.482135,1.0,0.97398,0.32604,0.028372
LogisticRegression,amazon/chronos-t5-large,0.012582,0.91341,0.97398,1.0,0.802694,0.201363
LogisticRegression,amazon/chronos-t5-small,0.32604,0.999858,0.32604,0.802694,1.0,0.91341
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.91341,0.802694,0.028372,0.201363,0.91341,1.0


Friedman Test balanced_accuracy_score: statistic=22.486, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.427525,0.001417,0.009435,0.074302,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.427525,1.0,0.32604,0.65049,0.958997,0.91341
LogisticRegression,HandcraftedFeatureExtractor,0.001417,0.32604,1.0,0.995925,0.845079,0.028372
LogisticRegression,amazon/chronos-t5-large,0.009435,0.65049,0.995925,1.0,0.984591,0.113891
LogisticRegression,amazon/chronos-t5-small,0.074302,0.958997,0.845079,0.984591,1.0,0.427525
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.91341,0.028372,0.113891,0.427525,1.0


Friedman Test matthews_corrcoef: statistic=23.057, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.427525,0.001417,0.005178,0.113891,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.427525,1.0,0.32604,0.538193,0.984591,0.91341
LogisticRegression,HandcraftedFeatureExtractor,0.001417,0.32604,1.0,0.99942,0.755551,0.028372
LogisticRegression,amazon/chronos-t5-large,0.005178,0.538193,0.99942,1.0,0.91341,0.074302
LogisticRegression,amazon/chronos-t5-small,0.113891,0.984591,0.755551,0.91341,1.0,0.538193
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.91341,0.028372,0.074302,0.538193,1.0


Friedman Test f1_score: statistic=21.571, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.168134,0.000343,0.028372,0.427525,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.168134,1.0,0.427525,0.984591,0.995925,0.845079
LogisticRegression,HandcraftedFeatureExtractor,0.000343,0.427525,1.0,0.845079,0.168134,0.028372
LogisticRegression,amazon/chronos-t5-large,0.028372,0.984591,0.845079,1.0,0.845079,0.427525
LogisticRegression,amazon/chronos-t5-small,0.427525,0.995925,0.168134,0.845079,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.845079,0.845079,0.028372,0.427525,0.984591,1.0


Friedman Test precision_score: statistic=20.886, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.538193,0.001417,0.009435,0.113891,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.538193,1.0,0.238812,0.538193,0.958997,0.995925
LogisticRegression,HandcraftedFeatureExtractor,0.001417,0.238812,1.0,0.995925,0.755551,0.074302
LogisticRegression,amazon/chronos-t5-large,0.009435,0.538193,0.995925,1.0,0.958997,0.238812
LogisticRegression,amazon/chronos-t5-small,0.113891,0.958997,0.755551,0.958997,1.0,0.755551
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.845079,0.995925,0.074302,0.238812,0.755551,1.0


Friedman Test recall_score: statistic=20.200, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.074302,0.000708,0.046736,0.427525,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.074302,1.0,0.755551,0.999981,0.958997,0.65049
LogisticRegression,HandcraftedFeatureExtractor,0.000708,0.755551,1.0,0.845079,0.238812,0.046736
LogisticRegression,amazon/chronos-t5-large,0.046736,0.999981,0.845079,1.0,0.91341,0.538193
LogisticRegression,amazon/chronos-t5-small,0.427525,0.958997,0.238812,0.91341,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.845079,0.65049,0.046736,0.538193,0.984591,1.0


Friedman Test roc_auc_score: statistic=22.486, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.427525,0.001417,0.009435,0.074302,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.427525,1.0,0.32604,0.65049,0.958997,0.91341
LogisticRegression,HandcraftedFeatureExtractor,0.001417,0.32604,1.0,0.995925,0.845079,0.028372
LogisticRegression,amazon/chronos-t5-large,0.009435,0.65049,0.995925,1.0,0.984591,0.113891
LogisticRegression,amazon/chronos-t5-small,0.074302,0.958997,0.845079,0.984591,1.0,0.427525
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.91341,0.028372,0.113891,0.427525,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
9,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.555644,0.096513,0.48866,0.028251,-0.023202,0.051982,0.654688,0.126983,0.663317,0.091176,0.692261,0.250659,0.48866,0.028251
11,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.648025,0.086939,0.488468,0.011939,-0.042485,0.03812,0.776995,0.068974,0.669713,0.078364,0.929057,0.055451,0.488468,0.011939
8,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.684997,0.088681,0.522659,0.046252,0.079399,0.177631,0.808665,0.059924,0.683752,0.085957,0.998889,0.0022,0.522659,0.046252
7,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.668558,0.083023,0.498674,0.002839,-0.003172,0.007836,0.792218,0.069807,0.674811,0.074602,0.962115,0.06585,0.498674,0.002839
10,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.658135,0.072629,0.495278,0.00935,-0.011676,0.023118,0.789412,0.052666,0.673304,0.072463,0.965556,0.0682,0.495278,0.00935
12,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.675595,0.073512,0.5,0.0,0.0,0.0,0.804037,0.052678,0.675595,0.073512,1.0,0.0,0.5,0.0


Friedman Test accuracy_score: statistic=8.910, pvalue=0.113
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999858,0.538193,0.938967,0.91341,0.538193
LogisticRegression,AutonLab/MOMENT-1-large,0.999858,1.0,0.375252,0.845079,0.802694,0.375252
LogisticRegression,HandcraftedFeatureExtractor,0.538193,0.375252,1.0,0.97398,0.984591,1.0
LogisticRegression,amazon/chronos-t5-large,0.938967,0.845079,0.97398,1.0,0.999999,0.97398
LogisticRegression,amazon/chronos-t5-small,0.91341,0.802694,0.984591,0.999999,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.538193,0.375252,1.0,0.97398,0.984591,1.0


Friedman Test balanced_accuracy_score: statistic=7.406, pvalue=0.192
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.938967,0.91341,0.938967,0.999981,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.938967,1.0,0.375252,0.427525,0.882097,0.482135
LogisticRegression,HandcraftedFeatureExtractor,0.91341,0.375252,1.0,0.999999,0.958997,0.999981
LogisticRegression,amazon/chronos-t5-large,0.938967,0.427525,0.999999,1.0,0.97398,0.999999
LogisticRegression,amazon/chronos-t5-small,0.999981,0.882097,0.958997,0.97398,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.482135,0.999981,0.999999,0.984591,1.0


Friedman Test matthews_corrcoef: statistic=8.383, pvalue=0.136
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.802694,0.958997,0.97398,1.0,0.984591
LogisticRegression,AutonLab/MOMENT-1-large,0.802694,1.0,0.280437,0.32604,0.802694,0.375252
LogisticRegression,HandcraftedFeatureExtractor,0.958997,0.280437,1.0,0.999999,0.958997,0.999981
LogisticRegression,amazon/chronos-t5-large,0.97398,0.32604,0.999999,1.0,0.97398,0.999999
LogisticRegression,amazon/chronos-t5-small,1.0,0.802694,0.958997,0.97398,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.984591,0.375252,0.999981,0.999999,0.984591,1.0


Friedman Test f1_score: statistic=8.008, pvalue=0.156
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.538193,0.97398,0.91341,0.538193
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.482135,0.958997,0.882097,0.482135
LogisticRegression,HandcraftedFeatureExtractor,0.538193,0.482135,1.0,0.938967,0.984591,1.0
LogisticRegression,amazon/chronos-t5-large,0.97398,0.958997,0.938967,1.0,0.999858,0.938967
LogisticRegression,amazon/chronos-t5-small,0.91341,0.882097,0.984591,0.999858,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.538193,0.482135,1.0,0.938967,0.984591,1.0


Friedman Test precision_score: statistic=7.406, pvalue=0.192
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.938967,0.91341,0.938967,0.999981,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.938967,1.0,0.375252,0.427525,0.882097,0.482135
LogisticRegression,HandcraftedFeatureExtractor,0.91341,0.375252,1.0,0.999999,0.958997,0.999981
LogisticRegression,amazon/chronos-t5-large,0.938967,0.427525,0.999999,1.0,0.97398,0.999999
LogisticRegression,amazon/chronos-t5-small,0.999981,0.882097,0.958997,0.97398,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.482135,0.999981,0.999999,0.984591,1.0


Friedman Test recall_score: statistic=7.874, pvalue=0.163
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.704482,0.958997,0.882097,0.482135
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.65049,0.938967,0.845079,0.427525
LogisticRegression,HandcraftedFeatureExtractor,0.704482,0.65049,1.0,0.991626,0.99942,0.99942
LogisticRegression,amazon/chronos-t5-large,0.958997,0.938967,0.991626,1.0,0.999858,0.938967
LogisticRegression,amazon/chronos-t5-small,0.882097,0.845079,0.99942,0.999858,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.482135,0.427525,0.99942,0.938967,0.984591,1.0


Friedman Test roc_auc_score: statistic=7.406, pvalue=0.192
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.938967,0.91341,0.938967,0.999981,0.958997
LogisticRegression,AutonLab/MOMENT-1-large,0.938967,1.0,0.375252,0.427525,0.882097,0.482135
LogisticRegression,HandcraftedFeatureExtractor,0.91341,0.375252,1.0,0.999999,0.958997,0.999981
LogisticRegression,amazon/chronos-t5-large,0.938967,0.427525,0.999999,1.0,0.97398,0.999999
LogisticRegression,amazon/chronos-t5-small,0.999981,0.882097,0.958997,0.97398,1.0,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958997,0.482135,0.999981,0.999999,0.984591,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
16,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.47037,0.056328,0.456667,0.041791,-0.086661,0.083587,0.321277,0.163389,0.311333,0.158477,0.333333,0.171076,0.456667,0.041791
18,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.574074,0.060249,0.5525,0.05802,0.117881,0.135204,0.422028,0.098378,0.540887,0.120513,0.358333,0.099685,0.5525,0.05802
15,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.648148,0.098387,0.640833,0.09397,0.290383,0.195244,0.596089,0.090262,0.629527,0.131358,0.575,0.079987,0.640833,0.09397
14,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.740741,0.032796,0.733333,0.02846,0.479257,0.066262,0.695449,0.030717,0.740366,0.077616,0.666667,0.069024,0.733333,0.02846
17,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.718519,0.029333,0.71,0.028292,0.429669,0.058617,0.665698,0.036453,0.709698,0.052691,0.633333,0.066,0.71,0.028292


Friedman Test accuracy_score: statistic=14.667, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.975123,0.497325,0.00858,0.090578
LogisticRegression,AutonLab/MOMENT-1-large,0.975123,1.0,0.855475,0.053938,0.317223
LogisticRegression,HandcraftedFeatureExtractor,0.497325,0.855475,1.0,0.433878,0.896993
LogisticRegression,amazon/chronos-t5-large,0.00858,0.053938,0.433878,1.0,0.930677
LogisticRegression,amazon/chronos-t5-small,0.090578,0.317223,0.896993,0.930677,1.0


Friedman Test balanced_accuracy_score: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Friedman Test matthews_corrcoef: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Friedman Test f1_score: statistic=17.760, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.930677,0.373481,0.001363,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.930677,1.0,0.855475,0.022659,0.373481
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.855475,1.0,0.265889,0.930677
LogisticRegression,amazon/chronos-t5-large,0.001363,0.022659,0.265889,1.0,0.751424
LogisticRegression,amazon/chronos-t5-small,0.070324,0.373481,0.930677,0.751424,1.0


Friedman Test precision_score: statistic=15.840, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.627659,0.265889,0.002939,0.022659
LogisticRegression,AutonLab/MOMENT-1-large,0.627659,1.0,0.975123,0.179597,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.265889,0.975123,1.0,0.497325,0.855475
LogisticRegression,amazon/chronos-t5-large,0.002939,0.179597,0.497325,1.0,0.975123
LogisticRegression,amazon/chronos-t5-small,0.022659,0.497325,0.855475,0.975123,1.0


Friedman Test recall_score: statistic=17.061, pvalue=0.002
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.317223,0.011994,0.115233
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.317223,0.011994,0.115233
LogisticRegression,HandcraftedFeatureExtractor,0.317223,0.317223,1.0,0.69123,0.987386
LogisticRegression,amazon/chronos-t5-large,0.011994,0.011994,0.69123,1.0,0.930677
LogisticRegression,amazon/chronos-t5-small,0.115233,0.115233,0.987386,0.930677,1.0


Friedman Test roc_auc_score: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
30,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.5,0.065591,0.4975,0.064466,-0.004713,0.129485,0.456869,0.067827,0.444018,0.067048,0.475,0.084939,0.4975,0.064466
35,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.522222,0.063931,0.500833,0.053619,0.00206,0.107809,0.287647,0.235803,0.272747,0.22747,0.308333,0.252401,0.500833,0.053619
23,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,CatAggregator,TACV,0.533333,0.064973,0.5175,0.063122,0.039014,0.134415,0.416845,0.071777,0.475936,0.097363,0.375,0.069024,0.5175,0.063122
32,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.6,0.044305,0.579167,0.041085,0.177598,0.099592,0.462654,0.057802,0.588405,0.098253,0.391667,0.080833,0.579167,0.041085
25,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanTimeAggregator,TACV,0.533333,0.064973,0.5175,0.063122,0.039014,0.134415,0.416845,0.071777,0.475936,0.097363,0.375,0.069024,0.5175,0.063122
29,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.740741,0.073333,0.754167,0.065638,0.517757,0.12078,0.753849,0.051885,0.667946,0.085374,0.875,0.026089,0.754167,0.065638
20,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,CatAggregator,TACV,0.762963,0.072781,0.7625,0.074661,0.528079,0.145684,0.737335,0.081847,0.72694,0.073788,0.758333,0.120687,0.7625,0.074661
28,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.777778,0.091299,0.773333,0.093433,0.561735,0.181537,0.740824,0.104231,0.775075,0.10686,0.733333,0.162087,0.773333,0.093433
22,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanTimeAggregator,TACV,0.807407,0.077435,0.8075,0.076658,0.618596,0.15271,0.789046,0.075692,0.779068,0.090541,0.808333,0.099685,0.8075,0.076658
21,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,CatAggregator,TACV,0.796296,0.043385,0.7975,0.043232,0.59468,0.0844,0.778857,0.045557,0.755329,0.051681,0.808333,0.067023,0.7975,0.043232


Friedman Test accuracy_score: statistic=17.956, pvalue=0.006
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.990645,0.137885,0.0429,0.027494,0.223897,0.137885
LogisticRegression,AutonLab/MOMENT-1-large,0.990645,1.0,0.527684,0.259158,0.191943,0.67574,0.527684
LogisticRegression,HandcraftedFeatureExtractor,0.137885,0.527684,1.0,0.999459,0.997234,0.999991,1.0
LogisticRegression,amazon/chronos-t5-large,0.0429,0.259158,0.999459,1.0,0.999999,0.994704,0.999459
LogisticRegression,amazon/chronos-t5-small,0.027494,0.191943,0.997234,0.999999,1.0,0.98454,0.997234
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.223897,0.67574,0.999991,0.994704,0.98454,1.0,0.999991
LogisticRegression,paris-noah/Mantis-8M,0.137885,0.527684,1.0,0.999459,0.997234,0.999991,1.0


Friedman Test balanced_accuracy_score: statistic=19.527, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999459,0.079418,0.065133,0.053044,0.163298,0.065133
LogisticRegression,AutonLab/MOMENT-1-large,0.999459,1.0,0.223897,0.191943,0.163298,0.383365,0.191943
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.223897,1.0,1.0,0.999999,0.999949,1.0
LogisticRegression,amazon/chronos-t5-large,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
LogisticRegression,amazon/chronos-t5-small,0.053044,0.163298,0.999999,1.0,1.0,0.999459,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.383365,0.999949,0.999812,0.999459,1.0,0.999812
LogisticRegression,paris-noah/Mantis-8M,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0


Friedman Test matthews_corrcoef: statistic=18.522, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.997234,0.079418,0.053044,0.03446,0.223897,0.115564
LogisticRegression,AutonLab/MOMENT-1-large,0.997234,1.0,0.297637,0.223897,0.163298,0.57761,0.383365
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.297637,1.0,0.999999,0.999949,0.999459,0.999999
LogisticRegression,amazon/chronos-t5-large,0.053044,0.223897,0.999999,1.0,0.999999,0.997234,0.999949
LogisticRegression,amazon/chronos-t5-small,0.03446,0.163298,0.999949,0.999999,1.0,0.990645,0.999459
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.223897,0.57761,0.999459,0.997234,0.990645,1.0,0.999949
LogisticRegression,paris-noah/Mantis-8M,0.115564,0.383365,0.999999,0.999949,0.999459,0.999949,1.0


Friedman Test f1_score: statistic=19.130, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.115564,0.163298,0.115564,0.115564,0.079418
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.163298,0.223897,0.163298,0.163298,0.115564
LogisticRegression,HandcraftedFeatureExtractor,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,amazon/chronos-t5-large,0.163298,0.223897,0.999999,1.0,0.999999,0.999999,0.999949
LogisticRegression,amazon/chronos-t5-small,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,paris-noah/Mantis-8M,0.079418,0.115564,0.999999,0.999949,0.999999,0.999999,1.0


Friedman Test precision_score: statistic=18.524, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.928962,0.478211,0.013423,0.008059,0.33914,0.223897
LogisticRegression,AutonLab/MOMENT-1-large,0.928962,1.0,0.98454,0.259158,0.191943,0.948498,0.876775
LogisticRegression,HandcraftedFeatureExtractor,0.478211,0.98454,1.0,0.766379,0.67574,0.999991,0.999459
LogisticRegression,amazon/chronos-t5-large,0.013423,0.259158,0.766379,1.0,0.999999,0.876775,0.948498
LogisticRegression,amazon/chronos-t5-small,0.008059,0.191943,0.67574,0.999999,1.0,0.807113,0.905096
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.33914,0.948498,0.999991,0.876775,0.807113,1.0,0.999991
LogisticRegression,paris-noah/Mantis-8M,0.223897,0.876775,0.999459,0.948498,0.905096,0.999991,1.0


Friedman Test recall_score: statistic=22.390, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.021791,0.67574,0.297637,0.096148,0.096148
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.013423,0.57761,0.223897,0.065133,0.065133
LogisticRegression,HandcraftedFeatureExtractor,0.021791,0.013423,1.0,0.67574,0.948498,0.998694,0.998694
LogisticRegression,amazon/chronos-t5-large,0.67574,0.57761,0.67574,1.0,0.997234,0.928962,0.928962
LogisticRegression,amazon/chronos-t5-small,0.297637,0.223897,0.948498,0.997234,1.0,0.998694,0.998694
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.096148,0.065133,0.998694,0.928962,0.998694,1.0,1.0
LogisticRegression,paris-noah/Mantis-8M,0.096148,0.065133,0.998694,0.928962,0.998694,1.0,1.0


Friedman Test roc_auc_score: statistic=19.527, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999459,0.079418,0.065133,0.053044,0.163298,0.065133
LogisticRegression,AutonLab/MOMENT-1-large,0.999459,1.0,0.223897,0.191943,0.163298,0.383365,0.191943
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.223897,1.0,1.0,0.999999,0.999949,1.0
LogisticRegression,amazon/chronos-t5-large,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
LogisticRegression,amazon/chronos-t5-small,0.053044,0.163298,0.999999,1.0,1.0,0.999459,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.383365,0.999949,0.999812,0.999459,1.0,0.999812
LogisticRegression,paris-noah/Mantis-8M,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
