In [1]:
import os
import pickle
from collections.abc import Iterable
from pathlib import Path
from typing import Generator, Optional

import numpy as np
import pandas as pd
import scikit_posthocs as sp
from IPython.display import HTML, display
from jmetal.lab.statistical_test.functions import (
    friedman_aligned_ph_test,
    friedman_aligned_rank_test,
)
from scipy import stats
from yaml import safe_load


# Let pandas render up to 100 columns before it starts eliding them, so the
# wide result tables produced below are shown in full.
pd.options.display.max_columns = 100


def friedmann_nemenyi_test(
    unravelled_detailed_results: dict[str, dict[tuple[str, str], list[float]]],
) -> None:
    """Run a Friedman test and a Nemenyi post-hoc comparison for each metric.

    For every metric the per-configuration score sequences are assembled into
    a DataFrame with one column per ``(model, features)`` key. The Friedman
    chi-square statistic and p-value are printed, and the pairwise Nemenyi
    p-value matrix is displayed with rows and columns labelled (and sorted)
    by those keys.

    Args:
        unravelled_detailed_results: Maps a metric name to a mapping from a
            ``(model, features)`` pair to that configuration's scores.

    Raises:
        Exception: Whatever ``scipy.stats.friedmanchisquare`` or
            ``scikit_posthocs.posthoc_nemenyi_friedman`` raise is propagated
            unchanged; SciPy rejects input with fewer than three
            configurations.
    """
    for metric, scores_by_config in unravelled_detailed_results.items():
        metric_df = pd.DataFrame(scores_by_config)
        config_labels = metric_df.columns
        per_config_scores = [metric_df[col].values for col in config_labels]

        # friedmanchisquare takes one positional sample per configuration.
        friedman = stats.friedmanchisquare(*per_config_scores)
        print(
            f"Friedman Test {metric}: statistic={friedman.statistic:.3f}, pvalue={friedman.pvalue:.3f}"
        )

        # The post-hoc routine takes a block-design matrix instead: one row
        # per repeated measurement, one column per configuration.
        block_matrix = np.array(per_config_scores).T
        pairwise_p_values = sp.posthoc_nemenyi_friedman(block_matrix)
        print("Nemenyi post-hoc test")
        # The result comes back with positional labels; restore the
        # (model, features) keys before sorting both axes for display.
        pairwise_p_values.index = config_labels
        pairwise_p_values.columns = config_labels
        pairwise_p_values = pairwise_p_values.sort_index(axis=0).sort_index(axis=1)
        display(pairwise_p_values)


def aligned_friedmann_holm_test(
    unravelled_detailed_results: dict[str, dict[tuple[str, str], list[float]]],
) -> None:
    """Run the aligned-rank Friedman test and its post-hoc test per metric.

    For every metric the scores are assembled into a DataFrame with one
    column per ``(model, features)`` key. The aligned-rank statistic and its
    p-value are printed, then the post-hoc procedure is run with
    ``apv_procedure="Holm"`` and its pairwise matrix is displayed with rows
    and columns labelled (and sorted) by those keys.

    Args:
        unravelled_detailed_results: Maps a metric name to a mapping from a
            ``(model, features)`` pair to that configuration's scores.
    """
    for metric, scores_by_config in unravelled_detailed_results.items():
        metric_df = pd.DataFrame(scores_by_config)
        config_labels = metric_df.columns
        # One row per measurement, one column per configuration.
        score_matrix = metric_df.values

        omnibus = friedman_aligned_rank_test(score_matrix)
        chi2_stat = omnibus.loc["Aligned Rank stat"].iloc[0]
        p_value = omnibus.loc["p-value"].iloc[0]
        print(f"Aligned-rank Friedman χ² {metric} = {chi2_stat:.3f}, p = {p_value:.3f}")

        # NOTE(review): only the second of the three return values is shown
        # under the "Holm" heading and the third is discarded -- confirm
        # against the jMetalPy documentation which one carries the
        # Holm-adjusted p-values.
        _, pairwise, _ = friedman_aligned_ph_test(
            score_matrix, apv_procedure="Holm"  # Holm step-down correction
        )
        print("Holm post-hoc test")
        pairwise.index = config_labels
        pairwise.columns = config_labels
        pairwise = pairwise.sort_index(axis=0).sort_index(axis=1)
        display(pairwise)


def _target_class_name(section: dict) -> str:
    """Return the last dotted component of a config section's ``_target_``."""
    return section["_target_"].split(".")[-1]


def _split_stat_column(col: str) -> tuple:
    """Split ``"<metric> avg"`` / ``"<metric> sem"`` into ``(metric, stat)``.

    Any other column name is returned as a one-element tuple.
    """
    if col.endswith((" avg", " sem")):
        # rsplit keeps a metric name intact even if it contains spaces.
        return tuple(col.rsplit(" ", 1))
    return (col,)


def _format_avg_sem(block: pd.DataFrame) -> pd.Series:
    """Collapse the rows that share one first-level label into strings.

    A two-row block (avg first, then sem) becomes ``"avg ± sem"`` per column;
    a single-row block is simply stringified.
    """
    rows = block.values
    if len(block) > 1:
        cells = [
            f"{np.round(avg, 2):.2f} ± {np.round(sem, 2):.2f}"
            for avg, sem in zip(rows[0], rows[1])
        ]
    else:
        cells = [f"{value}" for value in rows[0]]
    return pd.Series(cells, index=block.columns)


def _build_summary_table(group: pd.DataFrame) -> pd.DataFrame:
    """Return one row per run with each metric rendered as ``"avg ± sem"``."""
    table = (
        group.sort_values(by=["Model", "Features", "Aggregator"])
        .drop(columns=["Detailed Report"])
        .drop_duplicates()
    )
    table.columns = pd.MultiIndex.from_tuples(
        [_split_stat_column(col) for col in table.columns]
    )
    # After transposing, the (label, stat) pairs sit on the rows, so grouping
    # on the first level hands each metric's avg and sem rows to the
    # formatter together.
    return table.T.groupby(level=0).apply(_format_avg_sem).T


def present_results(
    paths: Iterable[Path],
    val_method: str = "lopo",
    remove_xgboost: bool = False,
    remove_chronos_small_from_test: bool = False,
    which_test: str = "friedmann-nemenyi",
    test_args: Optional[dict] = None,
) -> list[dict]:
    """Summarise experiment reports and run significance tests per group.

    Each path points at a report CSV whose sibling ``.hydra/config.yaml``
    describes the run. Runs whose validation method does not match
    ``val_method`` are skipped. The remaining runs are grouped by
    ``(Dataset, Side, Resampling)``; for every group an HTML banner and an
    ``"avg ± sem"`` table are displayed, the table is written to
    ``../tables_latex/`` as LaTeX, and the selected statistical test is run
    on the per-row values of the detailed reports.

    Args:
        paths: Report CSV paths; any iterable works and it is consumed once.
        val_method: Lower-case name that the class name of the configured
            validation method must equal (compared case-insensitively).
        remove_xgboost: Skip runs whose model class is ``XGBClassifier``.
        remove_chronos_small_from_test: Accepted for backward compatibility;
            not read anywhere in this function.
        which_test: ``"friedmann-nemenyi"`` or ``"alignedfriedmann-holm"``.
        test_args: Optional settings. If it has an ``"aggregator"`` key and a
            group contains more than two distinct aggregators, only rows
            with that aggregator (or none) enter the statistical test.

    Returns:
        One dict per retained run: the descriptive fields, the detailed
        report DataFrame, and ``"<metric> avg"`` / ``"<metric> sem"`` values.
        Empty if no run was retained.

    Raises:
        ValueError: If ``which_test`` is not a known test name. This is
            checked on entry so it is not swallowed by the per-group handler.
    """
    if which_test not in ("friedmann-nemenyi", "alignedfriedmann-holm"):
        raise ValueError(f"Unknown test: {which_test}")
    test_args = {} if test_args is None else test_args

    results = []
    for reports_path in paths:
        with open(reports_path.parent / ".hydra/config.yaml") as config_file:
            conf = safe_load(config_file)

        validation_method = _target_class_name(conf["validation_method"])
        if validation_method.lower() != val_method:
            continue
        model_name: str = _target_class_name(conf["model"]["model"])
        if remove_xgboost and model_name == "XGBClassifier":
            continue

        extractor_conf = conf["feature_extractor"]
        features_name = (
            extractor_conf["model_name"]
            if "model_name" in extractor_conf
            else _target_class_name(extractor_conf)
        )

        if "aggregator" not in conf:
            aggregator = "MeanTimeAggregator"
        elif "_target_" in conf["aggregator"]:
            aggregator = _target_class_name(conf["aggregator"])
        else:
            aggregator = None

        resampling = (
            _target_class_name(conf["resampling"]) if "resampling" in conf else "None"
        )
        if resampling == "NoUnderSampler":
            resampling = "None"

        report = pd.read_csv(reports_path, index_col=0)
        report_results = {}
        for col in report.columns:
            report_results[f"{col} avg"] = report[col].mean()
            # NOTE(review): the "sem" entry is the standard error scaled by
            # 1.98; the large-sample normal quantile would be 1.96 -- confirm
            # the multiplier is intentional.
            report_results[f"{col} sem"] = report[col].sem() * 1.98  # 95% CI

        results.append(
            {
                "Dataset": conf["dataset"],
                "Side": conf["side"],
                "Model": model_name,
                "Resampling": resampling,
                "Features": features_name,
                "Aggregator": aggregator,
                "Validation": validation_method,
                "Detailed Report": report,
                **report_results,
            }
        )

    if not results:
        # Nothing matched: an empty frame has no columns to group by.
        return results

    df_results = pd.DataFrame(results)
    for (dataset, side, resampling), group in df_results.groupby(
        ["Dataset", "Side", "Resampling"]
    ):
        # Best effort per group: a failure is reported and the next group is
        # still processed.
        try:
            display(
                HTML(
                    f"""
                <div style='background-color:#ffe6e6; padding:18px; margin:10px 0; border-radius:8px;'>
                    <h2 style='color:#b30000; margin:0; font-size:2em;'>
                    Results for Dataset: <i>{dataset}</i>, Side: <i>{side}</i>, resampling: <i>{resampling}</i>
                    </h2>
                </div>
                """
                )
            )
            summary = _build_summary_table(group)
            display(summary)

            latex_table = summary.drop(
                columns=['accuracy_score', 'Dataset', "Side", "Resampling", "Validation"]
            ).rename(
                columns={
                    "balanced_accuracy_score": "Balanced Accuracy",
                    "f1_score": "F1",
                    "matthews_corrcoef": "MCC",
                    "roc_auc_score": "ROC AUC",
                    "precision_score": "Precision",
                    "recall_score": "Recall",
                }
            )

            # Take the validation name from the group itself rather than from
            # whatever the loading loop happened to leave behind.
            validation_name = group["Validation"].iloc[0]
            latex_path = (
                Path("../tables_latex")
                / f"results_{dataset}_{side}_{resampling}_{validation_name}.tex"
            )
            # A missing output directory must not abort the statistical test.
            latex_path.parent.mkdir(parents=True, exist_ok=True)
            # The table contains "±", so pin the encoding.
            latex_path.write_text(latex_table.to_latex(), encoding="utf-8")

            # NOTE(review): the filter only kicks in above two distinct
            # aggregators; with exactly two, rows sharing (Model, Features)
            # overwrite each other below -- confirm the threshold.
            if "aggregator" in test_args and group["Aggregator"].nunique() > 2:
                group = group[
                    (group["Aggregator"] == test_args["aggregator"])
                    | (group["Aggregator"].isnull())
                ]

            unravelled_detailed_results = {
                metric: {} for metric in group["Detailed Report"].iloc[0].columns
            }
            for _, model_results in group.iterrows():
                detailed = model_results["Detailed Report"]
                config_key = (model_results["Model"], model_results["Features"])
                for metric in detailed.columns:
                    unravelled_detailed_results[metric][config_key] = detailed[metric]

            if which_test == "friedmann-nemenyi":
                friedmann_nemenyi_test(unravelled_detailed_results)
            else:  # "alignedfriedmann-holm" -- validated on entry
                aligned_friedmann_holm_test(unravelled_detailed_results)
        except Exception as e:
            print(f"Error processing group {dataset}, {side}, {resampling}: {e}")

    return results

In [2]:
# Gather every ``reports.csv`` found three directory levels below each output
# root, listing the main tree before the "adula" tree.
results_path: str = "../outputs/"
results_path_adula: str = "../outputs_adula/"
all_results = [
    report_file
    for output_root in (results_path, results_path_adula)
    for report_file in Path(output_root).glob("*/*/*/reports.csv")
]

# Summarise the runs validated with "lopo", leaving XGBoost out, and compare
# the configurations with the Friedman/Nemenyi test.
results_lopo = present_results(
    paths=all_results,
    val_method="lopo",
    which_test="friedmann-nemenyi",
    test_args={"aggregator": "MeanChanAggregator"},
    remove_xgboost=True,
    remove_chronos_small_from_test=True,
)

Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
3,,apsync,NoneFeatureExtractor,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.65 ± 0.18,0.57 ± 0.14,0.77 ± 0.14,0.00 ± 0.02,0.66 ± 0.18,0.99 ± 0.01,0.50 ± 0.00


Error processing group apsync, engagement_10sec_35thresh, None: At least 3 sets of samples must be given for Friedman test, got 1.


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
51,,apsync,HandcraftedFeatureExtractor,DummyClassifier,,engagement_10sec_4thresh,LOPO,0.56 ± 0.12,0.49 ± 0.01,0.67 ± 0.12,-0.01 ± 0.02,0.65 ± 0.18,0.78 ± 0.16,0.49 ± 0.01
54,MeanChanAggregator,apsync,AutonLab/MOMENT-1-large,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.58 ± 0.12,0.53 ± 0.07,0.71 ± 0.10,-0.02 ± 0.03,0.65 ± 0.18,0.88 ± 0.07,0.49 ± 0.01
50,,apsync,HandcraftedFeatureExtractor,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.65 ± 0.17,0.56 ± 0.13,0.76 ± 0.13,-0.01 ± 0.04,0.66 ± 0.18,0.98 ± 0.01,0.50 ± 0.01
4,,apsync,NoneFeatureExtractor,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.65 ± 0.18,0.57 ± 0.14,0.77 ± 0.14,0.00 ± 0.02,0.66 ± 0.18,0.99 ± 0.01,0.50 ± 0.00
49,MeanChanAggregator,apsync,amazon/chronos-t5-large,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.58 ± 0.13,0.51 ± 0.07,0.71 ± 0.12,-0.04 ± 0.05,0.65 ± 0.18,0.86 ± 0.09,0.48 ± 0.03
53,MeanChanAggregator,apsync,amazon/chronos-t5-small,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.58 ± 0.12,0.54 ± 0.08,0.70 ± 0.11,-0.00 ± 0.05,0.65 ± 0.18,0.82 ± 0.07,0.50 ± 0.02
52,MeanChanAggregator,apsync,ibm-granite/granite-timeseries-patchtsmixer,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.66 ± 0.18,0.57 ± 0.14,0.77 ± 0.14,-0.00 ± 0.01,0.66 ± 0.18,1.00 ± 0.00,0.50 ± 0.00
55,,apsync,paris-noah/Mantis-8M,LogisticRegression,,engagement_10sec_4thresh,LOPO,0.55 ± 0.13,0.50 ± 0.05,0.68 ± 0.10,0.00 ± 0.12,0.65 ± 0.19,0.83 ± 0.13,0.50 ± 0.06


Friedman Test accuracy_score: statistic=9.368, pvalue=0.227
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.983489,0.988441,0.958863,0.999995,1.0,0.977064,0.992155
LogisticRegression,AutonLab/MOMENT-1-large,0.983489,1.0,0.619891,0.469151,0.932143,0.994859,0.544321,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.988441,0.619891,1.0,0.999999,0.998887,0.96893,1.0,0.693121
LogisticRegression,NoneFeatureExtractor,0.958863,0.469151,0.999999,1.0,0.992155,0.915185,1.0,0.544321
LogisticRegression,amazon/chronos-t5-large,0.999995,0.932143,0.998887,0.992155,1.0,0.999865,0.996762,0.958863
LogisticRegression,amazon/chronos-t5-small,1.0,0.994859,0.96893,0.915185,0.999865,1.0,0.946658,0.998051
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.977064,0.544321,1.0,1.0,0.996762,0.946658,1.0,0.619891
LogisticRegression,paris-noah/Mantis-8M,0.992155,1.0,0.693121,0.544321,0.958863,0.998051,0.619891,1.0


Friedman Test balanced_accuracy_score: statistic=5.135, pvalue=0.643
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.983489,0.915185,0.999865,0.983489,0.977064,0.999403
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.994859,0.958863,0.998887,0.994859,0.992155,0.999945
LogisticRegression,HandcraftedFeatureExtractor,0.983489,0.994859,1.0,0.999981,0.873655,1.0,1.0,0.999945
LogisticRegression,NoneFeatureExtractor,0.915185,0.958863,0.999981,1.0,0.693121,0.999981,0.999995,0.996762
LogisticRegression,amazon/chronos-t5-large,0.999865,0.998887,0.873655,0.693121,1.0,0.873655,0.849079,0.977064
LogisticRegression,amazon/chronos-t5-small,0.983489,0.994859,1.0,0.999981,0.873655,1.0,1.0,0.999945
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.977064,0.992155,1.0,0.999995,0.849079,1.0,1.0,0.999865
LogisticRegression,paris-noah/Mantis-8M,0.999403,0.999945,0.999945,0.996762,0.977064,0.999945,0.999865,1.0


Friedman Test matthews_corrcoef: statistic=3.168, pvalue=0.869
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.999945,0.983489,0.999865,0.999703,0.999981,0.999403
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.999865,0.977064,0.999945,0.999403,0.999945,0.998887
LogisticRegression,HandcraftedFeatureExtractor,0.999945,0.999865,1.0,0.999403,0.992155,1.0,1.0,1.0
LogisticRegression,NoneFeatureExtractor,0.983489,0.977064,0.999403,1.0,0.873655,0.999865,0.998887,0.999945
LogisticRegression,amazon/chronos-t5-large,0.999865,0.999945,0.992155,0.873655,1.0,0.983489,0.994859,0.977064
LogisticRegression,amazon/chronos-t5-small,0.999703,0.999403,1.0,0.999865,0.983489,1.0,1.0,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999981,0.999945,1.0,0.998887,0.994859,1.0,1.0,0.999999
LogisticRegression,paris-noah/Mantis-8M,0.999403,0.998887,1.0,0.999945,0.977064,1.0,0.999999,1.0


Friedman Test f1_score: statistic=15.533, pvalue=0.030
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999945,0.693121,0.329561,1.0,1.0,0.298244,0.999999
LogisticRegression,AutonLab/MOMENT-1-large,0.999945,1.0,0.895699,0.582225,0.999945,0.999999,0.544321,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.693121,0.895699,1.0,0.999403,0.693121,0.792733,0.998887,0.822058
LogisticRegression,NoneFeatureExtractor,0.329561,0.582225,0.999403,1.0,0.329561,0.432513,1.0,0.469151
LogisticRegression,amazon/chronos-t5-large,1.0,0.999945,0.693121,0.329561,1.0,1.0,0.298244,0.999999
LogisticRegression,amazon/chronos-t5-small,1.0,0.999999,0.792733,0.432513,1.0,1.0,0.396881,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.298244,0.544321,0.998887,1.0,0.298244,0.396881,1.0,0.432513
LogisticRegression,paris-noah/Mantis-8M,0.999999,1.0,0.822058,0.469151,0.999999,1.0,0.432513,1.0


Friedman Test precision_score: statistic=3.108, pvalue=0.875
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.999945,0.998887,0.998051,0.999703,0.999999,0.999865
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.999403,0.994859,0.999703,0.998051,0.999945,0.998887
LogisticRegression,HandcraftedFeatureExtractor,0.999945,0.999403,1.0,0.999999,0.96893,1.0,1.0,1.0
LogisticRegression,NoneFeatureExtractor,0.998887,0.994859,0.999999,1.0,0.915185,1.0,0.999945,1.0
LogisticRegression,amazon/chronos-t5-large,0.998051,0.999703,0.96893,0.915185,1.0,0.946658,0.988441,0.958863
LogisticRegression,amazon/chronos-t5-small,0.999703,0.998051,1.0,1.0,0.946658,1.0,0.999995,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999999,0.999945,1.0,0.999945,0.988441,0.999995,1.0,0.999999
LogisticRegression,paris-noah/Mantis-8M,0.999865,0.998887,1.0,1.0,0.958863,1.0,0.999999,1.0


Friedman Test recall_score: statistic=21.546, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998887,0.761299,0.432513,1.0,0.996762,0.149068,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.998887,1.0,0.977064,0.822058,0.998887,0.895699,0.469151,0.998051
LogisticRegression,HandcraftedFeatureExtractor,0.761299,0.977064,1.0,0.999703,0.761299,0.298244,0.96893,0.728001
LogisticRegression,NoneFeatureExtractor,0.432513,0.822058,0.999703,1.0,0.432513,0.099381,0.999403,0.396881
LogisticRegression,amazon/chronos-t5-large,1.0,0.998887,0.761299,0.432513,1.0,0.996762,0.149068,1.0
LogisticRegression,amazon/chronos-t5-small,0.996762,0.895699,0.298244,0.099381,0.996762,1.0,0.01978,0.998051
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.149068,0.469151,0.96893,0.999403,0.149068,0.01978,1.0,0.130777
LogisticRegression,paris-noah/Mantis-8M,1.0,0.998051,0.728001,0.396881,1.0,0.998051,0.130777,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.999908,0.998171,0.996822,0.999504,0.999998,0.999773
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.999012,0.991762,0.999504,0.996822,0.999908,0.998171
LogisticRegression,HandcraftedFeatureExtractor,0.999908,0.999012,1.0,0.999998,0.952767,1.0,1.0,1.0
LogisticRegression,NoneFeatureExtractor,0.998171,0.991762,0.999998,1.0,0.877288,1.0,0.999908,1.0
LogisticRegression,amazon/chronos-t5-large,0.996822,0.999504,0.952767,0.877288,1.0,0.920838,0.981837,0.9382
LogisticRegression,amazon/chronos-t5-small,0.999504,0.996822,1.0,1.0,0.920838,1.0,0.999991,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999998,0.999908,1.0,0.999908,0.981837,0.999991,1.0,0.999998
LogisticRegression,paris-noah/Mantis-8M,0.999773,0.998171,1.0,1.0,0.9382,1.0,0.999998,1.0


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
2,,apsync,NoneFeatureExtractor,LogisticRegression,,engagement_10sec_medianthresh,LOPO,0.80 ± 0.12,0.57 ± 0.14,0.00 ± 0.01,-0.01 ± 0.01,0.03 ± 0.06,0.00 ± 0.00,0.50 ± 0.00


Error processing group apsync, engagement_10sec_medianthresh, None: At least 3 sets of samples must be given for Friedman test, got 1.


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
0,,apsync,NoneFeatureExtractor,LogisticRegression,,enjoyment_10sec,LOPO,0.40 ± 0.18,0.42 ± 0.14,0.53 ± 0.22,-0.02 ± 0.08,0.55 ± 0.20,0.84 ± 0.28,0.49 ± 0.02


Error processing group apsync, enjoyment_10sec, None: At least 3 sets of samples must be given for Friedman test, got 1.


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
1,,apsync,NoneFeatureExtractor,LogisticRegression,,immersion_10sec,LOPO,0.25 ± 0.16,0.30 ± 0.16,0.08 ± 0.05,-0.09 ± 0.21,0.42 ± 0.30,0.09 ± 0.10,0.43 ± 0.15


Error processing group apsync, immersion_10sec, None: At least 3 sets of samples must be given for Friedman test, got 1.


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
58,,bihearts,HandcraftedFeatureExtractor,DummyClassifier,GroupUnderSampler,left,LOPO,0.38 ± 0.13,0.48 ± 0.04,0.43 ± 0.22,-0.03 ± 0.05,0.55 ± 0.29,0.36 ± 0.18,0.48 ± 0.04
60,MeanChanAggregator,bihearts,AutonLab/MOMENT-1-large,LogisticRegression,GroupUnderSampler,left,LOPO,0.68 ± 0.05,0.62 ± 0.05,0.77 ± 0.05,0.20 ± 0.10,0.88 ± 0.05,0.70 ± 0.08,0.62 ± 0.05
57,,bihearts,HandcraftedFeatureExtractor,LogisticRegression,GroupUnderSampler,left,LOPO,0.72 ± 0.05,0.75 ± 0.07,0.80 ± 0.03,0.37 ± 0.12,0.93 ± 0.05,0.71 ± 0.05,0.75 ± 0.07
6,,bihearts,NoneFeatureExtractor,LogisticRegression,GroupUnderSampler,left,LOPO,0.62 ± 0.07,0.69 ± 0.08,0.71 ± 0.05,0.28 ± 0.13,0.91 ± 0.06,0.59 ± 0.06,0.69 ± 0.08
56,MeanChanAggregator,bihearts,amazon/chronos-t5-large,LogisticRegression,GroupUnderSampler,left,LOPO,0.71 ± 0.03,0.75 ± 0.05,0.80 ± 0.03,0.37 ± 0.09,0.92 ± 0.05,0.71 ± 0.04,0.75 ± 0.05
59,MeanChanAggregator,bihearts,amazon/chronos-t5-small,LogisticRegression,GroupUnderSampler,left,LOPO,0.66 ± 0.01,0.69 ± 0.04,0.76 ± 0.02,0.27 ± 0.05,0.89 ± 0.07,0.66 ± 0.03,0.69 ± 0.04
9,MeanChanAggregator,bihearts,ibm-granite/granite-timeseries-patchtsmixer,LogisticRegression,GroupUnderSampler,left,LOPO,0.60 ± 0.03,0.58 ± 0.02,0.71 ± 0.05,0.12 ± 0.04,0.85 ± 0.06,0.61 ± 0.05,0.58 ± 0.02
8,,bihearts,paris-noah/Mantis-8M,LogisticRegression,GroupUnderSampler,left,LOPO,0.77 ± 0.03,0.78 ± 0.07,0.84 ± 0.02,0.42 ± 0.11,0.93 ± 0.05,0.77 ± 0.04,0.78 ± 0.07


Friedman Test accuracy_score: statistic=38.000, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.063825,0.000552,0.728001,0.003364,0.191192,0.895699,2.5e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.063825,1.0,0.895699,0.895699,0.988441,0.999865,0.728001,0.506521
LogisticRegression,HandcraftedFeatureExtractor,0.000552,0.895699,1.0,0.149068,0.999865,0.656973,0.063825,0.998051
LogisticRegression,NoneFeatureExtractor,0.728001,0.895699,0.149068,1.0,0.362495,0.988441,0.999981,0.023652
LogisticRegression,amazon/chronos-t5-large,0.003364,0.988441,0.999865,0.362495,1.0,0.895699,0.191192,0.958863
LogisticRegression,amazon/chronos-t5-small,0.191192,0.999865,0.656973,0.988441,0.895699,1.0,0.932143,0.240944
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.895699,0.728001,0.063825,0.999981,0.191192,0.932143,1.0,0.007659
LogisticRegression,paris-noah/Mantis-8M,2.5e-05,0.506521,0.998051,0.023652,0.958863,0.240944,0.007659,1.0


Friedman Test balanced_accuracy_score: statistic=38.095, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.656973,0.00034,0.046563,0.001399,0.191192,0.958863,0.000124
LogisticRegression,AutonLab/MOMENT-1-large,0.656973,1.0,0.149068,0.895699,0.298244,0.994859,0.998051,0.086095
LogisticRegression,HandcraftedFeatureExtractor,0.00034,0.149068,1.0,0.895699,0.999981,0.582225,0.023652,0.999999
LogisticRegression,NoneFeatureExtractor,0.046563,0.895699,0.895699,1.0,0.977064,0.999403,0.506521,0.792733
LogisticRegression,amazon/chronos-t5-large,0.001399,0.298244,0.999981,0.977064,1.0,0.792733,0.063825,0.999403
LogisticRegression,amazon/chronos-t5-small,0.191192,0.994859,0.582225,0.999403,0.792733,1.0,0.849079,0.432513
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958863,0.998051,0.023652,0.506521,0.063825,0.849079,1.0,0.011316
LogisticRegression,paris-noah/Mantis-8M,0.000124,0.086095,0.999999,0.792733,0.999403,0.432513,0.011316,1.0


Friedman Test matthews_corrcoef: statistic=40.381, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.656973,0.000124,0.086095,0.001399,0.240944,0.958863,7.4e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.656973,1.0,0.086095,0.958863,0.298244,0.998051,0.998051,0.063825
LogisticRegression,HandcraftedFeatureExtractor,0.000124,0.086095,1.0,0.656973,0.999403,0.362495,0.011316,1.0
LogisticRegression,NoneFeatureExtractor,0.086095,0.958863,0.656973,1.0,0.932143,0.999865,0.656973,0.582225
LogisticRegression,amazon/chronos-t5-large,0.001399,0.298244,0.999403,0.932143,1.0,0.728001,0.063825,0.998051
LogisticRegression,amazon/chronos-t5-small,0.240944,0.998051,0.362495,0.999865,0.728001,1.0,0.895699,0.298244
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958863,0.998051,0.011316,0.656973,0.063825,0.895699,1.0,0.007659
LogisticRegression,paris-noah/Mantis-8M,7.4e-05,0.063825,1.0,0.582225,0.998051,0.298244,0.007659,1.0


Friedman Test f1_score: statistic=37.286, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.063825,0.000885,0.958863,0.003364,0.298244,0.792733,7.4e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.063825,1.0,0.932143,0.582225,0.988441,0.998051,0.849079,0.656973
LogisticRegression,HandcraftedFeatureExtractor,0.000885,0.932143,1.0,0.046563,0.999981,0.582225,0.149068,0.999403
LogisticRegression,NoneFeatureExtractor,0.958863,0.582225,0.046563,1.0,0.114243,0.932143,0.999865,0.007659
LogisticRegression,amazon/chronos-t5-large,0.003364,0.988441,0.999981,0.114243,1.0,0.792733,0.298244,0.988441
LogisticRegression,amazon/chronos-t5-small,0.298244,0.998051,0.582225,0.932143,0.792733,1.0,0.994859,0.240944
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.792733,0.849079,0.149068,0.999865,0.298244,0.994859,1.0,0.033441
LogisticRegression,paris-noah/Mantis-8M,7.4e-05,0.656973,0.999403,0.007659,0.988441,0.240944,0.033441,1.0


Friedman Test precision_score: statistic=35.857, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.728001,0.000552,0.005111,0.005111,0.191192,0.932143,0.00034
LogisticRegression,AutonLab/MOMENT-1-large,0.728001,1.0,0.149068,0.432513,0.432513,0.988441,0.999865,0.114243
LogisticRegression,HandcraftedFeatureExtractor,0.000552,0.149068,1.0,0.999403,0.999403,0.656973,0.046563,1.0
LogisticRegression,NoneFeatureExtractor,0.005111,0.432513,0.999403,1.0,1.0,0.932143,0.191192,0.998051
LogisticRegression,amazon/chronos-t5-large,0.005111,0.432513,0.999403,1.0,1.0,0.932143,0.191192,0.998051
LogisticRegression,amazon/chronos-t5-small,0.191192,0.988441,0.656973,0.932143,0.932143,1.0,0.895699,0.582225
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.932143,0.999865,0.046563,0.191192,0.191192,0.895699,1.0,0.033441
LogisticRegression,paris-noah/Mantis-8M,0.00034,0.114243,1.0,0.998051,0.998051,0.582225,0.033441,1.0


Friedman Test recall_score: statistic=31.208, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.023652,0.01648,0.977064,0.006268,0.240944,0.761299,0.00034
LogisticRegression,AutonLab/MOMENT-1-large,0.023652,1.0,1.0,0.298244,0.999945,0.988441,0.693121,0.958863
LogisticRegression,HandcraftedFeatureExtractor,0.01648,1.0,1.0,0.240944,0.999995,0.977064,0.619891,0.977064
LogisticRegression,NoneFeatureExtractor,0.977064,0.298244,0.240944,1.0,0.130777,0.849079,0.998887,0.01648
LogisticRegression,amazon/chronos-t5-large,0.006268,0.999945,0.999995,0.130777,1.0,0.915185,0.432513,0.996762
LogisticRegression,amazon/chronos-t5-small,0.240944,0.988441,0.977064,0.849079,0.915185,1.0,0.992155,0.506521
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.761299,0.693121,0.619891,0.998887,0.432513,0.992155,1.0,0.099381
LogisticRegression,paris-noah/Mantis-8M,0.00034,0.958863,0.977064,0.01648,0.996762,0.506521,0.099381,1.0


Friedman Test roc_auc_score: statistic=38.095, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.656973,0.00034,0.046563,0.001399,0.191192,0.958863,0.000124
LogisticRegression,AutonLab/MOMENT-1-large,0.656973,1.0,0.149068,0.895699,0.298244,0.994859,0.998051,0.086095
LogisticRegression,HandcraftedFeatureExtractor,0.00034,0.149068,1.0,0.895699,0.999981,0.582225,0.023652,0.999999
LogisticRegression,NoneFeatureExtractor,0.046563,0.895699,0.895699,1.0,0.977064,0.999403,0.506521,0.792733
LogisticRegression,amazon/chronos-t5-large,0.001399,0.298244,0.999981,0.977064,1.0,0.792733,0.063825,0.999403
LogisticRegression,amazon/chronos-t5-small,0.191192,0.994859,0.582225,0.999403,0.792733,1.0,0.849079,0.432513
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.958863,0.998051,0.023652,0.506521,0.063825,0.849079,1.0,0.011316
LogisticRegression,paris-noah/Mantis-8M,0.000124,0.086095,0.999999,0.792733,0.999403,0.432513,0.011316,1.0


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
12,,seed,HandcraftedFeatureExtractor,DummyClassifier,,engagement_10sec_35thresh,LOPO,0.55 ± 0.05,0.51 ± 0.02,0.59 ± 0.12,-0.01 ± 0.01,0.64 ± 0.15,0.65 ± 0.14,0.50 ± 0.03
15,MeanChanAggregator,seed,AutonLab/MOMENT-1-large,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.63 ± 0.15,0.61 ± 0.13,0.71 ± 0.14,-0.00 ± 0.00,0.64 ± 0.15,0.90 ± 0.12,0.50 ± 0.00
11,,seed,HandcraftedFeatureExtractor,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.62 ± 0.15,0.60 ± 0.12,0.70 ± 0.14,0.00 ± 0.01,0.64 ± 0.15,0.89 ± 0.13,0.50 ± 0.00
5,,seed,NoneFeatureExtractor,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.64 ± 0.15,0.61 ± 0.13,0.71 ± 0.14,-0.00 ± 0.00,0.64 ± 0.15,0.91 ± 0.12,0.50 ± 0.00
17,,seed,amazon/chronos-t5-large,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.64 ± 0.15,0.61 ± 0.13,0.71 ± 0.14,0.00 ± 0.00,0.64 ± 0.15,0.91 ± 0.12,0.50 ± 0.00
14,MeanChanAggregator,seed,amazon/chronos-t5-small,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.64 ± 0.15,0.61 ± 0.13,0.71 ± 0.14,-0.00 ± 0.00,0.64 ± 0.15,0.90 ± 0.12,0.50 ± 0.00
13,MeanChanAggregator,seed,ibm-granite/granite-timeseries-patchtsmixer,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.64 ± 0.15,0.61 ± 0.13,0.71 ± 0.14,0.00 ± 0.00,0.64 ± 0.15,0.91 ± 0.12,0.50 ± 0.00
16,,seed,paris-noah/Mantis-8M,LogisticRegression,,engagement_10sec_35thresh,LOPO,0.56 ± 0.11,0.54 ± 0.10,0.63 ± 0.13,-0.01 ± 0.03,0.64 ± 0.15,0.73 ± 0.12,0.50 ± 0.03


Friedman Test accuracy_score: statistic=14.587, pvalue=0.042
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.989299,0.999976,0.999925,0.940639,0.994624,0.940639,0.890315
LogisticRegression,AutonLab/MOMENT-1-large,0.989299,1.0,0.932159,0.999803,0.999987,1.0,0.999987,0.360833
LogisticRegression,HandcraftedFeatureExtractor,0.999976,0.932159,1.0,0.995826,0.803552,0.955355,0.803552,0.976696
LogisticRegression,NoneFeatureExtractor,0.999925,0.999803,0.995826,1.0,0.994624,0.999956,0.994624,0.671989
LogisticRegression,amazon/chronos-t5-large,0.940639,0.999987,0.803552,0.994624,1.0,0.999925,1.0,0.198396
LogisticRegression,amazon/chronos-t5-small,0.994624,1.0,0.955355,0.999956,0.999925,1.0,0.999925,0.419657
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.940639,0.999987,0.803552,0.994624,1.0,0.999925,1.0,0.198396
LogisticRegression,paris-noah/Mantis-8M,0.890315,0.360833,0.976696,0.671989,0.198396,0.419657,0.198396,1.0


Friedman Test balanced_accuracy_score: statistic=20.552, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.460685,0.890315,0.731542,0.272423,0.399644,0.272423,0.999999
LogisticRegression,AutonLab/MOMENT-1-large,0.460685,1.0,0.996803,0.999925,0.999987,1.0,0.999987,0.323878
LogisticRegression,HandcraftedFeatureExtractor,0.890315,0.996803,1.0,0.999987,0.972283,0.99316,0.972283,0.78649
LogisticRegression,NoneFeatureExtractor,0.731542,0.999925,0.999987,1.0,0.996803,0.999697,0.996803,0.588104
LogisticRegression,amazon/chronos-t5-large,0.272423,0.999987,0.972283,0.996803,1.0,0.999999,1.0,0.173045
LogisticRegression,amazon/chronos-t5-small,0.399644,1.0,0.99316,0.999697,0.999999,1.0,0.999999,0.272423
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.272423,0.999987,0.972283,0.996803,1.0,0.999999,1.0,0.173045
LogisticRegression,paris-noah/Mantis-8M,0.999999,0.323878,0.78649,0.588104,0.173045,0.272423,0.173045,1.0


Friedman Test matthews_corrcoef: statistic=6.466, pvalue=0.486
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999697,0.998206,1.0,0.998206,0.991398,0.998206,0.999803
LogisticRegression,AutonLab/MOMENT-1-large,0.999697,1.0,1.0,0.999976,1.0,0.999976,1.0,0.980563
LogisticRegression,HandcraftedFeatureExtractor,0.998206,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,NoneFeatureExtractor,1.0,0.999976,0.999697,1.0,0.999697,0.997586,0.999697,0.998689
LogisticRegression,amazon/chronos-t5-large,0.998206,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,amazon/chronos-t5-small,0.991398,0.999976,1.0,0.997586,1.0,1.0,1.0,0.901989
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.998206,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,paris-noah/Mantis-8M,0.999803,0.980563,0.955355,0.998689,0.955355,0.901989,0.955355,1.0


Friedman Test f1_score: statistic=62.227, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.074022,0.481605,0.111098,0.010672,0.173045,0.010672,0.932159
LogisticRegression,AutonLab/MOMENT-1-large,0.074022,1.0,0.986821,1.0,0.998689,0.999976,0.998689,0.001109
LogisticRegression,HandcraftedFeatureExtractor,0.481605,0.986821,1.0,0.995826,0.803552,0.99934,0.803552,0.02986
LogisticRegression,NoneFeatureExtractor,0.111098,1.0,0.995826,1.0,0.994624,1.0,0.994624,0.00209
LogisticRegression,amazon/chronos-t5-large,0.010672,0.998689,0.803552,0.994624,1.0,0.980563,1.0,6.8e-05
LogisticRegression,amazon/chronos-t5-small,0.173045,0.999976,0.99934,1.0,0.980563,1.0,0.980563,0.004315
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.010672,0.998689,0.803552,0.994624,1.0,0.980563,1.0,6.8e-05
LogisticRegression,paris-noah/Mantis-8M,0.932159,0.001109,0.02986,0.00209,6.8e-05,0.004315,6.8e-05,1.0


Friedman Test precision_score: statistic=6.798, pvalue=0.450
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998689,0.996803,0.999999,0.996803,0.986821,0.996803,0.999925
LogisticRegression,AutonLab/MOMENT-1-large,0.998689,1.0,1.0,0.999925,1.0,0.999994,1.0,0.972283
LogisticRegression,HandcraftedFeatureExtractor,0.996803,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,NoneFeatureExtractor,0.999999,0.999925,0.999697,1.0,0.999697,0.997586,0.999697,0.998689
LogisticRegression,amazon/chronos-t5-large,0.996803,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,amazon/chronos-t5-small,0.986821,0.999994,1.0,0.997586,1.0,1.0,1.0,0.901989
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.996803,1.0,1.0,0.999697,1.0,1.0,1.0,0.955355
LogisticRegression,paris-noah/Mantis-8M,0.999925,0.972283,0.955355,0.998689,0.955355,0.901989,0.955355,1.0


Friedman Test recall_score: statistic=81.063, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.047753,0.989299,0.062355,0.004851,0.139505,0.004851,0.835554
LogisticRegression,AutonLab/MOMENT-1-large,0.047753,1.0,0.360833,1.0,0.997586,0.999925,0.997586,0.000189
LogisticRegression,HandcraftedFeatureExtractor,0.989299,0.360833,1.0,0.419657,0.080494,0.630482,0.080494,0.289015
LogisticRegression,NoneFeatureExtractor,0.062355,1.0,0.419657,1.0,0.994624,0.999987,0.994624,0.000289
LogisticRegression,amazon/chronos-t5-large,0.004851,0.997586,0.080494,0.994624,1.0,0.96165,1.0,6e-06
LogisticRegression,amazon/chronos-t5-small,0.139505,0.999925,0.630482,0.999987,0.96165,1.0,0.96165,0.001109
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.004851,0.997586,0.080494,0.994624,1.0,0.96165,1.0,6e-06
LogisticRegression,paris-noah/Mantis-8M,0.835554,0.000189,0.289015,0.000289,6e-06,0.001109,6e-06,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.993132,0.984171,0.999993,0.984171,0.942831,0.984171,0.999563
LogisticRegression,AutonLab/MOMENT-1-large,0.993132,1.0,1.0,0.999563,1.0,0.999963,1.0,0.891167
LogisticRegression,HandcraftedFeatureExtractor,0.984171,1.0,1.0,0.998305,1.0,0.999998,1.0,0.838108
LogisticRegression,NoneFeatureExtractor,0.999993,0.999563,0.998305,1.0,0.998305,0.987811,0.998305,0.993132
LogisticRegression,amazon/chronos-t5-large,0.984171,1.0,1.0,0.998305,1.0,0.999998,1.0,0.838108
LogisticRegression,amazon/chronos-t5-small,0.942831,0.999963,0.999998,0.987811,0.999998,1.0,0.999998,0.699517
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.984171,1.0,1.0,0.998305,1.0,0.999998,1.0,0.838108
LogisticRegression,paris-noah/Mantis-8M,0.999563,0.891167,0.838108,0.993132,0.838108,0.699517,0.838108,1.0


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
24,,usilaughs,HandcraftedFeatureExtractor,DummyClassifier,,left,LOPO,0.49 ± 0.03,0.47 ± 0.02,0.31 ± 0.07,-0.06 ± 0.05,0.31 ± 0.07,0.31 ± 0.07,0.47 ± 0.02
26,MeanChanAggregator,usilaughs,AutonLab/MOMENT-1-large,LogisticRegression,,left,LOPO,0.54 ± 0.04,0.53 ± 0.04,0.43 ± 0.05,0.07 ± 0.08,0.49 ± 0.06,0.40 ± 0.06,0.53 ± 0.04
23,,usilaughs,HandcraftedFeatureExtractor,LogisticRegression,,left,LOPO,0.64 ± 0.07,0.63 ± 0.07,0.58 ± 0.08,0.30 ± 0.15,0.67 ± 0.11,0.58 ± 0.10,0.63 ± 0.07
22,MeanChanAggregator,usilaughs,amazon/chronos-t5-large,LogisticRegression,,left,LOPO,0.71 ± 0.06,0.70 ± 0.06,0.63 ± 0.09,0.43 ± 0.13,0.72 ± 0.11,0.62 ± 0.11,0.70 ± 0.06
25,MeanChanAggregator,usilaughs,amazon/chronos-t5-small,LogisticRegression,,left,LOPO,0.70 ± 0.07,0.69 ± 0.07,0.61 ± 0.10,0.40 ± 0.14,0.68 ± 0.11,0.61 ± 0.11,0.69 ± 0.07


Friedman Test accuracy_score: statistic=46.851, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.286002,0.016439,2.512613e-08,1e-05
LogisticRegression,AutonLab/MOMENT-1-large,0.2860024,1.0,0.783543,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01643881,0.783543,1.0,0.03464118,0.375652
LogisticRegression,amazon/chronos-t5-large,2.512613e-08,0.000604,0.034641,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,9.53622e-06,0.02723,0.375652,0.8262746,1.0


Friedman Test balanced_accuracy_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Friedman Test matthews_corrcoef: statistic=45.064, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.193592,0.00632,3.222757e-08,4e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.1935917,1.0,0.736935,0.001620493,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.006319986,0.736935,1.0,0.08382349,0.450246
LogisticRegression,amazon/chronos-t5-large,3.222757e-08,0.00162,0.083823,1.0,0.912338
LogisticRegression,amazon/chronos-t5-small,4.111551e-06,0.030743,0.450246,0.9123379,1.0


Friedman Test f1_score: statistic=41.696, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.897689,0.011036,8.85139e-07,0.000253
LogisticRegression,AutonLab/MOMENT-1-large,0.8976892,1.0,0.136288,8.404476e-05,0.008383
LogisticRegression,HandcraftedFeatureExtractor,0.0110361,0.136288,1.0,0.2102181,0.864477
LogisticRegression,amazon/chronos-t5-large,8.85139e-07,8.4e-05,0.210218,1.0,0.783543
LogisticRegression,amazon/chronos-t5-small,0.0002527819,0.008383,0.864477,0.7835433,1.0


Friedman Test precision_score: statistic=49.703, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.04898,7e-05,9.133075e-09,5e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.04898045,1.0,0.399924,0.006319986,0.149262
LogisticRegression,HandcraftedFeatureExtractor,6.95391e-05,0.399924,1.0,0.4761338,0.984218
LogisticRegression,amazon/chronos-t5-large,9.133075e-09,0.00632,0.476134,1.0,0.805438
LogisticRegression,amazon/chronos-t5-small,5.086584e-06,0.149262,0.984218,0.8054383,1.0


Friedman Test recall_score: statistic=37.333, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.760677,0.004081,1e-05,0.000428
LogisticRegression,AutonLab/MOMENT-1-large,0.760677,1.0,0.136288,0.001899,0.030743
LogisticRegression,HandcraftedFeatureExtractor,0.004081,0.136288,1.0,0.63538,0.979204
LogisticRegression,amazon/chronos-t5-large,1e-05,0.001899,0.63538,1.0,0.925665
LogisticRegression,amazon/chronos-t5-small,0.000428,0.030743,0.979204,0.925665,1.0


Friedman Test roc_auc_score: statistic=46.099, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.265686,0.012628,1.955751e-08,8e-06
LogisticRegression,AutonLab/MOMENT-1-large,0.2656858,1.0,0.760677,0.0006037305,0.02723
LogisticRegression,HandcraftedFeatureExtractor,0.01262753,0.760677,1.0,0.03895772,0.399924
LogisticRegression,amazon/chronos-t5-large,1.955751e-08,0.000604,0.038958,1.0,0.826275
LogisticRegression,amazon/chronos-t5-small,7.746619e-06,0.02723,0.399924,0.8262746,1.0


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
45,,usilaughs,HandcraftedFeatureExtractor,DummyClassifier,GroupUnderSampler,right,LOPO,0.54 ± 0.03,0.53 ± 0.03,0.42 ± 0.08,0.06 ± 0.06,0.39 ± 0.08,0.46 ± 0.09,0.53 ± 0.03
44,,usilaughs,HandcraftedFeatureExtractor,LogisticRegression,GroupUnderSampler,right,LOPO,0.73 ± 0.10,0.74 ± 0.10,0.78 ± 0.08,0.48 ± 0.21,0.75 ± 0.11,0.86 ± 0.06,0.74 ± 0.10
43,MeanChanAggregator,usilaughs,amazon/chronos-t5-large,LogisticRegression,GroupUnderSampler,right,LOPO,0.74 ± 0.07,0.74 ± 0.07,0.71 ± 0.08,0.49 ± 0.14,0.74 ± 0.10,0.74 ± 0.10,0.74 ± 0.07


Friedman Test accuracy_score: statistic=13.611, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.004454,0.006822
LogisticRegression,HandcraftedFeatureExtractor,0.004454,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.006822,0.990854,1.0


Friedman Test balanced_accuracy_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Friedman Test matthews_corrcoef: statistic=14.966, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.002276,0.003576
LogisticRegression,HandcraftedFeatureExtractor,0.002276,1.0,0.990854
LogisticRegression,amazon/chronos-t5-large,0.003576,0.990854,1.0


Friedman Test f1_score: statistic=25.737, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,4e-06,0.002859
LogisticRegression,HandcraftedFeatureExtractor,4e-06,1.0,0.268023
LogisticRegression,amazon/chronos-t5-large,0.002859,0.268023,1.0


Friedman Test precision_score: statistic=23.138, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.000243,0.000185
LogisticRegression,HandcraftedFeatureExtractor,0.000243,1.0,0.997705
LogisticRegression,amazon/chronos-t5-large,0.000185,0.997705,1.0


Friedman Test recall_score: statistic=32.849, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,2.525956e-07,0.004454
LogisticRegression,HandcraftedFeatureExtractor,2.525956e-07,1.0,0.072064
LogisticRegression,amazon/chronos-t5-large,0.004453623,0.07206404,1.0


Friedman Test roc_auc_score: statistic=13.310, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,HandcraftedFeatureExtractor,amazon/chronos-t5-large
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.003576,0.008391
LogisticRegression,HandcraftedFeatureExtractor,0.003576,1.0,0.963917
LogisticRegression,amazon/chronos-t5-large,0.008391,0.963917,1.0


Unnamed: 0,Aggregator,Dataset,Features,Model,Resampling,Side,Validation,accuracy_score,balanced_accuracy_score,f1_score,matthews_corrcoef,precision_score,recall_score,roc_auc_score
38,,usilaughs,HandcraftedFeatureExtractor,DummyClassifier,,right,LOPO,0.52 ± 0.03,0.51 ± 0.03,0.40 ± 0.06,0.01 ± 0.07,0.40 ± 0.06,0.41 ± 0.07,0.51 ± 0.03
32,CatAggregator,usilaughs,AutonLab/MOMENT-1-large,LogisticRegression,,right,LOPO,0.55 ± 0.05,0.54 ± 0.05,0.43 ± 0.07,0.08 ± 0.10,0.48 ± 0.07,0.42 ± 0.08,0.54 ± 0.05
40,MeanChanAggregator,usilaughs,AutonLab/MOMENT-1-large,LogisticRegression,,right,LOPO,0.57 ± 0.04,0.55 ± 0.04,0.40 ± 0.07,0.12 ± 0.11,0.57 ± 0.11,0.34 ± 0.07,0.55 ± 0.04
33,MeanTimeAggregator,usilaughs,AutonLab/MOMENT-1-large,LogisticRegression,,right,LOPO,0.55 ± 0.05,0.54 ± 0.05,0.43 ± 0.07,0.08 ± 0.10,0.48 ± 0.07,0.42 ± 0.08,0.54 ± 0.05
37,,usilaughs,HandcraftedFeatureExtractor,LogisticRegression,,right,LOPO,0.72 ± 0.10,0.73 ± 0.09,0.74 ± 0.09,0.47 ± 0.20,0.75 ± 0.11,0.78 ± 0.08,0.73 ± 0.09
7,,usilaughs,NoneFeatureExtractor,LogisticRegression,,right,LOPO,0.69 ± 0.12,0.68 ± 0.12,0.63 ± 0.14,0.39 ± 0.25,0.74 ± 0.14,0.61 ± 0.14,0.68 ± 0.12
28,CatAggregator,usilaughs,amazon/chronos-t5-large,LogisticRegression,,right,LOPO,0.74 ± 0.07,0.73 ± 0.07,0.68 ± 0.09,0.48 ± 0.15,0.74 ± 0.11,0.67 ± 0.10,0.73 ± 0.07
36,MeanChanAggregator,usilaughs,amazon/chronos-t5-large,LogisticRegression,,right,LOPO,0.73 ± 0.07,0.72 ± 0.07,0.66 ± 0.09,0.48 ± 0.15,0.77 ± 0.10,0.66 ± 0.11,0.72 ± 0.07
30,MeanTimeAggregator,usilaughs,amazon/chronos-t5-large,LogisticRegression,,right,LOPO,0.76 ± 0.07,0.76 ± 0.07,0.72 ± 0.09,0.53 ± 0.15,0.77 ± 0.10,0.74 ± 0.10,0.76 ± 0.07
29,CatAggregator,usilaughs,amazon/chronos-t5-small,LogisticRegression,,right,LOPO,0.75 ± 0.08,0.74 ± 0.08,0.69 ± 0.11,0.50 ± 0.18,0.77 ± 0.11,0.69 ± 0.11,0.74 ± 0.08


Friedman Test accuracy_score: statistic=31.441, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.902427,0.003694,0.006088,0.028513,0.003694,0.001281,0.033637
LogisticRegression,AutonLab/MOMENT-1-large,0.902427,1.0,0.193743,0.255293,0.53438,0.193743,0.103382,0.571004
LogisticRegression,HandcraftedFeatureExtractor,0.003694,0.193743,1.0,1.0,0.999111,1.0,0.999996,0.998438
LogisticRegression,NoneFeatureExtractor,0.006088,0.255293,1.0,1.0,0.999839,1.0,0.999931,0.999661
LogisticRegression,amazon/chronos-t5-large,0.028513,0.53438,0.999111,0.999839,1.0,0.999111,0.990595,1.0
LogisticRegression,amazon/chronos-t5-small,0.003694,0.193743,1.0,1.0,0.999111,1.0,0.999996,0.998438
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001281,0.103382,0.999996,0.999931,0.990595,0.999996,1.0,0.986508
LogisticRegression,paris-noah/Mantis-8M,0.033637,0.571004,0.998438,0.999661,1.0,0.998438,0.986508,1.0


Friedman Test balanced_accuracy_score: statistic=33.418, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.943196,0.000919,0.008137,0.028513,0.004997,0.001281,0.020272
LogisticRegression,AutonLab/MOMENT-1-large,0.943196,1.0,0.058304,0.229335,0.444228,0.172252,0.07279,0.375751
LogisticRegression,HandcraftedFeatureExtractor,0.000919,0.058304,1.0,0.999344,0.98401,0.999893,1.0,0.992236
LogisticRegression,NoneFeatureExtractor,0.008137,0.229335,0.999344,1.0,0.999957,1.0,0.999764,0.999996
LogisticRegression,amazon/chronos-t5-large,0.028513,0.444228,0.98401,0.999957,1.0,0.999661,0.990595,1.0
LogisticRegression,amazon/chronos-t5-small,0.004997,0.172252,0.999893,1.0,0.999661,1.0,0.999974,0.999931
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001281,0.07279,1.0,0.999764,0.990595,0.999974,1.0,0.99585
LogisticRegression,paris-noah/Mantis-8M,0.020272,0.375751,0.992236,0.999996,1.0,0.999931,0.99585,1.0


Friedman Test matthews_corrcoef: statistic=34.350, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.91175,0.001429,0.004089,0.022106,0.003334,0.000463,0.018574
LogisticRegression,AutonLab/MOMENT-1-large,0.91175,1.0,0.103382,0.193743,0.461957,0.172252,0.050053,0.426715
LogisticRegression,HandcraftedFeatureExtractor,0.001429,0.103382,1.0,0.999996,0.99585,0.999999,0.999996,0.997396
LogisticRegression,NoneFeatureExtractor,0.004089,0.193743,0.999996,1.0,0.999764,1.0,0.999524,0.999893
LogisticRegression,amazon/chronos-t5-large,0.022106,0.461957,0.99585,0.999764,1.0,0.999524,0.974378,1.0
LogisticRegression,amazon/chronos-t5-small,0.003334,0.172252,0.999999,1.0,0.999524,1.0,0.999764,0.999764
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000463,0.050053,0.999996,0.999524,0.974378,0.999764,1.0,0.981174
LogisticRegression,paris-noah/Mantis-8M,0.018574,0.426715,0.997396,0.999893,1.0,0.999764,0.981174,1.0


Friedman Test f1_score: statistic=45.309, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999893,1.7e-05,0.033637,0.033637,0.014215,0.000463,0.020272
LogisticRegression,AutonLab/MOMENT-1-large,0.999893,1.0,0.000139,0.110577,0.110577,0.054046,0.002711,0.07279
LogisticRegression,HandcraftedFeatureExtractor,1.7e-05,0.000139,1.0,0.607503,0.607503,0.777274,0.997396,0.713113
LogisticRegression,NoneFeatureExtractor,0.033637,0.110577,0.607503,1.0,1.0,0.999996,0.949652,1.0
LogisticRegression,amazon/chronos-t5-large,0.033637,0.110577,0.607503,1.0,1.0,0.999996,0.949652,1.0
LogisticRegression,amazon/chronos-t5-small,0.014215,0.054046,0.777274,0.999996,0.999996,1.0,0.988694,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000463,0.002711,0.997396,0.949652,0.949652,0.988694,1.0,0.977972
LogisticRegression,paris-noah/Mantis-8M,0.020272,0.07279,0.713113,1.0,1.0,1.0,0.977972,1.0


Friedman Test precision_score: statistic=41.356, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.182775,0.000288,0.000288,0.000256,0.000919,0.000108,0.001975
LogisticRegression,AutonLab/MOMENT-1-large,0.182775,1.0,0.571004,0.571004,0.552689,0.746005,0.426715,0.846889
LogisticRegression,HandcraftedFeatureExtractor,0.000288,0.571004,1.0,1.0,1.0,0.999996,0.999999,0.999839
LogisticRegression,NoneFeatureExtractor,0.000288,0.571004,1.0,1.0,1.0,0.999996,0.999999,0.999839
LogisticRegression,amazon/chronos-t5-large,0.000256,0.552689,1.0,1.0,1.0,0.999992,1.0,0.999764
LogisticRegression,amazon/chronos-t5-small,0.000919,0.746005,0.999996,0.999996,0.999992,1.0,0.999764,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.000108,0.426715,0.999999,0.999999,1.0,0.999764,1.0,0.997971
LogisticRegression,paris-noah/Mantis-8M,0.001975,0.846889,0.999839,0.999839,0.999764,1.0,0.997971,1.0


Friedman Test recall_score: statistic=55.684, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.9840101,6.521091e-05,0.497935,0.217023,0.229335,0.001775,0.07823
LogisticRegression,AutonLab/MOMENT-1-large,0.98401,1.0,4.754837e-07,0.07279,0.017004,0.018574,2.6e-05,0.003694
LogisticRegression,HandcraftedFeatureExtractor,6.5e-05,4.754837e-07,1.0,0.110577,0.312379,0.297484,0.99585,0.589289
LogisticRegression,NoneFeatureExtractor,0.497935,0.07278964,0.1105769,1.0,0.999764,0.999839,0.47987,0.986508
LogisticRegression,amazon/chronos-t5-large,0.217023,0.01700433,0.3123791,0.999764,1.0,1.0,0.792225,0.999893
LogisticRegression,amazon/chronos-t5-small,0.229335,0.01857439,0.2974837,0.999839,1.0,1.0,0.777274,0.999839
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001775,2.610858e-05,0.9958502,0.47987,0.792225,0.777274,1.0,0.955576
LogisticRegression,paris-noah/Mantis-8M,0.07823,0.003693698,0.5892887,0.986508,0.999893,0.999839,0.955576,1.0


Friedman Test roc_auc_score: statistic=32.411, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.949652,0.001148,0.009833,0.033637,0.006088,0.001593,0.030983
LogisticRegression,AutonLab/MOMENT-1-large,0.949652,1.0,0.062839,0.242093,0.461957,0.182775,0.07823,0.444228
LogisticRegression,HandcraftedFeatureExtractor,0.001148,0.062839,1.0,0.999344,0.98401,0.999893,1.0,0.986508
LogisticRegression,NoneFeatureExtractor,0.009833,0.242093,0.999344,1.0,0.999957,1.0,0.999764,0.999974
LogisticRegression,amazon/chronos-t5-large,0.033637,0.461957,0.98401,0.999957,1.0,0.999661,0.990595,1.0
LogisticRegression,amazon/chronos-t5-small,0.006088,0.182775,0.999893,1.0,0.999661,1.0,0.999974,0.999764
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.001593,0.07823,1.0,0.999764,0.990595,0.999974,1.0,0.992236
LogisticRegression,paris-noah/Mantis-8M,0.030983,0.444228,0.986508,0.999974,1.0,0.999764,0.992236,1.0


In [3]:
# Present the collected results for the "tacv" validation method.
# `present_results` and `all_results` are defined outside this cell.
# NOTE(review): the return value is bound to `results_lopo` although
# val_method="tacv" (the tables below report Validation=TACV) — confirm the
# variable name is intentional.
results_lopo = present_results(
    all_results,
    val_method="tacv",
    # presumably excludes XGBoost entries — verify against present_results
    remove_xgboost=True,
    # NOTE(review): amazon/chronos-t5-small still shows up in the summary and
    # Nemenyi tables printed below — confirm what this flag actually removes.
    remove_chronos_small_from_test=True,
)

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
39,apsync,engagement_10sec_4thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.560551,0.050561,0.503783,0.005718,0.005368,0.011405,0.663263,0.110992,0.664136,0.190783,0.734895,0.219935,0.503433,0.007327
41,apsync,engagement_10sec_4thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.607922,0.14102,0.567032,0.137903,0.000652,0.061786,0.737747,0.109768,0.660955,0.19318,0.894366,0.06269,0.498298,0.028745
36,apsync,engagement_10sec_4thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.667375,0.172993,0.594323,0.189552,0.015067,0.038193,0.75949,0.162466,0.660075,0.198181,0.919078,0.134397,0.498733,0.01339
35,apsync,engagement_10sec_4thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.649133,0.169867,0.5935,0.178338,0.001016,0.035938,0.759958,0.134301,0.663225,0.191739,0.92928,0.043862,0.503533,0.010989
40,apsync,engagement_10sec_4thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.626427,0.158589,0.591177,0.154383,0.039567,0.050727,0.743578,0.123521,0.667028,0.19259,0.892729,0.080608,0.513582,0.019573
38,apsync,engagement_10sec_4thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.661483,0.194475,0.6,0.198,0.0,0.0,0.779894,0.138221,0.661483,0.194475,1.0,0.0,0.5,0.0
37,apsync,engagement_10sec_4thresh,LogisticRegression,,paris-noah/Mantis-8M,,TACV,0.577785,0.059046,0.542561,0.071585,0.027412,0.112598,0.663833,0.092072,0.676391,0.180319,0.675645,0.078516,0.519523,0.071223


Friedman Test accuracy_score: statistic=5.978, pvalue=0.426
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.67574,0.999812,0.963987,0.948498,0.999991
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.57761,0.998694,0.928962,0.905096,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.67574,0.57761,1.0,0.876775,0.994704,0.997234,0.527684
LogisticRegression,amazon/chronos-t5-large,0.999812,0.998694,0.876775,1.0,0.997234,0.994704,0.997234
LogisticRegression,amazon/chronos-t5-small,0.963987,0.928962,0.994704,0.997234,1.0,1.0,0.905096
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.948498,0.905096,0.997234,0.994704,1.0,1.0,0.876775
LogisticRegression,paris-noah/Mantis-8M,0.999991,1.0,0.527684,0.997234,0.905096,0.876775,1.0


Friedman Test balanced_accuracy_score: statistic=2.196, pvalue=0.901
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999991,0.963987,0.990645,0.905096,0.990645,0.999949
LogisticRegression,AutonLab/MOMENT-1-large,0.999991,1.0,0.990645,0.998694,0.963987,0.998694,1.0
LogisticRegression,HandcraftedFeatureExtractor,0.963987,0.990645,1.0,0.999991,0.999991,0.999991,0.994704
LogisticRegression,amazon/chronos-t5-large,0.990645,0.998694,0.999991,1.0,0.999459,1.0,0.999459
LogisticRegression,amazon/chronos-t5-small,0.905096,0.963987,0.999991,0.999459,1.0,0.999459,0.975835
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.990645,0.998694,0.999991,1.0,0.999459,1.0,0.999459
LogisticRegression,paris-noah/Mantis-8M,0.999949,1.0,0.994704,0.999459,0.975835,0.999459,1.0


Friedman Test matthews_corrcoef: statistic=2.427, pvalue=0.877
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.999812,0.999949,0.975835,0.999999,0.999949
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.999459,0.999991,0.963987,1.0,0.999812
LogisticRegression,HandcraftedFeatureExtractor,0.999812,0.999459,1.0,0.994704,0.998694,0.998694,1.0
LogisticRegression,amazon/chronos-t5-large,0.999949,0.999991,0.994704,1.0,0.905096,0.999999,0.997234
LogisticRegression,amazon/chronos-t5-small,0.975835,0.963987,0.998694,0.905096,1.0,0.948498,0.997234
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999999,1.0,0.998694,0.999999,0.948498,1.0,0.999459
LogisticRegression,paris-noah/Mantis-8M,0.999949,0.999812,1.0,0.997234,0.997234,0.999459,1.0


Friedman Test f1_score: statistic=13.804, pvalue=0.032
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.998694,0.627226,0.990645,0.999459,0.163298,0.990645
LogisticRegression,AutonLab/MOMENT-1-large,0.998694,1.0,0.905096,0.999991,1.0,0.429896,0.876775
LogisticRegression,HandcraftedFeatureExtractor,0.627226,0.905096,1.0,0.963987,0.876775,0.98454,0.191943
LogisticRegression,amazon/chronos-t5-large,0.990645,0.999991,0.963987,1.0,0.999949,0.57761,0.766379
LogisticRegression,amazon/chronos-t5-small,0.999459,1.0,0.876775,0.999949,1.0,0.383365,0.905096
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.429896,0.98454,0.57761,0.383365,1.0,0.021791
LogisticRegression,paris-noah/Mantis-8M,0.990645,0.876775,0.191943,0.766379,0.905096,0.021791,1.0


Friedman Test precision_score: statistic=1.282, pvalue=0.973
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999991,1.0,0.999999,0.997234,0.999949,1.0
LogisticRegression,AutonLab/MOMENT-1-large,0.999991,1.0,0.999949,1.0,0.98454,1.0,0.999991
LogisticRegression,HandcraftedFeatureExtractor,1.0,0.999949,1.0,0.999991,0.998694,0.999812,1.0
LogisticRegression,amazon/chronos-t5-large,0.999999,1.0,0.999991,1.0,0.990645,0.999999,0.999999
LogisticRegression,amazon/chronos-t5-small,0.997234,0.98454,0.998694,0.990645,1.0,0.975835,0.997234
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999949,1.0,0.999812,0.999999,0.975835,1.0,0.999949
LogisticRegression,paris-noah/Mantis-8M,1.0,0.999991,1.0,0.999999,0.997234,0.999949,1.0


Friedman Test recall_score: statistic=15.604, pvalue=0.016
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999459,0.766379,0.98454,0.998694,0.163298,0.928962
LogisticRegression,AutonLab/MOMENT-1-large,0.999459,1.0,0.948498,0.999812,1.0,0.383365,0.72237
LogisticRegression,HandcraftedFeatureExtractor,0.766379,0.948498,1.0,0.994704,0.963987,0.948498,0.137885
LogisticRegression,amazon/chronos-t5-large,0.98454,0.999812,0.994704,1.0,0.999949,0.627226,0.478211
LogisticRegression,amazon/chronos-t5-small,0.998694,1.0,0.963987,0.999949,1.0,0.429896,0.67574
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.383365,0.948498,0.627226,0.429896,1.0,0.006187
LogisticRegression,paris-noah/Mantis-8M,0.928962,0.72237,0.137885,0.478211,0.67574,0.006187,1.0


Friedman Test roc_auc_score: statistic=nan, pvalue=nan
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.999982,0.999998,0.983178,0.999998,0.999998
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.999902,0.999982,0.972653,1.0,0.999982
LogisticRegression,HandcraftedFeatureExtractor,0.999982,0.999902,1.0,1.0,0.997553,0.99964,1.0
LogisticRegression,amazon/chronos-t5-large,0.999998,0.999982,1.0,1.0,0.99488,0.999902,1.0
LogisticRegression,amazon/chronos-t5-small,0.983178,0.972653,0.997553,0.99488,1.0,0.958006,0.99488
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.999998,1.0,0.99964,0.999902,0.958006,1.0,0.999902
LogisticRegression,paris-noah/Mantis-8M,0.999998,0.999982,1.0,1.0,0.99488,0.999902,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
44,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.409179,0.126033,0.495931,0.005103,-0.005947,0.008245,0.380454,0.307846,0.507808,0.414703,0.305448,0.246981,0.495931,0.005103
46,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.685536,0.047774,0.671579,0.044308,0.277698,0.111877,0.775661,0.038262,0.898969,0.045172,0.688387,0.074978,0.671579,0.044308
43,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.776265,0.040743,0.830071,0.041804,0.512755,0.090253,0.84241,0.025621,0.961691,0.031821,0.750841,0.037521,0.830071,0.041804
0,bihearts,left,LogisticRegression,GroupUnderSampler,NoneFeatureExtractor,,TACV,0.647594,0.041539,0.749733,0.047064,0.370803,0.059361,0.728479,0.039175,0.946038,0.076188,0.598348,0.05867,0.749733,0.047064
42,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.733597,0.045006,0.785686,0.031861,0.441448,0.104907,0.808792,0.026878,0.952221,0.029885,0.704134,0.037858,0.785686,0.031861
45,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.673411,0.036678,0.730974,0.057816,0.339848,0.051974,0.76041,0.035016,0.913251,0.075118,0.655163,0.041796,0.730974,0.057816
3,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.618506,0.037796,0.614872,0.046506,0.181688,0.090396,0.717323,0.041927,0.871466,0.052669,0.613656,0.063585,0.614872,0.046506
2,bihearts,left,LogisticRegression,GroupUnderSampler,paris-noah/Mantis-8M,,TACV,0.790419,0.075543,0.823985,0.069022,0.517173,0.146705,0.855281,0.056925,0.952759,0.041259,0.782201,0.091148,0.823985,0.069022


Friedman Test accuracy_score: statistic=29.329, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.316437,0.000943,0.848284,0.022251,0.437482,0.969547,0.002719
LogisticRegression,AutonLab/MOMENT-1-large,0.316437,1.0,0.57036,0.990868,0.969547,0.999999,0.924296,0.742183
LogisticRegression,HandcraftedFeatureExtractor,0.000943,0.57036,1.0,0.119104,0.990868,0.437482,0.040902,0.999996
LogisticRegression,NoneFeatureExtractor,0.848284,0.990868,0.119104,1.0,0.57036,0.998219,0.99994,0.21605
LogisticRegression,amazon/chronos-t5-large,0.022251,0.969547,0.990868,0.57036,1.0,0.924296,0.316437,0.999099
LogisticRegression,amazon/chronos-t5-small,0.437482,0.999999,0.437482,0.998219,0.924296,1.0,0.969547,0.615019
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.969547,0.924296,0.040902,0.99994,0.316437,0.969547,1.0,0.085327
LogisticRegression,paris-noah/Mantis-8M,0.002719,0.742183,0.999996,0.21605,0.999099,0.615019,0.085327,1.0


Friedman Test balanced_accuracy_score: statistic=29.533, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.780488,0.001616,0.119104,0.040902,0.354807,0.998219,0.002719
LogisticRegression,AutonLab/MOMENT-1-large,0.780488,1.0,0.21605,0.942583,0.780488,0.998219,0.98581,0.280411
LogisticRegression,HandcraftedFeatureExtractor,0.001616,0.21605,1.0,0.902525,0.98581,0.615019,0.017974,1.0
LogisticRegression,NoneFeatureExtractor,0.119104,0.942583,0.902525,1.0,0.99994,0.999584,0.437482,0.942583
LogisticRegression,amazon/chronos-t5-large,0.040902,0.780488,0.98581,0.99994,1.0,0.98581,0.21605,0.994392
LogisticRegression,amazon/chronos-t5-small,0.354807,0.998219,0.615019,0.999584,0.98581,1.0,0.780488,0.701493
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.998219,0.98581,0.017974,0.437482,0.21605,0.780488,1.0,0.027402
LogisticRegression,paris-noah/Mantis-8M,0.002719,0.280411,1.0,0.942583,0.994392,0.701493,0.027402,1.0


Friedman Test matthews_corrcoef: statistic=30.467, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.780488,0.001616,0.280411,0.027402,0.280411,0.998219,0.001616
LogisticRegression,AutonLab/MOMENT-1-large,0.780488,1.0,0.21605,0.994392,0.701493,0.994392,0.98581,0.21605
LogisticRegression,HandcraftedFeatureExtractor,0.001616,0.21605,1.0,0.701493,0.994392,0.701493,0.017974,1.0
LogisticRegression,NoneFeatureExtractor,0.280411,0.994392,0.701493,1.0,0.98581,1.0,0.701493,0.701493
LogisticRegression,amazon/chronos-t5-large,0.027402,0.701493,0.994392,0.98581,1.0,0.98581,0.162371,0.994392
LogisticRegression,amazon/chronos-t5-small,0.280411,0.994392,0.701493,1.0,0.98581,1.0,0.701493,0.701493
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.998219,0.98581,0.017974,0.701493,0.162371,0.701493,1.0,0.017974
LogisticRegression,paris-noah/Mantis-8M,0.001616,0.21605,1.0,0.701493,0.994392,0.701493,0.017974,1.0


Friedman Test f1_score: statistic=28.733, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.280411,0.000943,0.848284,0.040902,0.52552,0.942583,0.001616
LogisticRegression,AutonLab/MOMENT-1-large,0.280411,1.0,0.615019,0.98581,0.994392,0.99994,0.942583,0.701493
LogisticRegression,HandcraftedFeatureExtractor,0.000943,0.615019,1.0,0.119104,0.969547,0.354807,0.05974,1.0
LogisticRegression,NoneFeatureExtractor,0.848284,0.98581,0.119104,1.0,0.701493,0.999584,0.999996,0.162371
LogisticRegression,amazon/chronos-t5-large,0.040902,0.994392,0.969547,0.701493,1.0,0.942583,0.52552,0.98581
LogisticRegression,amazon/chronos-t5-small,0.52552,0.99994,0.354807,0.999584,0.942583,1.0,0.994392,0.437482
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.942583,0.942583,0.05974,0.999996,0.52552,0.994392,1.0,0.085327
LogisticRegression,paris-noah/Mantis-8M,0.001616,0.701493,1.0,0.162371,0.98581,0.437482,0.085327,1.0


Friedman Test precision_score: statistic=25.588, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.848284,0.005728,0.022251,0.040902,0.437482,0.969547,0.011548
LogisticRegression,AutonLab/MOMENT-1-large,0.848284,1.0,0.316437,0.57036,0.701493,0.998219,0.99994,0.437482
LogisticRegression,HandcraftedFeatureExtractor,0.005728,0.316437,1.0,0.99994,0.999099,0.742183,0.139481,0.999999
LogisticRegression,NoneFeatureExtractor,0.022251,0.57036,0.99994,1.0,0.999999,0.924296,0.316437,0.999999
LogisticRegression,amazon/chronos-t5-large,0.040902,0.701493,0.999099,0.999999,1.0,0.969547,0.437482,0.99994
LogisticRegression,amazon/chronos-t5-small,0.437482,0.998219,0.742183,0.924296,0.969547,1.0,0.969547,0.848284
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.969547,0.99994,0.139481,0.316437,0.437482,0.969547,1.0,0.21605
LogisticRegression,paris-noah/Mantis-8M,0.011548,0.437482,0.999999,0.999999,0.99994,0.848284,0.21605,1.0


Friedman Test recall_score: statistic=26.254, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.246911,0.002719,0.942583,0.085327,0.57036,0.957576,0.003503
LogisticRegression,AutonLab/MOMENT-1-large,0.246911,1.0,0.815971,0.924296,0.999829,0.999584,0.902525,0.848284
LogisticRegression,HandcraftedFeatureExtractor,0.002719,0.815971,1.0,0.119104,0.969547,0.481055,0.101104,1.0
LogisticRegression,NoneFeatureExtractor,0.942583,0.924296,0.119104,1.0,0.701493,0.996738,1.0,0.139481
LogisticRegression,amazon/chronos-t5-large,0.085327,0.999829,0.969547,0.701493,1.0,0.978833,0.658921,0.978833
LogisticRegression,amazon/chronos-t5-small,0.57036,0.999584,0.481055,0.996738,0.978833,1.0,0.994392,0.52552
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.957576,0.902525,0.101104,1.0,0.658921,0.994392,1.0,0.119104
LogisticRegression,paris-noah/Mantis-8M,0.003503,0.848284,1.0,0.139481,0.978833,0.52552,0.119104,1.0


Friedman Test roc_auc_score: statistic=29.533, pvalue=0.000
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,NoneFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.780488,0.001616,0.119104,0.040902,0.354807,0.998219,0.002719
LogisticRegression,AutonLab/MOMENT-1-large,0.780488,1.0,0.21605,0.942583,0.780488,0.998219,0.98581,0.280411
LogisticRegression,HandcraftedFeatureExtractor,0.001616,0.21605,1.0,0.902525,0.98581,0.615019,0.017974,1.0
LogisticRegression,NoneFeatureExtractor,0.119104,0.942583,0.902525,1.0,0.99994,0.999584,0.437482,0.942583
LogisticRegression,amazon/chronos-t5-large,0.040902,0.780488,0.98581,0.99994,1.0,0.98581,0.21605,0.994392
LogisticRegression,amazon/chronos-t5-small,0.354807,0.998219,0.615019,0.999584,0.98581,1.0,0.780488,0.701493
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.998219,0.98581,0.017974,0.437482,0.21605,0.780488,1.0,0.027402
LogisticRegression,paris-noah/Mantis-8M,0.002719,0.280411,1.0,0.942583,0.994392,0.701493,0.027402,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
7,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.564216,0.075064,0.506402,0.009246,0.013024,0.018696,0.660378,0.100339,0.657639,0.113525,0.69668,0.166605,0.506402,0.009246
9,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.637509,0.113738,0.49973,0.006204,-0.00637,0.030633,0.76787,0.09646,0.653294,0.121066,0.954389,0.025569,0.49973,0.006204
4,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.650703,0.117537,0.49906,0.002339,-0.012792,0.026634,0.781054,0.09706,0.653127,0.118686,0.994055,0.002797,0.49906,0.002339
8,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.653377,0.118538,0.500063,0.000666,0.001493,0.016727,0.78318,0.0976,0.653571,0.118629,0.999516,0.000693,0.500063,0.000666
6,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.653545,0.118616,0.5,0.0,0.0,0.0,0.78333,0.097671,0.653545,0.118616,1.0,0.0,0.5,0.0
5,seed,engagement_10sec_35thresh,LogisticRegression,,paris-noah/Mantis-8M,,TACV,0.59561,0.115999,0.507833,0.063359,0.021297,0.150405,0.703972,0.127011,0.654905,0.145066,0.776186,0.110211,0.507833,0.063359


Friedman Test accuracy_score: statistic=10.238, pvalue=0.069
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999858,0.999981,0.538193,0.59468,0.984591
LogisticRegression,AutonLab/MOMENT-1-large,0.999858,1.0,0.99829,0.375252,0.427525,0.99829
LogisticRegression,HandcraftedFeatureExtractor,0.999981,0.99829,1.0,0.65049,0.704482,0.958997
LogisticRegression,amazon/chronos-t5-small,0.538193,0.375252,0.65049,1.0,0.999999,0.168134
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.59468,0.427525,0.704482,0.999999,1.0,0.201363
LogisticRegression,paris-noah/Mantis-8M,0.984591,0.99829,0.958997,0.168134,0.201363,1.0


Friedman Test balanced_accuracy_score: statistic=2.440, pvalue=0.785
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.97398,0.755551,0.995925,0.97398,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,HandcraftedFeatureExtractor,0.755551,0.991626,1.0,0.958997,0.991626,0.999981
LogisticRegression,amazon/chronos-t5-small,0.995925,0.999858,0.958997,1.0,0.999858,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,paris-noah/Mantis-8M,0.845079,0.99829,0.999981,0.984591,0.99829,1.0


Friedman Test matthews_corrcoef: statistic=2.381, pvalue=0.794
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.991626,0.755551,0.984591,0.97398,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.991626,1.0,0.97398,0.999999,0.999981,0.991626
LogisticRegression,HandcraftedFeatureExtractor,0.755551,0.97398,1.0,0.984591,0.991626,0.999981
LogisticRegression,amazon/chronos-t5-small,0.984591,0.999999,0.984591,1.0,0.999999,0.995925
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.97398,0.999981,0.991626,0.999999,1.0,0.99829
LogisticRegression,paris-noah/Mantis-8M,0.845079,0.991626,0.999981,0.995925,0.99829,1.0


Friedman Test f1_score: statistic=19.524, pvalue=0.002
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.97398,0.845079,0.074302,0.092418,0.984591
LogisticRegression,AutonLab/MOMENT-1-large,0.97398,1.0,0.99829,0.375252,0.427525,0.704482
LogisticRegression,HandcraftedFeatureExtractor,0.845079,0.99829,1.0,0.65049,0.704482,0.427525
LogisticRegression,amazon/chronos-t5-small,0.074302,0.375252,0.65049,1.0,0.999999,0.009435
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.092418,0.427525,0.704482,0.999999,1.0,0.012582
LogisticRegression,paris-noah/Mantis-8M,0.984591,0.704482,0.427525,0.009435,0.012582,1.0


Friedman Test precision_score: statistic=2.440, pvalue=0.785
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.97398,0.755551,0.995925,0.97398,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,HandcraftedFeatureExtractor,0.755551,0.991626,1.0,0.958997,0.991626,0.999981
LogisticRegression,amazon/chronos-t5-small,0.995925,0.999858,0.958997,1.0,0.999858,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,paris-noah/Mantis-8M,0.845079,0.99829,0.999981,0.984591,0.99829,1.0


Friedman Test recall_score: statistic=18.452, pvalue=0.002
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.938967,0.845079,0.168134,0.059195,0.984591
LogisticRegression,AutonLab/MOMENT-1-large,0.938967,1.0,0.999858,0.704482,0.427525,0.59468
LogisticRegression,HandcraftedFeatureExtractor,0.845079,0.999858,1.0,0.845079,0.59468,0.427525
LogisticRegression,amazon/chronos-t5-small,0.168134,0.704482,0.845079,1.0,0.99829,0.028372
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.059195,0.427525,0.59468,0.99829,1.0,0.007018
LogisticRegression,paris-noah/Mantis-8M,0.984591,0.59468,0.427525,0.028372,0.007018,1.0


Friedman Test roc_auc_score: statistic=2.440, pvalue=0.785
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.97398,0.755551,0.995925,0.97398,0.845079
LogisticRegression,AutonLab/MOMENT-1-large,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,HandcraftedFeatureExtractor,0.755551,0.991626,1.0,0.958997,0.991626,0.999981
LogisticRegression,amazon/chronos-t5-small,0.995925,0.999858,0.958997,1.0,0.999858,0.984591
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.97398,1.0,0.991626,0.999858,1.0,0.99829
LogisticRegression,paris-noah/Mantis-8M,0.845079,0.99829,0.999981,0.984591,0.99829,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
12,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.47037,0.056328,0.456667,0.041791,-0.086661,0.083587,0.321277,0.163389,0.311333,0.158477,0.333333,0.171076,0.456667,0.041791
14,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.574074,0.060249,0.5525,0.05802,0.117881,0.135204,0.422028,0.098378,0.540887,0.120513,0.358333,0.099685,0.5525,0.05802
11,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.648148,0.098387,0.640833,0.09397,0.290383,0.195244,0.596089,0.090262,0.629527,0.131358,0.575,0.079987,0.640833,0.09397
10,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.740741,0.032796,0.733333,0.02846,0.479257,0.066262,0.695449,0.030717,0.740366,0.077616,0.666667,0.069024,0.733333,0.02846
13,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.718519,0.029333,0.71,0.028292,0.429669,0.058617,0.665698,0.036453,0.709698,0.052691,0.633333,0.066,0.71,0.028292


Friedman Test accuracy_score: statistic=14.667, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.975123,0.497325,0.00858,0.090578
LogisticRegression,AutonLab/MOMENT-1-large,0.975123,1.0,0.855475,0.053938,0.317223
LogisticRegression,HandcraftedFeatureExtractor,0.497325,0.855475,1.0,0.433878,0.896993
LogisticRegression,amazon/chronos-t5-large,0.00858,0.053938,0.433878,1.0,0.930677
LogisticRegression,amazon/chronos-t5-small,0.090578,0.317223,0.896993,0.930677,1.0


Friedman Test balanced_accuracy_score: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Friedman Test matthews_corrcoef: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Friedman Test f1_score: statistic=17.760, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.930677,0.373481,0.001363,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.930677,1.0,0.855475,0.022659,0.373481
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.855475,1.0,0.265889,0.930677
LogisticRegression,amazon/chronos-t5-large,0.001363,0.022659,0.265889,1.0,0.751424
LogisticRegression,amazon/chronos-t5-small,0.070324,0.373481,0.930677,0.751424,1.0


Friedman Test precision_score: statistic=15.840, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.627659,0.265889,0.002939,0.022659
LogisticRegression,AutonLab/MOMENT-1-large,0.627659,1.0,0.975123,0.179597,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.265889,0.975123,1.0,0.497325,0.855475
LogisticRegression,amazon/chronos-t5-large,0.002939,0.179597,0.497325,1.0,0.975123
LogisticRegression,amazon/chronos-t5-small,0.022659,0.497325,0.855475,0.975123,1.0


Friedman Test recall_score: statistic=17.061, pvalue=0.002
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,1.0,0.317223,0.011994,0.115233
LogisticRegression,AutonLab/MOMENT-1-large,1.0,1.0,0.317223,0.011994,0.115233
LogisticRegression,HandcraftedFeatureExtractor,0.317223,0.317223,1.0,0.69123,0.987386
LogisticRegression,amazon/chronos-t5-large,0.011994,0.011994,0.69123,1.0,0.930677
LogisticRegression,amazon/chronos-t5-small,0.115233,0.115233,0.987386,0.930677,1.0


Friedman Test roc_auc_score: statistic=15.520, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.855475,0.373481,0.002939,0.070324
LogisticRegression,AutonLab/MOMENT-1-large,0.855475,1.0,0.930677,0.070324,0.497325
LogisticRegression,HandcraftedFeatureExtractor,0.373481,0.930677,1.0,0.373481,0.930677
LogisticRegression,amazon/chronos-t5-large,0.002939,0.070324,0.373481,1.0,0.855475
LogisticRegression,amazon/chronos-t5-small,0.070324,0.497325,0.930677,0.855475,1.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
26,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.5,0.065591,0.4975,0.064466,-0.004713,0.129485,0.456869,0.067827,0.444018,0.067048,0.475,0.084939,0.4975,0.064466
31,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.522222,0.063931,0.500833,0.053619,0.00206,0.107809,0.287647,0.235803,0.272747,0.22747,0.308333,0.252401,0.500833,0.053619
19,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,CatAggregator,TACV,0.533333,0.064973,0.5175,0.063122,0.039014,0.134415,0.416845,0.071777,0.475936,0.097363,0.375,0.069024,0.5175,0.063122
28,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.6,0.044305,0.579167,0.041085,0.177598,0.099592,0.462654,0.057802,0.588405,0.098253,0.391667,0.080833,0.579167,0.041085
21,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanTimeAggregator,TACV,0.533333,0.064973,0.5175,0.063122,0.039014,0.134415,0.416845,0.071777,0.475936,0.097363,0.375,0.069024,0.5175,0.063122
25,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.740741,0.073333,0.754167,0.065638,0.517757,0.12078,0.753849,0.051885,0.667946,0.085374,0.875,0.026089,0.754167,0.065638
16,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,CatAggregator,TACV,0.762963,0.072781,0.7625,0.074661,0.528079,0.145684,0.737335,0.081847,0.72694,0.073788,0.758333,0.120687,0.7625,0.074661
24,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.777778,0.091299,0.773333,0.093433,0.561735,0.181537,0.740824,0.104231,0.775075,0.10686,0.733333,0.162087,0.773333,0.093433
18,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanTimeAggregator,TACV,0.807407,0.077435,0.8075,0.076658,0.618596,0.15271,0.789046,0.075692,0.779068,0.090541,0.808333,0.099685,0.8075,0.076658
17,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,CatAggregator,TACV,0.796296,0.043385,0.7975,0.043232,0.59468,0.0844,0.778857,0.045557,0.755329,0.051681,0.808333,0.067023,0.7975,0.043232


Friedman Test accuracy_score: statistic=17.956, pvalue=0.006
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.990645,0.137885,0.0429,0.027494,0.223897,0.137885
LogisticRegression,AutonLab/MOMENT-1-large,0.990645,1.0,0.527684,0.259158,0.191943,0.67574,0.527684
LogisticRegression,HandcraftedFeatureExtractor,0.137885,0.527684,1.0,0.999459,0.997234,0.999991,1.0
LogisticRegression,amazon/chronos-t5-large,0.0429,0.259158,0.999459,1.0,0.999999,0.994704,0.999459
LogisticRegression,amazon/chronos-t5-small,0.027494,0.191943,0.997234,0.999999,1.0,0.98454,0.997234
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.223897,0.67574,0.999991,0.994704,0.98454,1.0,0.999991
LogisticRegression,paris-noah/Mantis-8M,0.137885,0.527684,1.0,0.999459,0.997234,0.999991,1.0


Friedman Test balanced_accuracy_score: statistic=19.527, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999459,0.079418,0.065133,0.053044,0.163298,0.065133
LogisticRegression,AutonLab/MOMENT-1-large,0.999459,1.0,0.223897,0.191943,0.163298,0.383365,0.191943
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.223897,1.0,1.0,0.999999,0.999949,1.0
LogisticRegression,amazon/chronos-t5-large,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
LogisticRegression,amazon/chronos-t5-small,0.053044,0.163298,0.999999,1.0,1.0,0.999459,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.383365,0.999949,0.999812,0.999459,1.0,0.999812
LogisticRegression,paris-noah/Mantis-8M,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0


Friedman Test matthews_corrcoef: statistic=18.522, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.997234,0.079418,0.053044,0.03446,0.223897,0.115564
LogisticRegression,AutonLab/MOMENT-1-large,0.997234,1.0,0.297637,0.223897,0.163298,0.57761,0.383365
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.297637,1.0,0.999999,0.999949,0.999459,0.999999
LogisticRegression,amazon/chronos-t5-large,0.053044,0.223897,0.999999,1.0,0.999999,0.997234,0.999949
LogisticRegression,amazon/chronos-t5-small,0.03446,0.163298,0.999949,0.999999,1.0,0.990645,0.999459
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.223897,0.57761,0.999459,0.997234,0.990645,1.0,0.999949
LogisticRegression,paris-noah/Mantis-8M,0.115564,0.383365,0.999999,0.999949,0.999459,0.999949,1.0


Friedman Test f1_score: statistic=19.130, pvalue=0.004
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.115564,0.163298,0.115564,0.115564,0.079418
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.163298,0.223897,0.163298,0.163298,0.115564
LogisticRegression,HandcraftedFeatureExtractor,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,amazon/chronos-t5-large,0.163298,0.223897,0.999999,1.0,0.999999,0.999999,0.999949
LogisticRegression,amazon/chronos-t5-small,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.115564,0.163298,1.0,0.999999,1.0,1.0,0.999999
LogisticRegression,paris-noah/Mantis-8M,0.079418,0.115564,0.999999,0.999949,0.999999,0.999999,1.0


Friedman Test precision_score: statistic=18.524, pvalue=0.005
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.928962,0.478211,0.013423,0.008059,0.33914,0.223897
LogisticRegression,AutonLab/MOMENT-1-large,0.928962,1.0,0.98454,0.259158,0.191943,0.948498,0.876775
LogisticRegression,HandcraftedFeatureExtractor,0.478211,0.98454,1.0,0.766379,0.67574,0.999991,0.999459
LogisticRegression,amazon/chronos-t5-large,0.013423,0.259158,0.766379,1.0,0.999999,0.876775,0.948498
LogisticRegression,amazon/chronos-t5-small,0.008059,0.191943,0.67574,0.999999,1.0,0.807113,0.905096
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.33914,0.948498,0.999991,0.876775,0.807113,1.0,0.999991
LogisticRegression,paris-noah/Mantis-8M,0.223897,0.876775,0.999459,0.948498,0.905096,0.999991,1.0


Friedman Test recall_score: statistic=22.390, pvalue=0.001
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999999,0.021791,0.67574,0.297637,0.096148,0.096148
LogisticRegression,AutonLab/MOMENT-1-large,0.999999,1.0,0.013423,0.57761,0.223897,0.065133,0.065133
LogisticRegression,HandcraftedFeatureExtractor,0.021791,0.013423,1.0,0.67574,0.948498,0.998694,0.998694
LogisticRegression,amazon/chronos-t5-large,0.67574,0.57761,0.67574,1.0,0.997234,0.928962,0.928962
LogisticRegression,amazon/chronos-t5-small,0.297637,0.223897,0.948498,0.997234,1.0,0.998694,0.998694
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.096148,0.065133,0.998694,0.928962,0.998694,1.0,1.0
LogisticRegression,paris-noah/Mantis-8M,0.096148,0.065133,0.998694,0.928962,0.998694,1.0,1.0


Friedman Test roc_auc_score: statistic=19.527, pvalue=0.003
Nemenyi post-hoc test


Unnamed: 0_level_0,Unnamed: 1_level_0,DummyClassifier,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression,LogisticRegression
Unnamed: 0_level_1,Unnamed: 1_level_1,HandcraftedFeatureExtractor,AutonLab/MOMENT-1-large,HandcraftedFeatureExtractor,amazon/chronos-t5-large,amazon/chronos-t5-small,ibm-granite/granite-timeseries-patchtsmixer,paris-noah/Mantis-8M
DummyClassifier,HandcraftedFeatureExtractor,1.0,0.999459,0.079418,0.065133,0.053044,0.163298,0.065133
LogisticRegression,AutonLab/MOMENT-1-large,0.999459,1.0,0.223897,0.191943,0.163298,0.383365,0.191943
LogisticRegression,HandcraftedFeatureExtractor,0.079418,0.223897,1.0,1.0,0.999999,0.999949,1.0
LogisticRegression,amazon/chronos-t5-large,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
LogisticRegression,amazon/chronos-t5-small,0.053044,0.163298,0.999999,1.0,1.0,0.999459,1.0
LogisticRegression,ibm-granite/granite-timeseries-patchtsmixer,0.163298,0.383365,0.999949,0.999812,0.999459,1.0,0.999812
LogisticRegression,paris-noah/Mantis-8M,0.065133,0.191943,1.0,1.0,1.0,0.999812,1.0
