In [1]:
from typing import Generator
from pathlib import Path


import pandas as pd
import os
from yaml import safe_load
from IPython.display import display
from IPython.display import HTML

# Raise the column display limit to 100 so the wide result tables built below
# (identifying columns plus an avg/sem pair per metric) are not elided.
pd.set_option('display.max_columns', 100)

def _target_class_name(node: dict) -> str:
    """Return the last dotted component of a config node's ``_target_`` path."""
    return node["_target_"].split(".")[-1]


def present_results(
    paths: Generator, val_method: str = "lopo", ci_multiplier: float = 1.98
) -> None:
    """Summarise per-run ``reports.csv`` files and display one table per group.

    For every report path, the sibling ``.hydra/config.yaml`` is loaded to
    identify the run (dataset, side, model, resampling, features, aggregator,
    validation method). Runs whose validation-method class name does not match
    ``val_method`` are skipped. Each column of the report is reduced to its
    mean (``"<col> avg"``) and its standard error of the mean scaled by
    ``ci_multiplier`` (``"<col> sem"``). The summaries are grouped by
    (Dataset, Side, Resampling) and each group is displayed as an HTML heading
    followed by a table sorted by Model, Features and Aggregator.

    Args:
        paths: Iterable of ``pathlib.Path`` objects pointing at report CSV
            files (a plain list works, as used in this notebook). Each path's
            parent directory must contain ``.hydra/config.yaml``.
        val_method: Validation-method class name to keep; compared
            case-insensitively against the last component of
            ``validation_method._target_`` in the run config.
        ci_multiplier: Factor applied to the standard error of the mean.
            The ``"<col> sem"`` columns therefore hold a scaled value, not
            the raw SEM.

    Returns:
        None. Results are rendered with ``IPython.display.display``. If no
        run matches, a short message is printed instead of raising.
    """
    # Function-scope import: `html.escape` is only needed here, and it keeps
    # the notebook's import cell untouched.
    from html import escape

    wanted_method = val_method.lower()
    results = []
    for reports_path in paths:
        # Read the config first so runs with a different validation method are
        # skipped without parsing their CSV; the context manager closes the file.
        with open(reports_path.parent / ".hydra/config.yaml") as config_file:
            conf = safe_load(config_file)

        validation_method = _target_class_name(conf["validation_method"])
        if validation_method.lower() != wanted_method:
            continue

        report = pd.read_csv(reports_path, index_col=0)

        model_name: str = _target_class_name(conf["model"]["model"])

        # Prefer the explicit `model_name` when the feature extractor config
        # has one; otherwise fall back to the extractor's class name.
        feature_conf = conf["feature_extractor"]
        features_name = (
            feature_conf["model_name"]
            if "model_name" in feature_conf
            else _target_class_name(feature_conf)
        )

        # Configs without an `aggregator` entry are labelled
        # "MeanTimeAggregator"; an entry lacking `_target_` yields None.
        if "aggregator" not in conf:
            aggregator = "MeanTimeAggregator"
        elif "_target_" in conf["aggregator"]:
            aggregator = _target_class_name(conf["aggregator"])
        else:
            aggregator = None

        report_results = {}
        for col in report.columns:
            report_results[f"{col} avg"] = report[col].mean()
            # The original code labelled this "95% CI" with a factor of 1.98.
            # NOTE(review): the normal-approximation factor is 1.96; 1.98 is
            # presumably a t critical value - confirm it suits the fold count.
            report_results[f"{col} sem"] = report[col].sem() * ci_multiplier

        # A missing resampler and the explicit "NoUnderSampler" are both
        # reported as the string "None" so they land in the same group.
        resampling = (
            _target_class_name(conf["resampling"]) if "resampling" in conf else "None"
        )
        if resampling == "NoUnderSampler":
            resampling = "None"

        # Collect results in a list of dicts
        results.append(
            {
                "Dataset": conf["dataset"],
                "Side": conf["side"],
                "Model": model_name,
                "Resampling": resampling,
                "Features": features_name,
                "Aggregator": aggregator,
                "Validation": validation_method,
                **report_results,
            }
        )

    # With no matching runs the frame has no columns, so the groupby below
    # would raise KeyError; report it and stop.
    if not results:
        print(f"No reports found for validation method {val_method!r}.")
        return

    # After the loop, display as a table
    df_results = pd.DataFrame(results)
    # NOTE(review): groupby drops rows whose key is NaN/None by default, so a
    # run with a null `side` in its config would not be shown - confirm.
    for (dataset, side, resampling), group in df_results.groupby(["Dataset", "Side", "Resampling"]):
        # Escape config-derived values so characters such as "<" or "&" in a
        # name cannot break the heading markup.
        display(HTML(
            f"<h3>Results for Dataset: <i>{escape(str(dataset))}</i>, "
            f"Side: <i>{escape(str(side))}</i>, "
            f"resampling: <i>{escape(str(resampling))}</i></h3>"
        ))
        display(group.sort_values(by=["Model", "Features", "Aggregator"]).drop_duplicates())




In [2]:
# Collect every reports.csv found three directory levels below the outputs
# root. The paths are sorted because glob order depends on the filesystem;
# sorting keeps the row indices of the displayed tables reproducible.
results_path: str = "../outputs/"
all_results = sorted(Path(results_path).glob("*/*/*/reports.csv"))
present_results(all_results, val_method="lopo")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
4,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.421389,0.092253,0.518713,0.027086,0.034524,0.051993,0.318314,0.17451,0.272775,0.169406,0.457165,0.26818,0.518713,0.027086
6,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.484316,0.153246,0.5,0.0,0.0,0.0,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.627536,0.107493,0.641607,0.09203,0.282201,0.15794,0.535047,0.163484,0.623977,0.168456,0.580068,0.249043,0.641607,0.09203
2,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.482812,0.153343,0.498413,0.003143,-0.015532,0.030753,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.498413,0.003143
5,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.499654,0.086121,0.500339,0.050637,-0.003384,0.100121,0.297661,0.185441,0.311768,0.213402,0.328211,0.214678,0.500339,0.050637
1,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.425518,0.088017,0.492814,0.032105,-0.009583,0.084659,0.288127,0.098984,0.466334,0.196568,0.360991,0.23421,0.492814,0.032105


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
34,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.424948,0.142739,0.440485,0.146541,0.012393,0.032708,0.401787,0.182682,0.461874,0.27004,0.441729,0.151303,0.510965,0.025945
36,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.253624,0.149921,0.338205,0.178744,-0.015014,0.071629,0.198312,0.162606,0.357748,0.291745,0.315289,0.277528,0.473488,0.065684
33,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.261139,0.131941,0.331712,0.143075,-0.081566,0.096574,0.274058,0.176519,0.413604,0.258524,0.43388,0.31093,0.438179,0.062473
32,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.306481,0.141948,0.35797,0.164798,-0.017755,0.132807,0.290548,0.145073,0.475471,0.301033,0.334796,0.241437,0.464015,0.122785
35,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.30748,0.164373,0.318506,0.169606,-0.056672,0.080626,0.26962,0.186584,0.300543,0.2387,0.310951,0.167046,0.445908,0.068776


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
0,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.602818,0.026664,0.580985,0.024202,0.120241,0.040113,0.706916,0.046263,0.851138,0.061871,0.608197,0.053613,0.580985,0.024202


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
9,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.566454,0.069834,0.563785,0.064325,0.014386,0.013761,0.607081,0.101947,0.662085,0.133996,0.638808,0.116072,0.512763,0.01087
11,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.65779,0.132067,0.630016,0.126509,-0.008993,0.008466,0.733725,0.123666,0.655589,0.134423,0.895782,0.117773,0.495507,0.003905
8,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.644175,0.129965,0.61771,0.123183,-0.007219,0.017336,0.730887,0.120988,0.656422,0.134193,0.897988,0.118505,0.498472,0.004105
7,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.651427,0.131188,0.625663,0.12549,-0.003951,0.01794,0.731138,0.122383,0.655591,0.134032,0.893233,0.118862,0.496869,0.01015
10,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660957,0.127395,0.631197,0.12239,0.003315,0.006441,0.734903,0.122734,0.656956,0.134046,0.897834,0.118219,0.500452,0.002127
12,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.655841,0.133618,0.629387,0.127295,0.0,0.0,0.738007,0.122733,0.656888,0.134081,0.911996,0.118828,0.5,0.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
16,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.488889,0.027326,0.471389,0.023477,-0.057482,0.047797,0.307668,0.06837,0.312222,0.068741,0.313889,0.074501,0.471389,0.023477
18,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.544444,0.03886,0.529722,0.037839,0.06787,0.079153,0.426971,0.052136,0.48812,0.063227,0.397222,0.055763,0.529722,0.037839
15,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.639506,0.072138,0.633889,0.069685,0.29942,0.151118,0.581475,0.082914,0.672775,0.109169,0.583333,0.096243,0.633889,0.069685
14,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.711111,0.063328,0.7025,0.06392,0.433858,0.134917,0.63382,0.093704,0.716975,0.11233,0.625,0.105474,0.7025,0.06392
17,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.695062,0.066031,0.686389,0.066281,0.400102,0.137941,0.612699,0.097435,0.683939,0.110048,0.608333,0.108515,0.686389,0.066281


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
26,usilaughs,right,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.537037,0.031791,0.528889,0.030555,0.057068,0.061686,0.416111,0.081522,0.3892,0.0778,0.455556,0.0908,0.528889,0.030555
25,usilaughs,right,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.730864,0.103107,0.743611,0.096125,0.479467,0.205783,0.777788,0.07931,0.751362,0.106554,0.858333,0.05658,0.743611,0.096125
24,usilaughs,right,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.737037,0.072515,0.7375,0.070212,0.494875,0.144505,0.707187,0.084681,0.744827,0.097383,0.741667,0.097647,0.7375,0.070212


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
21,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.516049,0.033713,0.505278,0.034151,0.010152,0.068488,0.40094,0.064931,0.402968,0.060762,0.408333,0.073087,0.505278,0.034151
23,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.571605,0.043871,0.548333,0.043829,0.121783,0.105163,0.397259,0.068851,0.572211,0.107839,0.338889,0.069839,0.548333,0.043829
20,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.724691,0.099802,0.73,0.094771,0.465681,0.197678,0.743902,0.085226,0.752967,0.108212,0.777778,0.076837,0.73,0.094771
19,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.730864,0.071708,0.723611,0.071174,0.478667,0.146334,0.664974,0.094681,0.771658,0.101693,0.658333,0.10683,0.723611,0.071174
22,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.730864,0.079087,0.722778,0.079596,0.463984,0.165255,0.655712,0.109457,0.726762,0.118299,0.65,0.114753,0.722778,0.079596
30,usilaughs,right,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,LOPO,0.707407,0.119513,0.709444,0.116578,0.432399,0.23904,0.704856,0.118233,0.752253,0.12933,0.727778,0.117856,0.709444,0.116578
29,usilaughs,right,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.754321,0.080293,0.751944,0.078892,0.53416,0.156471,0.725814,0.086325,0.784381,0.097427,0.730556,0.095814,0.751944,0.078892
31,usilaughs,right,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.751852,0.080344,0.749444,0.079048,0.528684,0.155534,0.715733,0.094181,0.759042,0.106104,0.727778,0.101291,0.749444,0.079048


In [3]:
# Reuses the `all_results` list built in the preceding cell, keeping only runs
# whose validation-method class name lowercases to "tacv".
present_results(all_results, val_method="tacv")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
4,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.478237,0.051894,0.500512,0.031328,0.001778,0.061588,0.38797,0.202726,0.357265,0.200925,0.443405,0.221716,0.500512,0.031328
6,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.6405,0.131275,0.615714,0.136109,0.241794,0.286743,0.462302,0.302731,0.625714,0.362842,0.466578,0.334841,0.615714,0.136109
2,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
5,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.508916,0.058305,0.523843,0.049311,0.041462,0.110617,0.297648,0.210281,0.36411,0.201832,0.312121,0.266114,0.523843,0.049311
1,apsync,engagement_10sec,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.504563,0.053762,0.519786,0.050948,0.021321,0.133771,0.361768,0.19162,0.405507,0.242627,0.368939,0.21053,0.519786,0.050948


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
32,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.545503,0.08005,0.54293,0.059188,0.071216,0.103792,0.583344,0.163628,0.57177,0.209098,0.705254,0.180745,0.54293,0.059188
34,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.488051,0.208738,0.526773,0.036324,0.085154,0.108571,0.533131,0.271428,0.559069,0.208522,0.749482,0.350237,0.526773,0.036324
31,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.311145,0.142635,0.328524,0.131626,-0.355593,0.26508,0.304784,0.219214,0.323593,0.197606,0.375569,0.262681,0.328524,0.131626
30,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.500288,0.199609,0.51477,0.050704,0.0323,0.159239,0.608019,0.213006,0.547907,0.208574,0.854995,0.235896,0.51477,0.050704
33,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.452856,0.174762,0.511186,0.051142,0.028248,0.10461,0.533535,0.178035,0.542154,0.203816,0.67971,0.234199,0.511186,0.051142


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
0,bihearts,left,LogisticRegression,GroupUnderSampler,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.618506,0.037796,0.614872,0.046506,0.181688,0.090396,0.717323,0.041927,0.871466,0.052669,0.613656,0.063585,0.614872,0.046506


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
9,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.555644,0.096513,0.48866,0.028251,-0.023202,0.051982,0.654688,0.126983,0.663317,0.091176,0.692261,0.250659,0.48866,0.028251
11,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.648025,0.086939,0.488468,0.011939,-0.042485,0.03812,0.776995,0.068974,0.669713,0.078364,0.929057,0.055451,0.488468,0.011939
8,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.684997,0.088681,0.522659,0.046252,0.079399,0.177631,0.808665,0.059924,0.683752,0.085957,0.998889,0.0022,0.522659,0.046252
7,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.668558,0.083023,0.498674,0.002839,-0.003172,0.007836,0.792218,0.069807,0.674811,0.074602,0.962115,0.06585,0.498674,0.002839
10,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.658135,0.072629,0.495278,0.00935,-0.011676,0.023118,0.789412,0.052666,0.673304,0.072463,0.965556,0.0682,0.495278,0.00935
12,seed,engagement_10sec_35thresh,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.675595,0.073512,0.5,0.0,0.0,0.0,0.804037,0.052678,0.675595,0.073512,1.0,0.0,0.5,0.0


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
16,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.47037,0.056328,0.456667,0.041791,-0.086661,0.083587,0.321277,0.163389,0.311333,0.158477,0.333333,0.171076,0.456667,0.041791
18,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.574074,0.060249,0.5525,0.05802,0.117881,0.135204,0.422028,0.098378,0.540887,0.120513,0.358333,0.099685,0.5525,0.05802
15,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.648148,0.098387,0.640833,0.09397,0.290383,0.195244,0.596089,0.090262,0.629527,0.131358,0.575,0.079987,0.640833,0.09397
14,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.740741,0.032796,0.733333,0.02846,0.479257,0.066262,0.695449,0.030717,0.740366,0.077616,0.666667,0.069024,0.733333,0.02846
17,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.718519,0.029333,0.71,0.028292,0.429669,0.058617,0.665698,0.036453,0.709698,0.052691,0.633333,0.066,0.71,0.028292


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
21,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.5,0.065591,0.4975,0.064466,-0.004713,0.129485,0.456869,0.067827,0.444018,0.067048,0.475,0.084939,0.4975,0.064466
26,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.522222,0.063931,0.500833,0.053619,0.00206,0.107809,0.287647,0.235803,0.272747,0.22747,0.308333,0.252401,0.500833,0.053619
23,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.6,0.044305,0.579167,0.041085,0.177598,0.099592,0.462654,0.057802,0.588405,0.098253,0.391667,0.080833,0.579167,0.041085
20,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.740741,0.073333,0.754167,0.065638,0.517757,0.12078,0.753849,0.051885,0.667946,0.085374,0.875,0.026089,0.754167,0.065638
19,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.777778,0.091299,0.773333,0.093433,0.561735,0.181537,0.740824,0.104231,0.775075,0.10686,0.733333,0.162087,0.773333,0.093433
22,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.785185,0.062009,0.783333,0.069221,0.579,0.126937,0.751664,0.088778,0.765517,0.069522,0.766667,0.168268,0.783333,0.069221
29,usilaughs,right,LogisticRegression,,ibm-granite/granite-timeseries-patchtsmixer,MeanChanAggregator,TACV,0.744444,0.090856,0.754167,0.088702,0.509859,0.172999,0.747267,0.083511,0.674165,0.087644,0.841667,0.084134,0.754167,0.088702


In [4]:
# Rebind `results_path` / `all_results` to the second results tree. The paths
# are sorted because glob order depends on the filesystem; sorting keeps the
# row indices of the displayed tables reproducible.
results_path: str = "../outputs_adula/"
all_results = sorted(Path(results_path).glob("*/*/*/reports.csv"))

present_results(all_results, val_method="lopo")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
10,apsync,engagement_10sec,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.488401,0.07885,0.490296,0.047758,-0.017699,0.093088,0.311633,0.124473,0.439855,0.179942,0.299068,0.161808,0.490296,0.047758
8,apsync,engagement_10sec,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.565708,0.087577,0.582908,0.079675,0.181071,0.152296,0.503019,0.133356,0.588098,0.185258,0.598491,0.228028,0.582908,0.079675
15,apsync,engagement_10sec,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.591906,0.099472,0.600839,0.104529,0.207584,0.194072,0.551494,0.118446,0.601427,0.17769,0.634916,0.208379,0.600839,0.104529
16,apsync,engagement_10sec,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.553271,0.112821,0.579256,0.103853,0.147093,0.191879,0.532292,0.141812,0.542071,0.152365,0.680005,0.244048,0.579256,0.103853
7,apsync,engagement_10sec,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.478476,0.078283,0.499203,0.042536,0.004351,0.091499,0.342758,0.074282,0.465981,0.156318,0.349289,0.133392,0.499203,0.042536
9,apsync,engagement_10sec,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.434577,0.072813,0.451078,0.060542,-0.095187,0.12741,0.270712,0.091285,0.397678,0.188699,0.272608,0.125778,0.451078,0.060542


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
2,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.383712,0.131498,0.475504,0.038458,-0.030541,0.053279,0.428377,0.220202,0.548873,0.292851,0.356154,0.182575,0.475504,0.038458
4,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.67624,0.045756,0.624431,0.049235,0.201433,0.100414,0.769869,0.050766,0.876599,0.048957,0.695483,0.083177,0.624431,0.049235
1,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.720179,0.051051,0.752718,0.074657,0.374824,0.124084,0.804301,0.033096,0.927746,0.050036,0.713747,0.046953,0.752718,0.074657
0,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.714566,0.034155,0.748198,0.048663,0.368776,0.08969,0.798172,0.025449,0.92476,0.04556,0.706176,0.044744,0.748198,0.048663
3,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660845,0.014725,0.686496,0.039834,0.269149,0.046403,0.756432,0.022548,0.892093,0.069324,0.661942,0.029569,0.686496,0.039834


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
24,seed,engagement_10sec_35thresh,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.63552,0.124287,0.613867,0.117795,-0.005123,0.019647,0.722869,0.119322,0.656293,0.134202,0.875709,0.115047,0.497676,0.00689
20,seed,engagement_10sec_35thresh,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.636577,0.13187,0.610178,0.124789,-0.014187,0.032487,0.724302,0.121643,0.653884,0.134805,0.887315,0.120944,0.49073,0.018801
21,seed,engagement_10sec_35thresh,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.648418,0.128778,0.6204,0.123305,0.002985,0.021965,0.730874,0.120969,0.657116,0.133807,0.891924,0.117763,0.501478,0.006798
23,seed,engagement_10sec_35thresh,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.648393,0.127377,0.622087,0.121307,-9.3e-05,0.008838,0.731056,0.121003,0.656844,0.134065,0.894619,0.11751,0.49999,0.002165


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
32,usilaughs,right,XGBClassifier,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.55679,0.047886,0.556389,0.047603,0.117325,0.098295,0.522071,0.054832,0.512782,0.062003,0.552778,0.064989,0.556389,0.047603
30,usilaughs,right,XGBClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,LOPO,0.74321,0.078222,0.751389,0.073241,0.522285,0.148679,0.75269,0.073703,0.757896,0.089285,0.825,0.072226,0.751389,0.073241
29,usilaughs,right,XGBClassifier,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.751852,0.069539,0.750556,0.067671,0.518002,0.140451,0.725723,0.077998,0.758757,0.090295,0.738889,0.084369,0.750556,0.067671
31,usilaughs,right,XGBClassifier,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.734568,0.076879,0.732778,0.075164,0.488944,0.154802,0.697881,0.087442,0.772527,0.092996,0.716667,0.100568,0.732778,0.075164


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
28,usilaughs,right,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.541975,0.04047,0.517778,0.03918,0.044559,0.091601,0.35238,0.062694,0.480308,0.088871,0.3,0.06056,0.517778,0.03918
26,usilaughs,right,XGBClassifier,,HandcraftedFeatureExtractor,,LOPO,0.749383,0.079922,0.745833,0.078964,0.520317,0.156576,0.713991,0.088551,0.775069,0.095412,0.713889,0.09871,0.745833,0.078964
25,usilaughs,right,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.732099,0.0716,0.726944,0.071221,0.4765,0.145613,0.678834,0.091749,0.743351,0.100275,0.680556,0.10408,0.726944,0.071221
27,usilaughs,right,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.714815,0.079726,0.707778,0.079364,0.431234,0.166938,0.650465,0.102611,0.734437,0.111134,0.644444,0.109601,0.707778,0.079364


In [12]:
# Relies on `all_results` as last assigned (the "../outputs_adula/" cell above),
# keeping only runs whose validation-method class name lowercases to "tacv".
# NOTE(review): this cell's execution count is out of sequence with the cells
# above; re-run the notebook top to bottom to confirm the output is current.
present_results(all_results, val_method="tacv")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
7,apsync,engagement_10sec,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.571778,0.045032,0.559552,0.068247,0.132926,0.144147,0.431007,0.115241,0.570012,0.189474,0.357776,0.092613,0.559552,0.068247
6,apsync,engagement_10sec,XGBClassifier,,HandcraftedFeatureExtractor,,TACV,0.632775,0.170787,0.602166,0.156596,0.253699,0.32207,0.534402,0.26234,0.700152,0.204748,0.613636,0.383036,0.602166,0.156596
5,apsync,engagement_10sec,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.494219,0.083199,0.50524,0.080229,0.005941,0.163245,0.428629,0.131117,0.459192,0.128688,0.440374,0.157569,0.50524,0.080229
8,apsync,engagement_10sec,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.523685,0.078796,0.522359,0.091138,0.050315,0.20615,0.35135,0.147116,0.505965,0.231624,0.295633,0.133804,0.522359,0.091138


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
2,bihearts,left,DummyClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.409179,0.126033,0.495931,0.005103,-0.005947,0.008245,0.380454,0.307846,0.507808,0.414703,0.305448,0.246981,0.495931,0.005103
4,bihearts,left,LogisticRegression,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.685536,0.047774,0.671579,0.044308,0.277698,0.111877,0.775661,0.038262,0.898969,0.045172,0.688387,0.074978,0.671579,0.044308
1,bihearts,left,LogisticRegression,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.776265,0.040743,0.830071,0.041804,0.512755,0.090253,0.84241,0.025621,0.961691,0.031821,0.750841,0.037521,0.830071,0.041804
0,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.733597,0.045006,0.785686,0.031861,0.441448,0.104907,0.808792,0.026878,0.952221,0.029885,0.704134,0.037858,0.785686,0.031861
3,bihearts,left,LogisticRegression,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.673411,0.036678,0.730974,0.057816,0.339848,0.051974,0.76041,0.035016,0.913251,0.075118,0.655163,0.041796,0.730974,0.057816


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
15,seed,engagement_10sec_35thresh,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.672406,0.073013,0.509033,0.011633,0.043288,0.054512,0.798683,0.054929,0.679216,0.075644,0.976394,0.016372,0.509033,0.011633
14,seed,engagement_10sec_35thresh,XGBClassifier,,HandcraftedFeatureExtractor,,TACV,0.627309,0.065767,0.480157,0.062669,-0.018255,0.200797,0.766578,0.045838,0.666371,0.064573,0.919769,0.101365,0.480157,0.062669
13,seed,engagement_10sec_35thresh,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.673993,0.074835,0.498592,0.002789,-0.015824,0.031331,0.802745,0.054988,0.674868,0.073435,0.997183,0.005577,0.498592,0.002789
16,seed,engagement_10sec_35thresh,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.666131,0.070485,0.506101,0.013355,0.003512,0.062846,0.795132,0.050991,0.677903,0.076784,0.97092,0.020223,0.506101,0.013355


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
23,usilaughs,right,XGBClassifier,GroupUnderSampler,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.503704,0.10201,0.500833,0.107738,-0.005269,0.222582,0.434428,0.179454,0.42382,0.138996,0.475,0.23996,0.500833,0.107738
22,usilaughs,right,XGBClassifier,GroupUnderSampler,HandcraftedFeatureExtractor,,TACV,0.8,0.084731,0.813333,0.080639,0.635817,0.154803,0.808817,0.073334,0.717935,0.092062,0.933333,0.067023,0.813333,0.080639
21,usilaughs,right,XGBClassifier,GroupUnderSampler,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.796296,0.064558,0.799167,0.067951,0.612066,0.126437,0.777314,0.080895,0.754952,0.068599,0.825,0.161666,0.799167,0.067951
24,usilaughs,right,XGBClassifier,GroupUnderSampler,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.785185,0.051333,0.7875,0.05522,0.587058,0.101331,0.764503,0.06865,0.748039,0.069328,0.808333,0.151225,0.7875,0.05522


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
19,usilaughs,right,XGBClassifier,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.555556,0.06957,0.536667,0.06978,0.082053,0.156668,0.413821,0.112833,0.50907,0.1316,0.366667,0.136562,0.536667,0.06978
18,usilaughs,right,XGBClassifier,,HandcraftedFeatureExtractor,,TACV,0.803704,0.078299,0.804167,0.083362,0.608629,0.1655,0.781486,0.093422,0.760842,0.068576,0.808333,0.134553,0.804167,0.083362
17,usilaughs,right,XGBClassifier,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.803704,0.062009,0.804167,0.066871,0.625393,0.126262,0.778835,0.081697,0.781814,0.093826,0.808333,0.170278,0.804167,0.066871
20,usilaughs,right,XGBClassifier,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.796296,0.059123,0.793333,0.066544,0.602506,0.123695,0.761465,0.087899,0.786743,0.085175,0.766667,0.170278,0.793333,0.066544
