In [37]:
import os
from pathlib import Path
from typing import Generator
from typing import Iterable

import pandas as pd
from IPython.display import HTML
from IPython.display import display
from yaml import safe_load

# Let pandas render up to 100 columns before it starts eliding them.
pd.options.display.max_columns = 100

def _target_class_name(node: dict) -> str:
    """Return the last dotted component of ``node["_target_"]``."""
    return node["_target_"].split(".")[-1]


def present_results(
    paths: Iterable[Path],
    val_method: str = "lopo",
    ci_multiplier: float = 1.98,
) -> None:
    """Display one summary table per (dataset, side, resampling) group.

    For every ``reports.csv`` in ``paths`` the sibling ``.hydra/config.yaml``
    is loaded. Runs whose validation method (last component of
    ``validation_method._target_``) does not match ``val_method`` are skipped.
    For the remaining runs every report column is reduced to two values:
    ``"<col> avg"`` (column mean) and ``"<col> sem"`` (standard error of the
    mean multiplied by ``ci_multiplier``, i.e. an interval half-width rather
    than a raw SEM; the column name is kept for backward compatibility).

    Args:
        paths: Iterable of paths to ``reports.csv`` files. Plain strings are
            tolerated; each item is converted with ``Path``.
        val_method: Validation method to keep; compared case-insensitively.
        ci_multiplier: Factor applied to the standard error of the mean.

    Returns:
        None. Tables are rendered with ``IPython.display.display``. When no
        run matches, a short message is printed and nothing is displayed.
    """
    wanted_method = val_method.lower()
    results = []
    for reports_path in paths:
        reports_path = Path(reports_path)

        # Context manager so the config file handle is closed deterministically.
        with open(reports_path.parent / ".hydra/config.yaml", encoding="utf-8") as config_file:
            conf = safe_load(config_file)

        validation_method = _target_class_name(conf["validation_method"])
        if validation_method.lower() != wanted_method:
            continue

        # Parse the report only after the filter so skipped runs cost nothing.
        report = pd.read_csv(reports_path, index_col=0)

        model_name: str = _target_class_name(conf["model"]["model"])

        # An explicit ``model_name`` entry takes precedence over the class name.
        feature_conf = conf["feature_extractor"]
        features_name = (
            feature_conf["model_name"]
            if "model_name" in feature_conf
            else _target_class_name(feature_conf)
        )

        if "aggregator" not in conf:
            # NOTE(review): presumably the pipeline default for configs
            # without an aggregator entry - confirm against the training code.
            aggregator = "MeanTimeAggregator"
        else:
            # The key may be present but null; treat that like "no target".
            aggregator_conf = conf["aggregator"] or {}
            aggregator = (
                _target_class_name(aggregator_conf)
                if "_target_" in aggregator_conf
                else None
            )

        report_results = {}
        for col in report.columns:
            report_results[f"{col} avg"] = report[col].mean()
            # NOTE(review): the normal-approximation 95% factor is 1.96; the
            # default of 1.98 is kept so existing tables are reproduced.
            report_results[f"{col} sem"] = report[col].sem() * ci_multiplier

        resampling = (
            _target_class_name(conf["resampling"]) if "resampling" in conf else "None"
        )
        if resampling == "NoUnderSampler":
            resampling = "None"

        results.append(
            {
                "Dataset": conf["dataset"],
                "Side": conf["side"],
                "Model": model_name,
                "Resampling": resampling,
                "Features": features_name,
                "Aggregator": aggregator,
                "Validation": validation_method,
                **report_results,
            }
        )

    if not results:
        # An empty frame has no "Dataset"/"Side"/"Resampling" columns, so the
        # groupby below would raise KeyError; report the situation instead.
        print(f"No reports found for validation method '{val_method}'.")
        return

    df_results = pd.DataFrame(results)
    grouped = df_results.groupby(["Dataset", "Side", "Resampling"])
    for (dataset, side, resampling), group in grouped:
        display(HTML(f"<h3>Results for Dataset: <i>{dataset}</i>, Side: <i>{side}</i>, resampling: <i>{resampling}</i></h3>"))
        display(group.sort_values(by=["Model", "Features", "Aggregator"]))


# Root directory that holds the run outputs, relative to the notebook.
results_path: str = "../outputs/"
# Every reports.csv located exactly three directory levels below the root.
# Sorted because glob yields entries in filesystem order, which would make
# the row indices shown in the result tables differ between machines.
all_results = sorted(Path(results_path).glob("*/*/*/reports.csv"))

In [45]:
# Show the summary tables for runs whose validation method matches "lopo".
present_results(paths=all_results, val_method="lopo")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
2,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.421389,0.092253,0.518713,0.027086,0.034524,0.051993,0.318314,0.17451,0.272775,0.169406,0.457165,0.26818,0.518713,0.027086
4,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.484316,0.153246,0.5,0.0,0.0,0.0,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.5,0.0
1,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.627536,0.107493,0.641607,0.09203,0.282201,0.15794,0.535047,0.163484,0.623977,0.168456,0.580068,0.249043,0.641607,0.09203
0,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.482812,0.153343,0.498413,0.003143,-0.015532,0.030753,0.065514,0.129718,0.042503,0.084156,0.142857,0.282857,0.498413,0.003143
3,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.499654,0.086121,0.500339,0.050637,-0.003384,0.100121,0.297661,0.185441,0.311768,0.213402,0.328211,0.214678,0.500339,0.050637


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
22,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.424948,0.142739,0.440485,0.146541,0.012393,0.032708,0.401787,0.182682,0.461874,0.27004,0.441729,0.151303,0.510965,0.025945
24,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.253624,0.149921,0.338205,0.178744,-0.015014,0.071629,0.198312,0.162606,0.357748,0.291745,0.315289,0.277528,0.473488,0.065684
21,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.261139,0.131941,0.331712,0.143075,-0.081566,0.096574,0.274058,0.176519,0.413604,0.258524,0.43388,0.31093,0.438179,0.062473
20,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.306481,0.141948,0.35797,0.164798,-0.017755,0.132807,0.290548,0.145073,0.475471,0.301033,0.334796,0.241437,0.464015,0.122785
23,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.30748,0.164373,0.318506,0.169606,-0.056672,0.080626,0.26962,0.186584,0.300543,0.2387,0.310951,0.167046,0.445908,0.068776


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
7,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.566454,0.069834,0.563785,0.064325,0.014386,0.013761,0.607081,0.101947,0.662085,0.133996,0.638808,0.116072,0.512763,0.01087
9,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.65779,0.132067,0.630016,0.126509,-0.008993,0.008466,0.733725,0.123666,0.655589,0.134423,0.895782,0.117773,0.495507,0.003905
6,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.644175,0.129965,0.61771,0.123183,-0.007219,0.017336,0.730887,0.120988,0.656422,0.134193,0.897988,0.118505,0.498472,0.004105
5,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.651427,0.131188,0.625663,0.12549,-0.003951,0.01794,0.731138,0.122383,0.655591,0.134032,0.893233,0.118862,0.496869,0.01015
8,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.660957,0.127395,0.631197,0.12239,0.003315,0.006441,0.734903,0.122734,0.656956,0.134046,0.897834,0.118219,0.500452,0.002127


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
12,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.488889,0.027326,0.471389,0.023477,-0.057482,0.047797,0.307668,0.06837,0.312222,0.068741,0.313889,0.074501,0.471389,0.023477
14,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.544444,0.03886,0.529722,0.037839,0.06787,0.079153,0.426971,0.052136,0.48812,0.063227,0.397222,0.055763,0.529722,0.037839
11,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.639506,0.072138,0.633889,0.069685,0.29942,0.151118,0.581475,0.082914,0.672775,0.109169,0.583333,0.096243,0.633889,0.069685
10,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.711111,0.063328,0.7025,0.06392,0.433858,0.134917,0.63382,0.093704,0.716975,0.11233,0.625,0.105474,0.7025,0.06392
13,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.695062,0.066031,0.686389,0.066281,0.400102,0.137941,0.612699,0.097435,0.683939,0.110048,0.608333,0.108515,0.686389,0.066281


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
17,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,LOPO,0.516049,0.033713,0.505278,0.034151,0.010152,0.068488,0.40094,0.064931,0.402968,0.060762,0.408333,0.073087,0.505278,0.034151
19,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,LOPO,0.571605,0.043871,0.548333,0.043829,0.121783,0.105163,0.397259,0.068851,0.572211,0.107839,0.338889,0.069839,0.548333,0.043829
16,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,LOPO,0.724691,0.099802,0.73,0.094771,0.465681,0.197678,0.743902,0.085226,0.752967,0.108212,0.777778,0.076837,0.73,0.094771
15,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,LOPO,0.730864,0.071708,0.723611,0.071174,0.478667,0.146334,0.664974,0.094681,0.771658,0.101693,0.658333,0.10683,0.723611,0.071174
18,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,LOPO,0.730864,0.079087,0.722778,0.079596,0.463984,0.165255,0.655712,0.109457,0.726762,0.118299,0.65,0.114753,0.722778,0.079596


In [46]:
# Show the summary tables for runs whose validation method matches "tacv".
present_results(paths=all_results, val_method="tacv")

Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
2,apsync,engagement_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.478237,0.051894,0.500512,0.031328,0.001778,0.061588,0.38797,0.202726,0.357265,0.200925,0.443405,0.221716,0.500512,0.031328
4,apsync,engagement_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
1,apsync,engagement_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.6405,0.131275,0.615714,0.136109,0.241794,0.286743,0.462302,0.302731,0.625714,0.362842,0.466578,0.334841,0.615714,0.136109
0,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.534116,0.090357,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0
3,apsync,engagement_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.508916,0.058305,0.523843,0.049311,0.041462,0.110617,0.297648,0.210281,0.36411,0.201832,0.312121,0.266114,0.523843,0.049311


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
22,apsync,immersion_10sec,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.545503,0.08005,0.54293,0.059188,0.071216,0.103792,0.583344,0.163628,0.57177,0.209098,0.705254,0.180745,0.54293,0.059188
24,apsync,immersion_10sec,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.488051,0.208738,0.526773,0.036324,0.085154,0.108571,0.533131,0.271428,0.559069,0.208522,0.749482,0.350237,0.526773,0.036324
21,apsync,immersion_10sec,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.311145,0.142635,0.328524,0.131626,-0.355593,0.26508,0.304784,0.219214,0.323593,0.197606,0.375569,0.262681,0.328524,0.131626
20,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.500288,0.199609,0.51477,0.050704,0.0323,0.159239,0.608019,0.213006,0.547907,0.208574,0.854995,0.235896,0.51477,0.050704
23,apsync,immersion_10sec,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.452856,0.174762,0.511186,0.051142,0.028248,0.10461,0.533535,0.178035,0.542154,0.203816,0.67971,0.234199,0.511186,0.051142


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
7,seed,engagement_10sec_35thresh,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.555644,0.096513,0.48866,0.028251,-0.023202,0.051982,0.654688,0.126983,0.663317,0.091176,0.692261,0.250659,0.48866,0.028251
9,seed,engagement_10sec_35thresh,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.648025,0.086939,0.488468,0.011939,-0.042485,0.03812,0.776995,0.068974,0.669713,0.078364,0.929057,0.055451,0.488468,0.011939
6,seed,engagement_10sec_35thresh,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.684997,0.088681,0.522659,0.046252,0.079399,0.177631,0.808665,0.059924,0.683752,0.085957,0.998889,0.0022,0.522659,0.046252
5,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.668558,0.083023,0.498674,0.002839,-0.003172,0.007836,0.792218,0.069807,0.674811,0.074602,0.962115,0.06585,0.498674,0.002839
8,seed,engagement_10sec_35thresh,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.658135,0.072629,0.495278,0.00935,-0.011676,0.023118,0.789412,0.052666,0.673304,0.072463,0.965556,0.0682,0.495278,0.00935


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
12,usilaughs,left,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.47037,0.056328,0.456667,0.041791,-0.086661,0.083587,0.321277,0.163389,0.311333,0.158477,0.333333,0.171076,0.456667,0.041791
14,usilaughs,left,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.574074,0.060249,0.5525,0.05802,0.117881,0.135204,0.422028,0.098378,0.540887,0.120513,0.358333,0.099685,0.5525,0.05802
11,usilaughs,left,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.648148,0.098387,0.640833,0.09397,0.290383,0.195244,0.596089,0.090262,0.629527,0.131358,0.575,0.079987,0.640833,0.09397
10,usilaughs,left,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.740741,0.032796,0.733333,0.02846,0.479257,0.066262,0.695449,0.030717,0.740366,0.077616,0.666667,0.069024,0.733333,0.02846
13,usilaughs,left,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.718519,0.029333,0.71,0.028292,0.429669,0.058617,0.665698,0.036453,0.709698,0.052691,0.633333,0.066,0.71,0.028292


Unnamed: 0,Dataset,Side,Model,Resampling,Features,Aggregator,Validation,accuracy_score avg,accuracy_score sem,balanced_accuracy_score avg,balanced_accuracy_score sem,matthews_corrcoef avg,matthews_corrcoef sem,f1_score avg,f1_score sem,precision_score avg,precision_score sem,recall_score avg,recall_score sem,roc_auc_score avg,roc_auc_score sem
17,usilaughs,right,DummyClassifier,,HandcraftedFeatureExtractor,,TACV,0.5,0.065591,0.4975,0.064466,-0.004713,0.129485,0.456869,0.067827,0.444018,0.067048,0.475,0.084939,0.4975,0.064466
19,usilaughs,right,LogisticRegression,,AutonLab/MOMENT-1-large,MeanChanAggregator,TACV,0.6,0.044305,0.579167,0.041085,0.177598,0.099592,0.462654,0.057802,0.588405,0.098253,0.391667,0.080833,0.579167,0.041085
16,usilaughs,right,LogisticRegression,,HandcraftedFeatureExtractor,,TACV,0.740741,0.073333,0.754167,0.065638,0.517757,0.12078,0.753849,0.051885,0.667946,0.085374,0.875,0.026089,0.754167,0.065638
15,usilaughs,right,LogisticRegression,,amazon/chronos-t5-large,MeanChanAggregator,TACV,0.777778,0.091299,0.773333,0.093433,0.561735,0.181537,0.740824,0.104231,0.775075,0.10686,0.733333,0.162087,0.773333,0.093433
18,usilaughs,right,LogisticRegression,,amazon/chronos-t5-small,MeanChanAggregator,TACV,0.785185,0.062009,0.783333,0.069221,0.579,0.126937,0.751664,0.088778,0.765517,0.069522,0.766667,0.168268,0.783333,0.069221
