In [5]:
from pathlib import Path
import pandas as pd

In [10]:
# Root directory of the training logs. Log files are looked up below it as
# <logs>/<run group>/<category>/<architecture name>/<child>/<run id>/log.csv.
logs = Path(r"D:\Files\logs\category_runs")

# Categories, run groups (group -> architecture names) and run ids to scan.
categories = ['humerus', 'shoulder', 'wrist', 'full']
runs = dict(
    single=['single'],
    multi=[
        'dd-mean',
        'dd-mean-std',
        'dd-mean-tanh',
        'dd-mean-std-tanh',
        'fusion-mean',
        'fusion-mean-post',
    ],
)
ids = ['run_1', 'run_2', 'run_3']

# Per metric:
#   col_name  - suffix of the logged column (read as "val_<col_name>")
#   direction - aggregation that picks the best value of a run
#   repr      - human readable label used as the report column header
metric_options = dict(
    kappa=dict(col_name='cohen-kappa', direction='max', repr="Cohen's Kappa"),
    loss=dict(col_name='loss', direction='min', repr='Binary Cross Entropy'),
)
selected = 'loss'

# Unpack the settings of the selected metric into the names used by later cells.
metric, representation, direction = (
    metric_options[selected][field] for field in ('col_name', 'repr', 'direction')
)

In [11]:
def get_metric_df(metric, log_root=None, run_groups=None, category_names=None, run_ids=None):
    """Collect one validation metric from every available ``log.csv``.

    Log files are searched at
    ``<log_root>/<group>/<category>/<name>/<child>/<run id>/log.csv`` where
    ``<child>`` is any entry of the architecture directory. Missing
    directories and missing log files are skipped silently.

    Parameters
    ----------
    metric : str
        Metric suffix; the column ``val_<metric>`` is read from each log.
    log_root : path-like, optional
        Root directory of the logs. Defaults to the notebook global ``logs``.
    run_groups : dict[str, list[str]], optional
        Mapping of run group -> architecture names. Defaults to the notebook
        global ``runs``.
    category_names : list[str], optional
        Categories to scan. Defaults to the notebook global ``categories``.
    run_ids : list[str], optional
        Run identifiers to scan. Defaults to the notebook global ``ids``.

    Returns
    -------
    pandas.DataFrame
        Columns ``metric``, ``category``, ``name`` and ``run``; one row per
        logged row of each file, keeping each file's own row index. An empty
        frame with these columns is returned when no log file is found.

    Raises
    ------
    KeyError
        If a log file has no ``val_<metric>`` column.
    """
    # Fall back to the notebook-level configuration only for omitted arguments,
    # so existing ``get_metric_df(metric)`` calls keep working unchanged.
    log_root = Path(logs if log_root is None else log_root)
    run_groups = runs if run_groups is None else run_groups
    category_names = categories if category_names is None else category_names
    run_ids = ids if run_ids is None else run_ids

    columns = ['metric', 'category', 'name', 'run']
    frames = []
    for group, names in run_groups.items():
        for category in category_names:
            for name in names:
                arch_dir = log_root / group / category / name
                if not arch_dir.is_dir():
                    continue
                # Sorted so the row order does not depend on the file system.
                for child in sorted(arch_dir.iterdir()):
                    for rid in run_ids:
                        csv_path = child / rid / 'log.csv'
                        if not csv_path.exists():
                            continue
                        values = pd.read_csv(csv_path)[f'val_{metric}']
                        # Build a fresh frame instead of assigning onto a
                        # column subset of the parsed log.
                        frames.append(
                            values.to_frame('metric').assign(category=category, name=name, run=rid)
                        )

    if not frames:
        return pd.DataFrame(columns=columns)
    # Single concat at the end instead of re-copying the result per file.
    return pd.concat(frames)

# Collect the selected validation metric from every run log that exists on disk;
# the trailing bare expression displays the resulting frame.
df = get_metric_df(metric)
df

Unnamed: 0,metric,category,name,run
0,0.526194,humerus,single,run_1
1,0.618038,humerus,single,run_1
2,0.624759,humerus,single,run_1
3,0.446345,humerus,single,run_1
4,0.430071,humerus,single,run_1
...,...,...,...,...
15,0.411761,full,dd-mean-std-tanh,run_3
16,0.413614,full,dd-mean-std-tanh,run_3
17,0.422594,full,dd-mean-std-tanh,run_3
18,0.418266,full,dd-mean-std-tanh,run_3


In [12]:
def to_report(df, repr_metric, direction, order=None):
    """Summarise per-run metric values into an architecture x category table.

    For every (category, name, run) the metric is reduced with ``direction``;
    the per-run results are then averaged per (category, name). Each cell is
    rendered as ``"<mean> (+-<std>)"`` with four significant digits. When the
    standard deviation is undefined (e.g. only one run is available) the cell
    shows the mean alone instead of ``"(+-nan)"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``metric``, ``category``, ``name`` and ``run``.
    repr_metric : str
        Label used as the top-level column header of the report.
    direction : str
        Aggregation name applied within each run (e.g. ``'min'`` or ``'max'``).
    order : list[str], optional
        Row order of the report. Names not listed are dropped and listed names
        without data produce an all-NaN row. Defaults to the fixed
        architecture order used in this notebook.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``name`` with ``(repr_metric, category)`` columns holding
        the formatted strings (NaN where a combination has no data).
    """
    if order is None:
        order = ['single', 'fusion-mean', 'fusion-mean-post', 'dd-mean', 'dd-mean-tanh', 'dd-mean-std', 'dd-mean-std-tanh']

    def fmt(value):
        # Four significant digits, not four decimals.
        return f"{value:.4}"

    # Best value of every individual run.
    per_run = df.groupby(['category', 'name', 'run']).agg(metric=('metric', direction)).reset_index()
    # Mean and sample standard deviation across the runs of one configuration.
    summary = per_run.groupby(['category', 'name']).agg(metric=('metric', 'mean'), metric_std=('metric', 'std'))

    mean_txt = summary['metric'].apply(fmt)
    std_txt = summary['metric_std'].apply(fmt)
    # Keep the bare mean where the std is NaN, otherwise append "(+-std)".
    cells = mean_txt.where(summary['metric_std'].isna(), mean_txt + " (+-" + std_txt + ")")

    table = summary.assign(**{repr_metric: cells})[[repr_metric]]
    res = table.reset_index().pivot(index='name', columns='category', values=[repr_metric]).reindex(order)

    return res

# Build the architecture x category summary table for the selected metric;
# the trailing bare expression displays it.
res = to_report(df, representation, direction)
res

Unnamed: 0_level_0,Binary Cross Entropy,Binary Cross Entropy,Binary Cross Entropy,Binary Cross Entropy
category,full,humerus,shoulder,wrist
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
single,0.4058 (+-0.005928),0.3593 (+-0.02164),0.4689 (+-0.02328),0.3447 (+-0.01582)
fusion-mean,,0.3872 (+-0.04168),0.5238 (+-0.03014),0.3886 (+-0.01361)
fusion-mean-post,,0.4109 (+-0.02709),0.5277 (+-0.01544),0.4026 (+-0.01968)
dd-mean,0.4195 (+-0.002568),0.3703 (+-0.002615),0.5087 (+-0.007348),
dd-mean-tanh,0.4183 (+-0.007395),0.3676 (+-0.007537),0.5057 (+-0.01181),
dd-mean-std,0.4181 (+-0.01746),0.3852 (+-0.009509),0.517 (+-nan),
dd-mean-std-tanh,0.4078 (+-0.01156),0.3579 (+-0.02134),,


In [13]:
# Export the report as LaTeX. The column format (index column(s), a vertical
# rule, then one left-aligned column per data column) is derived from the table
# itself, so it stays correct when the number of categories changes.
latex_table = res.to_latex(column_format='l' * res.index.nlevels + '|' + 'l' * res.shape[1])
# Blank the 'category' header label and rename the 'name' index label for the paper.
print(latex_table.replace('category', '').replace('name', 'Architecture'))

\begin{tabular}{l|llll}
\toprule
{} & \multicolumn{4}{l}{Binary Cross Entropy} \\
 &                 full &              humerus &             shoulder &               wrist \\
Architecture             &                      &                      &                      &                     \\
\midrule
single           &  0.4058 (+-0.005928) &   0.3593 (+-0.02164) &   0.4689 (+-0.02328) &  0.3447 (+-0.01582) \\
fusion-mean      &                  NaN &   0.3872 (+-0.04168) &   0.5238 (+-0.03014) &  0.3886 (+-0.01361) \\
fusion-mean-post &                  NaN &   0.4109 (+-0.02709) &   0.5277 (+-0.01544) &  0.4026 (+-0.01968) \\
dd-mean          &  0.4195 (+-0.002568) &  0.3703 (+-0.002615) &  0.5087 (+-0.007348) &                 NaN \\
dd-mean-tanh     &  0.4183 (+-0.007395) &  0.3676 (+-0.007537) &   0.5057 (+-0.01181) &                 NaN \\
dd-mean-std      &   0.4181 (+-0.01746) &  0.3852 (+-0.009509) &        0.517 (+-nan) &                 NaN \\
dd-mean-std-tanh &   0.4078 (