In [96]:
from result_utils import *

In [None]:
import pandas as pd   
import re

def parse_run_name(df):
    """Append ``stage``, ``dataset`` and ``arch`` columns parsed from ``df['run_name']``.

    Parsing is substring/regex based:

    * ``stage``: ``'pretrain'`` when the name contains ``pretraining`` and
      ``wikipedia``; ``'tapt'`` when it contains ``pretraining`` and one of
      ``arxiv``/``hyperpartisan``/``imdb``; ``'finetune'`` when it contains
      ``finetuning``; otherwise ``'unknown'``.
    * ``dataset``: the first of ``wikipedia``, ``arxiv``, ``hyperpartisan``,
      ``imdb`` found in the name, otherwise ``'unknown'``.
    * ``arch``: ``'mha'`` when the name contains ``_mha``, else
      ``favor_nb<value>`` or ``lsh_h<n>_c<m>`` as matched by the regexes below,
      otherwise ``'unknown'``.

    Args:
        df: DataFrame with a string column ``run_name``.

    Returns:
        A new DataFrame: ``df`` with the three parsed columns concatenated on
        the right. The input is not modified.
    """
    known_datasets = ['wikipedia', 'arxiv', 'hyperpartisan', 'imdb']
    tapt_datasets = ['arxiv', 'hyperpartisan', 'imdb']
    parsed_cols = ['stage', 'dataset', 'arch']

    def parse_row(run_name):
        if 'pretraining' in run_name and 'wikipedia' in run_name:
            stage = 'pretrain'
        elif 'pretraining' in run_name and any(d in run_name for d in tapt_datasets):
            stage = 'tapt'
        elif 'finetuning' in run_name:
            stage = 'finetune'
        else:
            stage = 'unknown'

        dataset = next((d for d in known_datasets if d in run_name), 'unknown')

        if '_mha' in run_name:
            arch = 'mha'
        elif match := re.search(r'favor_nb([\d.]+)', run_name):
            arch = f'favor_nb{match.group(1)}'
        elif match := re.search(r'lsh_h(\d+)_c(\d+)', run_name):
            arch = f'lsh_h{match.group(1)}_c{match.group(2)}'
        else:
            arch = 'unknown'

        return {'stage': stage, 'dataset': dataset, 'arch': arch}

    rows = [parse_row(name) for name in df['run_name']]

    # Build the frame column-by-column with an explicit column list so that an
    # empty input still yields the three (empty) columns. Series.apply with a
    # Series-returning function gives back an empty *Series* for empty input,
    # which made the concat below emit a duplicate 'run_name' column instead.
    parsed = pd.DataFrame(
        {col: [row[col] for row in rows] for col in parsed_cols},
        index=df.index,
    )
    return pd.concat([df, parsed], axis=1)


def merge_stages(df_pretrain: pd.DataFrame,
                 df_tapt: pd.DataFrame,
                 df_finetune: pd.DataFrame) -> pd.DataFrame:
    """
    Combine the three per-stage tables into one wide table.

    Every column except the join keys ``dataset`` and ``arch`` is prefixed with
    its stage (``pretrain/``, ``tapt/``, ``finetune/``). The merge then runs in
    two steps:

    1. tapt LEFT JOIN pretrain on ``arch`` (the pretrain ``dataset`` column is
       dropped first, so the TAPT dataset is the one that survives),
    2. that result OUTER JOIN finetune on ``['dataset', 'arch']``.
    """
    join_keys = ('dataset', 'arch')

    def with_stage_prefix(frame, stage):
        # Rename everything that is not a join key to '<stage>/<column>'.
        mapping = {}
        for name in frame.columns:
            if name in join_keys:
                continue
            mapping[name] = f'{stage}/{name}'
        return frame.rename(columns=mapping)

    tapt = with_stage_prefix(df_tapt, 'tapt')
    finetune = with_stage_prefix(df_finetune, 'finetune')

    pretrain = with_stage_prefix(df_pretrain, 'pretrain')
    pretrain = pretrain[[name for name in pretrain.columns if name != 'dataset']]

    tapt_with_pretrain = tapt.merge(pretrain, on='arch', how='left')
    return tapt_with_pretrain.merge(finetune, on=['dataset', 'arch'], how='outer')


def calculate_relative_metrics(
    df: pd.DataFrame,
    pct_cols: list[str] = None,
    f1_col: str = 'finetune/test/f1_macro',
) -> pd.DataFrame:
    """
    Add per-dataset metrics relative to the ``mha`` and ``baseline`` rows.

    For every distinct value of ``df['dataset']`` this computes:

    1. ``<col>_pct_vs_mha`` for each column in ``pct_cols``: percentage change
       relative to the dataset's ``arch == 'mha'`` row.
    2. ``<f1_col>_pp_vs_mha``: plain difference of ``f1_col`` to the ``mha`` row.
    3. ``<f1_col>_pp_vs_baseline``: plain difference of ``f1_col`` to the
       ``arch == 'baseline'`` row.

    When a dataset has several reference rows, the first one is used. Values
    stay NaN where no reference row exists, or (for percentages) where the
    reference value is 0.

    Args:
        df: Table with ``dataset`` and ``arch`` columns plus the metric columns.
        pct_cols: Columns to express as percentage change vs. ``mha``.
            Defaults to the combined finetune+TAPT peak GPU memory and duration.
        f1_col: Column used for the two difference metrics.

    Returns:
        A copy of ``df`` with the derived columns added; ``df`` is not modified.
    """
    if pct_cols is None:
        pct_cols = ['finetune_tapt/train/gpu_mem_peak_gb', 'finetune_tapt/duration_min']

    result = df.copy()

    # Create the derived columns up front so the output schema does not depend
    # on whether any dataset happens to contain an 'mha' / 'baseline' row.
    derived_cols = [f'{col}_pct_vs_mha' for col in pct_cols]
    derived_cols += [f'{f1_col}_pp_vs_mha', f'{f1_col}_pp_vs_baseline']
    for name in derived_cols:
        if name not in result.columns:
            result[name] = float('nan')

    for dataset in df['dataset'].unique():
        in_dataset = df['dataset'] == dataset

        mha_rows = in_dataset & (df['arch'] == 'mha')
        if mha_rows.any():
            for col in pct_cols:
                mha_val = df.loc[mha_rows, col].values[0]
                # A zero reference would divide by zero; leave those rows NaN.
                if mha_val != 0:
                    result.loc[in_dataset, f'{col}_pct_vs_mha'] = (
                        (df.loc[in_dataset, col] - mha_val) / mha_val
                    ) * 100

            mha_f1 = df.loc[mha_rows, f1_col].values[0]
            result.loc[in_dataset, f'{f1_col}_pp_vs_mha'] = df.loc[in_dataset, f1_col] - mha_f1

        baseline_rows = in_dataset & (df['arch'] == 'baseline')
        if baseline_rows.any():
            baseline_f1 = df.loc[baseline_rows, f1_col].values[0]
            result.loc[in_dataset, f'{f1_col}_pp_vs_baseline'] = (
                df.loc[in_dataset, f1_col] - baseline_f1
            )

    return result

def generate_latex_table_simple(
    df: pd.DataFrame,
    metrics: list[dict],
    filter_col: str = None,
    filter_val: str = None,
) -> str:
    """Render a simple LaTeX ``tabular``: one row per architecture, one column per metric.

    Args:
        df: Table with an ``arch`` column and the metric columns.
        metrics: One dict per output column with keys ``label`` (header text),
            ``col`` (column in ``df``), ``fmt`` (``str.format`` template) and an
            optional ``suffix`` appended after the formatted value.
        filter_col: Optional column to filter ``df`` on before rendering.
        filter_val: Value ``filter_col`` must equal. The filter is applied only
            when both ``filter_col`` and ``filter_val`` are not ``None``.

    Returns:
        The table as a newline-joined string. Architectures with no row, or
        with a NaN value, are rendered as ``-``. If several rows match an
        architecture, the first one is used.
    """
    # Insertion order of this dict is the row order of the table.
    arch_config = {
        'mha': {'label': r'\textit{SDPA}', 'group': None},
        'lsh_h2_c64': {'label': r'$N_h{=}2$, $C{=}64$', 'group': 'LSH'},
        'lsh_h2_c128': {'label': r'$N_h{=}2$, $C{=}128$', 'group': 'LSH'},
        'lsh_h4_c64': {'label': r'$N_h{=}4$, $C{=}64$', 'group': 'LSH'},
        'lsh_h4_c128': {'label': r'$N_h{=}4$, $C{=}128$', 'group': 'LSH'},
        'favor_nb0.125': {'label': r'$N_f{=}0.125$', 'group': 'FAVOR+'},
        'favor_nb0.25': {'label': r'$N_f{=}0.25$', 'group': 'FAVOR+'},
        'favor_nb0.5': {'label': r'$N_f{=}0.5$', 'group': 'FAVOR+'},
        'favor_nb1': {'label': r'$N_f{=}1.0$', 'group': 'FAVOR+'},
    }

    data = df
    # Compare against None rather than truthiness: a falsy filter value
    # (0, False, '') is a legitimate value to filter on.
    if filter_col is not None and filter_val is not None:
        data = df[df[filter_col] == filter_val]

    n_metrics = len(metrics)
    n_cols = n_metrics + 1

    lines = []
    lines.append(r'\begin{tabular}{@{}l' + 'c' * n_metrics + '@{}}')
    lines.append(r'\toprule')

    metric_header = ' & '.join([f'\\textbf{{{m["label"]}}}' for m in metrics])
    lines.append(f'\\textbf{{Konfiguracja}} & {metric_header} \\\\')
    lines.append(r'\midrule')

    def get_value(arch, metric_cfg):
        row = data[data['arch'] == arch]
        if row.empty:
            return '-'
        val = row[metric_cfg['col']].values[0]
        if pd.isna(val):
            return '-'
        suffix = metric_cfg.get('suffix', '')
        return metric_cfg['fmt'].format(val) + suffix

    current_group = None
    for arch, cfg in arch_config.items():
        # Emit a group heading row whenever a new named group starts.
        if cfg['group'] != current_group:
            if cfg['group'] is not None:
                lines.append(r'\midrule')
                lines.append(f'\\multicolumn{{{n_cols}}}{{l}}{{\\textit{{{cfg["group"]}}}}} \\\\')
            current_group = cfg['group']

        values = [get_value(arch, m) for m in metrics]
        lines.append(f'{cfg["label"]} & {" & ".join(values)} \\\\')

    lines.append(r'\bottomrule')
    lines.append(r'\end{tabular}')

    return '\n'.join(lines)


def generate_latex_table(
    df: pd.DataFrame,
    metrics: list[dict],
    datasets: list[str] = None,
    dataset_labels: dict = None,
    show_baseline: bool = False,
    show_sdpa: bool = False,
    combined_cell: dict = None,
    extra_col: dict = None,
) -> str:
    """
    Render a LaTeX ``tabular`` with metrics as top-level columns and datasets
    as sub-columns; one row per architecture.

    Args:
        df: Table with ``arch`` and ``dataset`` columns plus the metric columns.
        metrics: One dict per metric with keys ``label``, ``col``, ``fmt``
            (``str.format`` template) and optional ``suffix``.
        datasets: Dataset keys, in column order. Defaults to
            ``['imdb', 'hyperpartisan', 'arxiv']``.
        dataset_labels: Mapping dataset key -> header text. A dataset without
            an entry is labelled with its raw key.
        show_baseline: Add a ``TF-IDF + LR`` row built from ``arch == 'baseline'``.
        show_sdpa: Add a dedicated SDPA row (``arch == 'mha'``) and leave
            ``mha`` out of the regular rows. Where ``df`` has a
            ``<col>_pp_vs_baseline`` column, its value is appended in
            parentheses.
        combined_cell: If given, each regular row emits ONE cell per dataset
            (independent of ``metrics``): the values of ``combined_cell['cols']``
            formatted with ``combined_cell['fmt']`` and joined by
            ``combined_cell['sep']``.
        extra_col: If given, adds one leading column headed
            ``extra_col['label']``; its value comes from
            ``extra_col['metrics'][0]`` looked up for ``datasets[0]``.
            The second header row labels this column "Wikipedia".

    Returns:
        The table as a newline-joined string. Missing rows and NaN values are
        rendered as ``-``; if several rows match, the first one is used.
    """
    # None sentinel instead of a mutable list default.
    if datasets is None:
        datasets = ['imdb', 'hyperpartisan', 'arxiv']

    dataset_labels = dataset_labels or {
        'imdb': 'IMDB',
        'hyperpartisan': 'Hyper.',
        'arxiv': 'Arxiv'
    }

    arch_config = {
        'mha': {'label': r'\textit{SDPA}', 'group': None},
        'lsh_h2_c64': {'label': r'$N_h{=}2$, $C{=}64$', 'group': 'LSH'},
        'lsh_h2_c128': {'label': r'$N_h{=}2$, $C{=}128$', 'group': 'LSH'},
        'lsh_h4_c64': {'label': r'$N_h{=}4$, $C{=}64$', 'group': 'LSH'},
        'lsh_h4_c128': {'label': r'$N_h{=}4$, $C{=}128$', 'group': 'LSH'},
        'favor_nb0.125': {'label': r'$N_f{=}0.125$', 'group': 'FAVOR+'},
        'favor_nb0.25': {'label': r'$N_f{=}0.25$', 'group': 'FAVOR+'},
        'favor_nb0.5': {'label': r'$N_f{=}0.5$', 'group': 'FAVOR+'},
        'favor_nb1': {'label': r'$N_f{=}1.0$', 'group': 'FAVOR+'},
    }

    arch_order = ['mha', 'lsh_h2_c64', 'lsh_h2_c128', 'lsh_h4_c64', 'lsh_h4_c128',
                  'favor_nb0.125', 'favor_nb0.25', 'favor_nb0.5', 'favor_nb1']

    arch_order_no_mha = ['lsh_h2_c64', 'lsh_h2_c128', 'lsh_h4_c64', 'lsh_h4_c128',
                         'favor_nb0.125', 'favor_nb0.25', 'favor_nb0.5', 'favor_nb1']

    n_metrics = len(metrics)
    n_datasets = len(datasets)

    has_extra = extra_col is not None
    extra_cols = 1 if has_extra else 0

    n_cols = n_metrics * n_datasets + 1 + extra_cols

    def get_val(arch, dataset, col):
        """Return the first value of ``col`` for (arch, dataset), or None if no row."""
        row = df[(df['arch'] == arch) & (df['dataset'] == dataset)]
        if row.empty:
            return None
        return row[col].values[0]

    def is_present(val):
        return val is not None and not pd.isna(val)

    def fmt_cell(val, cfg):
        """Format ``val`` with the metric config, or '-' when it is missing."""
        if not is_present(val):
            return '-'
        return cfg['fmt'].format(val) + cfg.get('suffix', '')

    def metric_header():
        return ' & '.join([
            f'\\multicolumn{{{n_datasets}}}{{c}}{{\\textbf{{{m["label"]}}}}}'
            for m in metrics
        ])

    def metric_rules(first_col):
        """One \\cmidrule per metric, spanning its dataset sub-columns."""
        rules = []
        for i, _ in enumerate(metrics):
            start = first_col + i * n_datasets
            end = start + n_datasets - 1
            rules.append(f'\\cmidrule(lr){{{start}-{end}}}')
        return rules

    def dataset_header(repeat):
        return ' & '.join(
            [f'\\textbf{{{dataset_labels.get(d, d)}}}' for d in datasets] * repeat
        )

    lines = []
    lines.append(r'\begin{tabular}{@{}l' + 'c' * (n_cols - 1) + '@{}}')
    lines.append(r'\toprule')

    if has_extra:
        lines.append(f'& \\textbf{{{extra_col["label"]}}} & {metric_header()} \\\\')
        lines.append(r'\cmidrule(lr){2-2}')
        # Metric columns start at 3: column 1 is the model, column 2 the extra column.
        lines.extend(metric_rules(3))
        lines.append(f'\\textbf{{Model}} & \\textbf{{Wikipedia}} & {dataset_header(n_metrics)} \\\\')
    elif n_metrics > 1:
        lines.append(f'& {metric_header()} \\\\')
        lines.extend(metric_rules(2))
        lines.append(f'\\textbf{{Model}} & {dataset_header(n_metrics)} \\\\')
    else:
        # Single metric: no metric header row, datasets are the only header.
        lines.append(f'\\textbf{{Model}} & {dataset_header(1)} \\\\')

    lines.append(r'\midrule')

    if show_baseline:
        baseline_cells = []
        if has_extra:
            baseline_cells.append('-')
        for m in metrics:
            for d in datasets:
                baseline_cells.append(fmt_cell(get_val('baseline', d, m['col']), m))
        lines.append(f'\\textit{{TF-IDF + LR}} & {" & ".join(baseline_cells)} \\\\')
        lines.append(r'\midrule')

    if show_sdpa:
        sdpa_cells = []
        if has_extra:
            sdpa_cells.append('-')
        for m in metrics:
            diff_col = m['col'] + '_pp_vs_baseline'
            # The diff column is optional: it only exists for metrics that had
            # a baseline comparison computed, so do not index it blindly.
            has_diff_col = diff_col in df.columns
            for d in datasets:
                val = get_val('mha', d, m['col'])
                if not is_present(val):
                    sdpa_cells.append('-')
                    continue
                cell = fmt_cell(val, m)
                diff = get_val('mha', d, diff_col) if has_diff_col else None
                if is_present(diff):
                    cell += f' ({diff:+.1f})'
                sdpa_cells.append(cell)
        lines.append(f'\\textit{{SDPA}} (vs TF-IDF+LR) & {" & ".join(sdpa_cells)} \\\\')

    # SDPA already has its own row above, so skip it in the regular rows.
    use_arch_order = arch_order_no_mha if show_sdpa else arch_order

    current_group = None
    for arch in use_arch_order:
        cfg = arch_config[arch]

        # Emit a group heading row whenever a new named group starts.
        if cfg['group'] != current_group:
            if cfg['group'] is not None:
                lines.append(r'\midrule')
                if combined_cell:
                    group_label = f'{cfg["group"]} (vs SDPA / vs TF-IDF+LR)'
                else:
                    group_label = cfg['group']
                lines.append(f'\\multicolumn{{{n_cols}}}{{l}}{{\\textit{{{group_label}}}}} \\\\')
            current_group = cfg['group']

        values = []

        if has_extra:
            extra_m = extra_col['metrics'][0]
            values.append(fmt_cell(get_val(arch, datasets[0], extra_m['col']), extra_m))

        if combined_cell:
            for d in datasets:
                parts = []
                for col in combined_cell['cols']:
                    val = get_val(arch, d, col)
                    if is_present(val):
                        parts.append(combined_cell['fmt'].format(val))
                    else:
                        parts.append('-')
                values.append(combined_cell['sep'].join(parts))
        else:
            for m in metrics:
                for d in datasets:
                    values.append(fmt_cell(get_val(arch, d, m['col']), m))

        lines.append(f'{cfg["label"]} & {" & ".join(values)} \\\\')

    lines.append(r'\bottomrule')
    lines.append(r'\end{tabular}')

    return '\n'.join(lines)


In [98]:
# Load the experiments mapping; the cell output below shows it maps run ids to
# run names. get_experiments_dict is not defined in this notebook
# (presumably it comes from the `result_utils` star import — confirm).
all_experiments = get_experiments_dict()

In [99]:
all_experiments

{'hd228t3k': 'E1_pretraining_wikipedia_bertsmall_mha',
 'ghydwtq8': 'E1_pretraining_imdb_bertsmall_mha',
 'gnty8r83': 'E1_finetuning_imdb_bertsmall_mha_f0_d0.1_cls',
 'dz8ut8xo': 'E1_finetuning_imdb_bertsmall_mha_f0_d0.1_mean',
 'xzv6zdrh': 'E1_finetuning_imdb_bertsmall_mha_f0_d0.2_cls',
 'izheqn5h': 'E1_finetuning_imdb_bertsmall_mha_f0_d0.2_mean',
 'y6w5l42k': 'E1_finetuning_imdb_bertsmall_mha_f1_d0.1_cls',
 'dtucpxrs': 'E1_finetuning_imdb_bertsmall_mha_f1_d0.1_mean',
 'gkglmyzr': 'E1_finetuning_imdb_bertsmall_mha_f1_d0.2_cls',
 'haph0z0a': 'E1_finetuning_imdb_bertsmall_mha_f1_d0.2_mean',
 'dqnki4qe': 'E1_finetuning_imdb_bertsmall_mha_f2_d0.1_cls',
 'sj6wkwyl': 'E1_finetuning_imdb_bertsmall_mha_f2_d0.1_mean',
 'd8wyaq7o': 'E1_finetuning_imdb_bertsmall_mha_f2_d0.2_cls',
 'l2jq2x7g': 'E1_finetuning_imdb_bertsmall_mha_f2_d0.2_mean',
 'z28fmtgo': 'E2_pretraining_wikipedia_bertsmall_favor_nb0.125',
 '0itox8yh': 'E2_pretraining_wikipedia_bertsmall_favor_nb0.25',
 '0e2zxgor': 'E2_pretraining

# main_result_tables

In [100]:
# Run id -> run name for the runs that feed the main result tables.
# The names carry the stage (pretraining / finetuning), the dataset and the
# attention variant (mha, favor_nb*, lsh_h*_c*), which parse_run_name extracts.
main_results_runs = {
 'hd228t3k': 'E1_pretraining_wikipedia_bertsmall_mha',
 'ghydwtq8': 'E1_pretraining_imdb_bertsmall_mha',
 'l2jq2x7g': 'E1_finetuning_imdb_bertsmall_mha_f2_d0.2_mean',
 'z28fmtgo': 'E2_pretraining_wikipedia_bertsmall_favor_nb0.125',
 '0itox8yh': 'E2_pretraining_wikipedia_bertsmall_favor_nb0.25',
 '0e2zxgor': 'E2_pretraining_wikipedia_bertsmall_lsh_h4_c128',
 'yjcapkj3': 'E2_pretraining_wikipedia_bertsmall_favor_nb0.5',
 'wi223o9o': 'E2_pretraining_wikipedia_bertsmall_lsh_h4_c64',
 'noug0x9s': 'E2_pretraining_wikipedia_bertsmall_favor_nb1',
 'mzlsnnzy': 'E2_pretraining_imdb_bertsmall_favor_nb0.125',
 '1qxi1tf7': 'E2_pretraining_imdb_bertsmall_favor_nb0.25',
 '1crsv1ae': 'E2_pretraining_imdb_bertsmall_favor_nb0.5',
 'eudij3yf': 'E2_pretraining_wikipedia_bertsmall_lsh_h2_c64',
 'hu06m597': 'E2_pretraining_wikipedia_bertsmall_lsh_h2_c128',
 'd659uj2j': 'E2_finetuning_imdb_bertsmall_favor_nb0.125',
 'if2ozrk2': 'E2_finetuning_imdb_bertsmall_favor_nb0.25',
 'hhoyoxfp': 'E2_finetuning_imdb_bertsmall_favor_nb0.5',
 'jucxpr34': 'E2_pretraining_imdb_bertsmall_favor_nb1',
 'u44pgkly': 'E2_finetuning_imdb_bertsmall_favor_nb1',
 'a16qafd4': 'E2_pretraining_imdb_bertsmall_lsh_h2_c64',
 'o031jp0p': 'E2_pretraining_imdb_bertsmall_lsh_h2_c128',
 '9fnn58aq': 'E2_pretraining_imdb_bertsmall_lsh_h4_c128',
 '2fpocsb8': 'E2_pretraining_imdb_bertsmall_lsh_h4_c64',
 'tq5q132s': 'E2_finetuning_imdb_bertsmall_lsh_h2_c128',
 'j2x1q39c': 'E2_finetuning_imdb_bertsmall_lsh_h2_c64',
 's1c2rnaf': 'E2_finetuning_imdb_bertsmall_lsh_h4_c128',
 'u262vax9': 'E2_finetuning_imdb_bertsmall_lsh_h4_c64',
 'jibomvsj': 'E1_pretraining_arxiv_bertsmall_mha',
 'tzihl99m': 'E2_pretraining_hyperpartisan_bertsmall_favor_nb0.125',
 'vi5hkih4': 'E1_pretraining_hyperpartisan_bertsmall_mha',
 '71eagzdx': 'E2_pretraining_hyperpartisan_bertsmall_favor_nb0.25',
 'pu4bht10': 'E2_pretraining_hyperpartisan_bertsmall_favor_nb0.5',
 'vyeowm82': 'E2_pretraining_hyperpartisan_bertsmall_favor_nb1',
 '2g7hhgqh': 'E2_pretraining_hyperpartisan_bertsmall_lsh_h2_c64',
 'i1bylyot': 'E2_pretraining_hyperpartisan_bertsmall_lsh_h2_c128',
 'm6psyu6o': 'E2_pretraining_hyperpartisan_bertsmall_lsh_h4_c64',
 'o9riawb7': 'E2_pretraining_hyperpartisan_bertsmall_lsh_h4_c128',
 'kb5ewjpp': 'E2_pretraining_arxiv_bertsmall_lsh_h2_c64',
 '4h910kzj': 'E2_pretraining_arxiv_bertsmall_lsh_h2_c128',
 '9hb1xk49': 'E2_pretraining_arxiv_bertsmall_lsh_h4_c64',
 '7olzfob3': 'E2_pretraining_arxiv_bertsmall_lsh_h4_c128',
 't67wmtc1': 'E2_pretraining_arxiv_bertsmall_favor_nb0.125',
 'aplguq3f': 'E2_pretraining_arxiv_bertsmall_favor_nb0.25',
 'k9km9nuj': 'E2_pretraining_arxiv_bertsmall_favor_nb0.5',
 'p0sm9w3v': 'E2_pretraining_arxiv_bertsmall_favor_nb1',
 'ulsu61q4': 'E1_finetuning_hyperpartisan_bertsmall_mha_f1_d0.1_mean',
 'xhbujbys': 'E2_finetuning_hyperpartisan_bertsmall_favor_nb0.125',
 '3klrqv94': 'E2_finetuning_hyperpartisan_bertsmall_favor_nb0.25',
 '3jtkh8iw': 'E2_finetuning_hyperpartisan_bertsmall_favor_nb0.5',
 'k2j8xkd0': 'E2_finetuning_hyperpartisan_bertsmall_favor_nb1',
 'ys8qnftm': 'E2_finetuning_hyperpartisan_bertsmall_lsh_h2_c64',
 'yjqias1k': 'E2_finetuning_hyperpartisan_bertsmall_lsh_h2_c128',
 '16tul7l4': 'E2_finetuning_hyperpartisan_bertsmall_lsh_h4_c64',
 'ied1ulsn': 'E1_finetuning_arxiv_bertsmall_mha_f1_d0.2_mean',
 'gyaw279z': 'E2_finetuning_hyperpartisan_bertsmall_lsh_h4_c128',
 'fs5plm2w': 'E2_finetuning_arxiv_bertsmall_favor_nb0.125',
 'xf2puymg': 'E2_finetuning_arxiv_bertsmall_lsh_h2_c128',
 'tug4ff1q': 'E2_finetuning_arxiv_bertsmall_favor_nb0.25',
 'ebmfez6l': 'E2_finetuning_arxiv_bertsmall_lsh_h2_c64',
 '3ksmzwr0': 'E2_finetuning_arxiv_bertsmall_favor_nb0.5',
 'wqah0rrq': 'E2_finetuning_arxiv_bertsmall_lsh_h4_c128',
 '0zr9387l': 'E2_finetuning_arxiv_bertsmall_favor_nb1',
 'v0prpm6r': 'E2_finetuning_arxiv_bertsmall_lsh_h4_c64'}

In [101]:
# Gather ids and names of every run whose name contains 'pretraining'
# (both Wikipedia pretraining and the per-dataset pretraining runs),
# echoing each selected run as it is picked.
ids, names = [], []

for run_id, name in main_results_runs.items():
    if 'pretraining' not in name:
        continue
    ids.append(run_id)
    names.append(name)
    print(name, run_id)



E1_pretraining_wikipedia_bertsmall_mha hd228t3k
E1_pretraining_imdb_bertsmall_mha ghydwtq8
E2_pretraining_wikipedia_bertsmall_favor_nb0.125 z28fmtgo
E2_pretraining_wikipedia_bertsmall_favor_nb0.25 0itox8yh
E2_pretraining_wikipedia_bertsmall_lsh_h4_c128 0e2zxgor
E2_pretraining_wikipedia_bertsmall_favor_nb0.5 yjcapkj3
E2_pretraining_wikipedia_bertsmall_lsh_h4_c64 wi223o9o
E2_pretraining_wikipedia_bertsmall_favor_nb1 noug0x9s
E2_pretraining_imdb_bertsmall_favor_nb0.125 mzlsnnzy
E2_pretraining_imdb_bertsmall_favor_nb0.25 1qxi1tf7
E2_pretraining_imdb_bertsmall_favor_nb0.5 1crsv1ae
E2_pretraining_wikipedia_bertsmall_lsh_h2_c64 eudij3yf
E2_pretraining_wikipedia_bertsmall_lsh_h2_c128 hu06m597
E2_pretraining_imdb_bertsmall_favor_nb1 jucxpr34
E2_pretraining_imdb_bertsmall_lsh_h2_c64 a16qafd4
E2_pretraining_imdb_bertsmall_lsh_h2_c128 o031jp0p
E2_pretraining_imdb_bertsmall_lsh_h4_c128 9fnn58aq
E2_pretraining_imdb_bertsmall_lsh_h4_c64 2fpocsb8
E1_pretraining_arxiv_bertsmall_mha jibomvsj
E2_pretrain

In [102]:
# Build the per-run table for the selected pretraining runs and tag each row
# with stage / dataset / arch parsed from its run name.
# build_metric_runtime_df is an external helper; judging by the cell output
# below it returns one row per run with run_id, run_name, duration_s,
# avg_epoch_time_s and one '<scope>/<metric>' column per requested metric
# (presumably aggregated with the given 'agg' — confirm against the helper).
df_pretrain = parse_run_name(build_metric_runtime_df(ids, names, [
    {'metric_name': 'avg_epoch_loss', 'metric_scope': 'train', 'agg': 'min'},
    {'metric_name': 'gpu_mem_peak_mb', 'metric_scope': 'train', 'agg': 'max'}
]))

In [103]:
df_pretrain

Unnamed: 0,run_id,run_name,duration_s,avg_epoch_time_s,train/avg_epoch_loss,train/gpu_mem_peak_mb,stage,dataset,arch
0,hd228t3k,E1_pretraining_wikipedia_bertsmall_mha,4728.551459,472.855146,2.156817,14785.845215,pretrain,wikipedia,mha
1,ghydwtq8,E1_pretraining_imdb_bertsmall_mha,1113.179355,74.211957,2.387619,14784.345215,tapt,imdb,mha
2,z28fmtgo,E2_pretraining_wikipedia_bertsmall_favor_nb0.125,6593.419117,659.341912,3.069484,17017.976074,pretrain,wikipedia,favor_nb0.125
3,0itox8yh,E2_pretraining_wikipedia_bertsmall_favor_nb0.25,7271.608264,727.160826,2.694388,18842.976074,pretrain,wikipedia,favor_nb0.25
4,0e2zxgor,E2_pretraining_wikipedia_bertsmall_lsh_h4_c128,17036.51155,1703.651155,2.248216,29574.151855,pretrain,wikipedia,lsh_h4_c128
5,yjcapkj3,E2_pretraining_wikipedia_bertsmall_favor_nb0.5,9173.221722,917.322172,2.666425,22493.976074,pretrain,wikipedia,favor_nb0.5
6,wi223o9o,E2_pretraining_wikipedia_bertsmall_lsh_h4_c64,10765.664537,1076.566454,2.285827,24964.151855,pretrain,wikipedia,lsh_h4_c64
7,noug0x9s,E2_pretraining_wikipedia_bertsmall_favor_nb1,13037.930545,1303.793054,2.578758,29795.487793,pretrain,wikipedia,favor_nb1
8,mzlsnnzy,E2_pretraining_imdb_bertsmall_favor_nb0.125,1365.71701,91.047801,3.199574,17015.669434,tapt,imdb,favor_nb0.125
9,1qxi1tf7,E2_pretraining_imdb_bertsmall_favor_nb0.25,1540.193664,102.679578,2.865847,18840.782715,tapt,imdb,favor_nb0.25


In [104]:
# Gather ids and names of every run whose name does NOT contain 'pretraining'
# (i.e. the finetuning runs), echoing each selected run as it is picked.
# This overwrites the ids / names lists built for the pretraining runs.
ids, names = [], []

for run_id, name in main_results_runs.items():
    if 'pretraining' in name:
        continue
    ids.append(run_id)
    names.append(name)
    print(name, run_id)



E1_finetuning_imdb_bertsmall_mha_f2_d0.2_mean l2jq2x7g
E2_finetuning_imdb_bertsmall_favor_nb0.125 d659uj2j
E2_finetuning_imdb_bertsmall_favor_nb0.25 if2ozrk2
E2_finetuning_imdb_bertsmall_favor_nb0.5 hhoyoxfp
E2_finetuning_imdb_bertsmall_favor_nb1 u44pgkly
E2_finetuning_imdb_bertsmall_lsh_h2_c128 tq5q132s
E2_finetuning_imdb_bertsmall_lsh_h2_c64 j2x1q39c
E2_finetuning_imdb_bertsmall_lsh_h4_c128 s1c2rnaf
E2_finetuning_imdb_bertsmall_lsh_h4_c64 u262vax9
E1_finetuning_hyperpartisan_bertsmall_mha_f1_d0.1_mean ulsu61q4
E2_finetuning_hyperpartisan_bertsmall_favor_nb0.125 xhbujbys
E2_finetuning_hyperpartisan_bertsmall_favor_nb0.25 3klrqv94
E2_finetuning_hyperpartisan_bertsmall_favor_nb0.5 3jtkh8iw
E2_finetuning_hyperpartisan_bertsmall_favor_nb1 k2j8xkd0
E2_finetuning_hyperpartisan_bertsmall_lsh_h2_c64 ys8qnftm
E2_finetuning_hyperpartisan_bertsmall_lsh_h2_c128 yjqias1k
E2_finetuning_hyperpartisan_bertsmall_lsh_h4_c64 16tul7l4
E1_finetuning_arxiv_bertsmall_mha_f1_d0.2_mean ied1ulsn
E2_finetuning_

In [105]:
# Build the per-run table for the finetuning runs and tag each row with
# stage / dataset / arch parsed from its run name.
# NOTE(review): the 'at_index' + 'select_at' spec presumably reports test F1 at
# the index where eval F1 is maximal (model selection on eval) — confirm
# against build_metric_runtime_df, which is defined outside this notebook.
df_finetune = parse_run_name(build_metric_runtime_df(ids, names, [
    {'metric_name': 'f1_macro', 'metric_scope': 'test', 'agg': 'at_index',"select_at": {
            "metric_name": "f1_macro",
            "metric_scope": "eval",
            "agg": "idxmax",
        } },
    {'metric_name': 'gpu_mem_peak_mb', 'metric_scope': 'train', 'agg': 'max'}
]))

In [106]:
df_finetune

Unnamed: 0,run_id,run_name,duration_s,avg_epoch_time_s,test/f1_macro,train/gpu_mem_peak_mb,stage,dataset,arch
0,l2jq2x7g,E1_finetuning_imdb_bertsmall_mha_f2_d0.2_mean,309.168199,38.646025,0.928599,3399.745117,finetune,imdb,mha
1,d659uj2j,E2_finetuning_imdb_bertsmall_favor_nb0.125,452.629664,56.578708,0.911595,5633.739258,finetune,imdb,favor_nb0.125
2,if2ozrk2,E2_finetuning_imdb_bertsmall_favor_nb0.25,535.890604,66.986325,0.909774,7456.000977,finetune,imdb,favor_nb0.25
3,hhoyoxfp,E2_finetuning_imdb_bertsmall_favor_nb0.5,728.522018,91.065252,0.916397,11106.000977,finetune,imdb,favor_nb0.5
4,u44pgkly,E2_finetuning_imdb_bertsmall_favor_nb1,1232.003859,154.000482,0.917398,18846.411133,finetune,imdb,favor_nb1
5,tq5q132s,E2_finetuning_imdb_bertsmall_lsh_h2_c128,790.832176,98.854022,0.926597,12042.296387,finetune,imdb,lsh_h2_c128
6,j2x1q39c,E2_finetuning_imdb_bertsmall_lsh_h2_c64,646.792933,80.849117,0.9278,9258.34668,finetune,imdb,lsh_h2_c64
7,s1c2rnaf,E2_finetuning_imdb_bertsmall_lsh_h4_c128,1298.198816,162.274852,0.9298,21841.253418,finetune,imdb,lsh_h4_c128
8,u262vax9,E2_finetuning_imdb_bertsmall_lsh_h4_c64,1020.584711,127.573089,0.925,16270.889648,finetune,imdb,lsh_h4_c64
9,ulsu61q4,E1_finetuning_hyperpartisan_bertsmall_mha_f1_d...,4338.320566,619.760081,0.645501,3518.334961,finetune,hyperpartisan,mha


In [107]:
# Split the pretraining table by parsed stage: 'tapt' rows go to df_tapt,
# 'pretrain' rows stay in df_pretrain. The order matters: df_tapt must be taken
# before df_pretrain is narrowed.
# NOTE(review): df_pretrain is overwritten here, so re-running this cell without
# first re-running the cell that builds df_pretrain leaves df_tapt empty.
df_tapt = df_pretrain.loc[df_pretrain['stage'] == 'tapt']
df_pretrain = df_pretrain.loc[df_pretrain['stage'] == 'pretrain']

In [108]:
# One wide row per (dataset, arch): TAPT columns, the matching Wikipedia
# pretraining columns (joined on arch) and the finetuning columns.
merged = merge_stages(df_pretrain=df_pretrain, df_tapt=df_tapt, df_finetune=df_finetune)

In [109]:
merged

Unnamed: 0,tapt/run_id,tapt/run_name,tapt/duration_s,tapt/avg_epoch_time_s,tapt/train/avg_epoch_loss,tapt/train/gpu_mem_peak_mb,tapt/stage,dataset,arch,pretrain/run_id,...,pretrain/train/avg_epoch_loss,pretrain/train/gpu_mem_peak_mb,pretrain/stage,finetune/run_id,finetune/run_name,finetune/duration_s,finetune/avg_epoch_time_s,finetune/test/f1_macro,finetune/train/gpu_mem_peak_mb,finetune/stage
0,t67wmtc1,E2_pretraining_arxiv_bertsmall_favor_nb0.125,4140.682855,2070.341427,5.032768,16991.537598,tapt,arxiv,favor_nb0.125,z28fmtgo,...,3.069484,17017.976074,pretrain,fs5plm2w,E2_finetuning_arxiv_bertsmall_favor_nb0.125,5314.20782,1328.551955,0.860208,5725.700195,finetune
1,aplguq3f,E2_pretraining_arxiv_bertsmall_favor_nb0.25,4649.542211,2324.771106,4.796945,18783.696777,tapt,arxiv,favor_nb0.25,0itox8yh,...,2.694388,18842.976074,pretrain,tug4ff1q,E2_finetuning_arxiv_bertsmall_favor_nb0.25,6291.965838,1572.991459,0.865219,7517.092773,finetune
2,k9km9nuj,E2_pretraining_arxiv_bertsmall_favor_nb0.5,5686.842252,2843.421126,4.63172,22371.815918,tapt,arxiv,favor_nb0.5,yjcapkj3,...,2.666425,22493.976074,pretrain,3ksmzwr0,E2_finetuning_arxiv_bertsmall_favor_nb0.5,8319.524132,2079.881033,0.865297,11104.350586,finetune
3,p0sm9w3v,E2_pretraining_arxiv_bertsmall_favor_nb1,7790.235805,3895.117902,4.316468,29547.308105,tapt,arxiv,favor_nb1,noug0x9s,...,2.578758,29795.487793,pretrain,0zr9387l,E2_finetuning_arxiv_bertsmall_favor_nb1,12386.017915,3096.504479,0.855688,18783.64502,finetune
4,4h910kzj,E2_pretraining_arxiv_bertsmall_lsh_h2_c128,5230.441202,2615.220601,2.223732,21855.970215,tapt,arxiv,lsh_h2_c128,hu06m597,...,2.27783,21848.345215,pretrain,xf2puymg,E2_finetuning_arxiv_bertsmall_lsh_h2_c128,7525.100944,1881.275236,0.872998,12174.626465,finetune
5,kb5ewjpp,E2_pretraining_arxiv_bertsmall_lsh_h2_c64,4578.586053,2289.293026,2.299913,19551.526855,tapt,arxiv,lsh_h2_c64,eudij3yf,...,2.344709,19543.765137,pretrain,ebmfez6l,E2_finetuning_arxiv_bertsmall_lsh_h2_c64,6224.091129,1556.022782,0.868965,9389.609375,finetune
6,7olzfob3,E2_pretraining_arxiv_bertsmall_lsh_h4_c128,7900.804413,3950.402206,2.023962,29580.39502,tapt,arxiv,lsh_h4_c128,0e2zxgor,...,2.248216,29574.151855,pretrain,wqah0rrq,E2_finetuning_arxiv_bertsmall_lsh_h4_c128,12944.44154,3236.110385,0.874043,21973.819824,finetune
7,9hb1xk49,E2_pretraining_arxiv_bertsmall_lsh_h4_c64,6600.884504,3300.442252,2.072415,24974.400879,tapt,arxiv,lsh_h4_c64,wi223o9o,...,2.285827,24964.151855,pretrain,v0prpm6r,E2_finetuning_arxiv_bertsmall_lsh_h4_c64,10335.92594,2583.981485,0.866779,16404.60498,finetune
8,jibomvsj,E1_pretraining_arxiv_bertsmall_mha,8411.599251,4205.799625,1.672165,14793.368652,tapt,arxiv,mha,hd228t3k,...,2.156817,14785.845215,pretrain,ied1ulsn,E1_finetuning_arxiv_bertsmall_mha_f1_d0.2_mean,13235.081686,3308.770421,0.883798,3524.024414,finetune
9,tzihl99m,E2_pretraining_hyperpartisan_bertsmall_favor_n...,5584.716981,930.786164,4.505261,16990.050293,tapt,hyperpartisan,favor_nb0.125,z28fmtgo,...,3.069484,17017.976074,pretrain,xhbujbys,E2_finetuning_hyperpartisan_bertsmall_favor_nb...,3936.268652,562.324093,0.534071,5727.854492,finetune


In [110]:
# Append one 'baseline' row per dataset carrying only a macro-F1 score
# (given here as a fraction, like the finetune F1 values at this point).
# NOTE(review): every other column of the baseline rows is filled with 0, not
# NaN, so id/name columns and unmeasured metrics (duration, memory, loss) read
# as 0 for the baseline — confirm this is intended.
# NOTE(review): re-running this cell appends the baseline rows again.
baseline_data = pd.DataFrame({
    'dataset': ['hyperpartisan', 'imdb', 'arxiv'],
    'arch': ['baseline', 'baseline', 'baseline'],
    'finetune/test/f1_macro': [0.4223, 0.895, 0.8362]
})

baseline_data = baseline_data.assign(
    **{col: 0 for col in merged.columns if col not in baseline_data.columns}
)

merged = pd.concat([merged, baseline_data], ignore_index=True)

In [111]:
merged

Unnamed: 0,tapt/run_id,tapt/run_name,tapt/duration_s,tapt/avg_epoch_time_s,tapt/train/avg_epoch_loss,tapt/train/gpu_mem_peak_mb,tapt/stage,dataset,arch,pretrain/run_id,...,pretrain/train/avg_epoch_loss,pretrain/train/gpu_mem_peak_mb,pretrain/stage,finetune/run_id,finetune/run_name,finetune/duration_s,finetune/avg_epoch_time_s,finetune/test/f1_macro,finetune/train/gpu_mem_peak_mb,finetune/stage
0,t67wmtc1,E2_pretraining_arxiv_bertsmall_favor_nb0.125,4140.682855,2070.341427,5.032768,16991.537598,tapt,arxiv,favor_nb0.125,z28fmtgo,...,3.069484,17017.976074,pretrain,fs5plm2w,E2_finetuning_arxiv_bertsmall_favor_nb0.125,5314.20782,1328.551955,0.860208,5725.700195,finetune
1,aplguq3f,E2_pretraining_arxiv_bertsmall_favor_nb0.25,4649.542211,2324.771106,4.796945,18783.696777,tapt,arxiv,favor_nb0.25,0itox8yh,...,2.694388,18842.976074,pretrain,tug4ff1q,E2_finetuning_arxiv_bertsmall_favor_nb0.25,6291.965838,1572.991459,0.865219,7517.092773,finetune
2,k9km9nuj,E2_pretraining_arxiv_bertsmall_favor_nb0.5,5686.842252,2843.421126,4.63172,22371.815918,tapt,arxiv,favor_nb0.5,yjcapkj3,...,2.666425,22493.976074,pretrain,3ksmzwr0,E2_finetuning_arxiv_bertsmall_favor_nb0.5,8319.524132,2079.881033,0.865297,11104.350586,finetune
3,p0sm9w3v,E2_pretraining_arxiv_bertsmall_favor_nb1,7790.235805,3895.117902,4.316468,29547.308105,tapt,arxiv,favor_nb1,noug0x9s,...,2.578758,29795.487793,pretrain,0zr9387l,E2_finetuning_arxiv_bertsmall_favor_nb1,12386.017915,3096.504479,0.855688,18783.64502,finetune
4,4h910kzj,E2_pretraining_arxiv_bertsmall_lsh_h2_c128,5230.441202,2615.220601,2.223732,21855.970215,tapt,arxiv,lsh_h2_c128,hu06m597,...,2.27783,21848.345215,pretrain,xf2puymg,E2_finetuning_arxiv_bertsmall_lsh_h2_c128,7525.100944,1881.275236,0.872998,12174.626465,finetune
5,kb5ewjpp,E2_pretraining_arxiv_bertsmall_lsh_h2_c64,4578.586053,2289.293026,2.299913,19551.526855,tapt,arxiv,lsh_h2_c64,eudij3yf,...,2.344709,19543.765137,pretrain,ebmfez6l,E2_finetuning_arxiv_bertsmall_lsh_h2_c64,6224.091129,1556.022782,0.868965,9389.609375,finetune
6,7olzfob3,E2_pretraining_arxiv_bertsmall_lsh_h4_c128,7900.804413,3950.402206,2.023962,29580.39502,tapt,arxiv,lsh_h4_c128,0e2zxgor,...,2.248216,29574.151855,pretrain,wqah0rrq,E2_finetuning_arxiv_bertsmall_lsh_h4_c128,12944.44154,3236.110385,0.874043,21973.819824,finetune
7,9hb1xk49,E2_pretraining_arxiv_bertsmall_lsh_h4_c64,6600.884504,3300.442252,2.072415,24974.400879,tapt,arxiv,lsh_h4_c64,wi223o9o,...,2.285827,24964.151855,pretrain,v0prpm6r,E2_finetuning_arxiv_bertsmall_lsh_h4_c64,10335.92594,2583.981485,0.866779,16404.60498,finetune
8,jibomvsj,E1_pretraining_arxiv_bertsmall_mha,8411.599251,4205.799625,1.672165,14793.368652,tapt,arxiv,mha,hd228t3k,...,2.156817,14785.845215,pretrain,ied1ulsn,E1_finetuning_arxiv_bertsmall_mha_f1_d0.2_mean,13235.081686,3308.770421,0.883798,3524.024414,finetune
9,tzihl99m,E2_pretraining_hyperpartisan_bertsmall_favor_n...,5584.716981,930.786164,4.505261,16990.050293,tapt,hyperpartisan,favor_nb0.125,z28fmtgo,...,3.069484,17017.976074,pretrain,xhbujbys,E2_finetuning_hyperpartisan_bertsmall_favor_nb...,3936.268652,562.324093,0.534071,5727.854492,finetune


In [112]:
# Derived display units. The stage order inside each loop fixes the order in
# which the new columns are appended to `merged`.
# NOTE(review): the F1 rescaling below is not idempotent — re-running this cell
# multiplies finetune/test/f1_macro by 100 again.

# Peak GPU memory: MB -> GB.
for stage in ('finetune', 'tapt', 'pretrain'):
    merged[f'{stage}/train/gpu_mem_peak_gb'] = merged[f'{stage}/train/gpu_mem_peak_mb'] / 1024

# Total run duration: seconds -> minutes.
for stage in ('finetune', 'tapt', 'pretrain'):
    merged[f'{stage}/duration_min'] = merged[f'{stage}/duration_s'] / 60

# Average epoch time: seconds -> minutes.
for stage in ('pretrain', 'tapt', 'finetune'):
    merged[f'{stage}/avg_epoch_time_min'] = merged[f'{stage}/avg_epoch_time_s'] / 60

# Macro-F1: fraction -> percent.
merged['finetune/test/f1_macro'] = merged['finetune/test/f1_macro'] * 100

# Combined cost of adapting to a dataset: TAPT + finetuning time, and the
# larger of the two peak-memory figures.
merged['finetune_tapt/duration_min'] = merged['finetune/duration_min'] + merged['tapt/duration_min']
merged['finetune_tapt/train/gpu_mem_peak_gb'] = merged[
    ['finetune/train/gpu_mem_peak_gb', 'tapt/train/gpu_mem_peak_gb']
].max(axis=1)

In [113]:
merged.columns

Index(['tapt/run_id', 'tapt/run_name', 'tapt/duration_s',
       'tapt/avg_epoch_time_s', 'tapt/train/avg_epoch_loss',
       'tapt/train/gpu_mem_peak_mb', 'tapt/stage', 'dataset', 'arch',
       'pretrain/run_id', 'pretrain/run_name', 'pretrain/duration_s',
       'pretrain/avg_epoch_time_s', 'pretrain/train/avg_epoch_loss',
       'pretrain/train/gpu_mem_peak_mb', 'pretrain/stage', 'finetune/run_id',
       'finetune/run_name', 'finetune/duration_s', 'finetune/avg_epoch_time_s',
       'finetune/test/f1_macro', 'finetune/train/gpu_mem_peak_mb',
       'finetune/stage', 'finetune/train/gpu_mem_peak_gb',
       'tapt/train/gpu_mem_peak_gb', 'pretrain/train/gpu_mem_peak_gb',
       'finetune/duration_min', 'tapt/duration_min', 'pretrain/duration_min',
       'pretrain/avg_epoch_time_min', 'tapt/avg_epoch_time_min',
       'finetune/avg_epoch_time_min', 'finetune_tapt/duration_min',
       'finetune_tapt/train/gpu_mem_peak_gb'],
      dtype='object')

In [114]:
# Build the frame used for all tables below from `merged`.
# NOTE(review): calculate_relative_metrics is not defined in this part of the
# notebook; judging by the frame displayed further down it adds the
# `finetune_tapt/*_pct_vs_mha` columns and the
# `finetune/test/f1_macro_pp_vs_mha` / `..._pp_vs_baseline` columns -
# confirm against its definition.
final = calculate_relative_metrics(merged)

In [None]:
# For every dataset, express the pre-training peak VRAM and average epoch time
# of each architecture as a percentage difference from that dataset's 'mha'
# row. Results go to new '<metric>_pct_vs_mha' columns; datasets without an
# 'mha' row, or with a zero baseline, are left untouched.
for ds_name in final['dataset'].unique():
    in_dataset = final['dataset'] == ds_name
    baseline_rows = in_dataset & (final['arch'] == 'mha')

    if not baseline_rows.any():
        continue

    for metric_col in ('pretrain/train/gpu_mem_peak_gb', 'pretrain/avg_epoch_time_min'):
        # The first 'mha' row of the dataset serves as the reference value.
        baseline = final.loc[baseline_rows, metric_col].values[0]
        if baseline == 0:
            # A zero baseline would make the relative change undefined.
            continue
        delta = final.loc[in_dataset, metric_col] - baseline
        final.loc[in_dataset, f'{metric_col}_pct_vs_mha'] = (delta / baseline) * 100

In [116]:
# Display the final frame, including the relative (`*_pct_vs_mha`,
# `*_pp_vs_*`) columns, for a visual sanity check.
final

Unnamed: 0,tapt/run_id,tapt/run_name,tapt/duration_s,tapt/avg_epoch_time_s,tapt/train/avg_epoch_loss,tapt/train/gpu_mem_peak_mb,tapt/stage,dataset,arch,pretrain/run_id,...,tapt/avg_epoch_time_min,finetune/avg_epoch_time_min,finetune_tapt/duration_min,finetune_tapt/train/gpu_mem_peak_gb,finetune_tapt/train/gpu_mem_peak_gb_pct_vs_mha,finetune_tapt/duration_min_pct_vs_mha,finetune/test/f1_macro_pp_vs_mha,finetune/test/f1_macro_pp_vs_baseline,pretrain/train/gpu_mem_peak_gb_pct_vs_mha,pretrain/avg_epoch_time_min_pct_vs_mha
0,t67wmtc1,E2_pretraining_arxiv_bertsmall_favor_nb0.125,4140.682855,2070.341427,5.032768,16991.537598,tapt,arxiv,favor_nb0.125,z28fmtgo,...,34.50569,22.142533,157.581511,16.593298,14.859151,-56.321753,-2.359026,2.400755,15.096404,39.438455
1,aplguq3f,E2_pretraining_arxiv_bertsmall_favor_nb0.25,4649.542211,2324.771106,4.796945,18783.696777,tapt,arxiv,favor_nb0.25,0itox8yh,...,38.746185,26.216524,182.358467,18.343454,26.973762,-49.454108,-1.857847,2.901934,27.439289,53.780885
2,k9km9nuj,E2_pretraining_arxiv_bertsmall_favor_nb0.5,5686.842252,2843.421126,4.63172,22371.815918,tapt,arxiv,favor_nb0.5,yjcapkj3,...,47.390352,34.664684,233.43944,21.847476,51.228678,-35.295548,-1.850099,2.909682,52.131824,93.996445
3,p0sm9w3v,E2_pretraining_arxiv_bertsmall_favor_nb1,7790.235805,3895.117902,4.316468,29547.308105,tapt,arxiv,favor_nb1,noug0x9s,...,64.918632,51.608408,336.270895,28.854793,99.733467,-6.792853,-2.810935,1.948846,101.513592,175.727792
4,4h910kzj,E2_pretraining_arxiv_bertsmall_lsh_h2_c128,5230.441202,2615.220601,2.223732,21855.970215,tapt,arxiv,lsh_h2_c128,hu06m597,...,43.58701,31.354587,212.592369,21.343721,47.741672,-41.073913,-1.079961,3.67982,47.765278,130.278624
5,kb5ewjpp,E2_pretraining_arxiv_bertsmall_lsh_h2_c64,4578.586053,2289.293026,2.299913,19551.526855,tapt,arxiv,lsh_h2_c64,eudij3yf,...,38.154884,25.933713,180.04462,19.093288,32.164129,-50.095457,-1.483286,3.276495,32.178884,61.739014
6,7olzfob3,E2_pretraining_arxiv_bertsmall_lsh_h4_c128,7900.804413,3950.402206,2.023962,29580.39502,tapt,arxiv,lsh_h4_c128,0e2zxgor,...,65.840037,53.935173,347.420766,28.887105,99.957128,-3.702346,-0.975513,3.784268,100.016647,260.290285
7,9hb1xk49,E2_pretraining_arxiv_bertsmall_lsh_h4_c64,6600.884504,3300.442252,2.072415,24974.400879,tapt,arxiv,lsh_h4_c64,wi223o9o,...,55.007371,43.066358,282.280174,24.389063,68.821595,-21.757934,-1.70187,3.057911,68.838179,127.673625
8,jibomvsj,E1_pretraining_arxiv_bertsmall_mha,8411.599251,4205.799625,1.672165,14793.368652,tapt,arxiv,mha,hd228t3k,...,70.09666,55.146174,360.778016,14.446649,0.0,0.0,0.0,4.759781,0.0,0.0
9,tzihl99m,E2_pretraining_hyperpartisan_bertsmall_favor_n...,5584.716981,930.786164,4.505261,16990.050293,tapt,hyperpartisan,favor_nb0.125,z28fmtgo,...,15.513103,9.372068,158.683094,16.591846,14.905592,-7.855353,-11.142997,11.177103,15.096404,39.438455


In [117]:
# Persist the final frame as CSV in the current working directory;
# index=False leaves the row index out of the file.
final.to_csv('final_results.csv', index=False)

In [None]:
# Tab 4.8 - pre-training metrics (peak VRAM, time per epoch, loss) per
# architecture.
# NOTE(review): the rows are restricted to the 'imdb' slice although the table
# is titled "Pretraining Wikipedia". This presumably works because the
# pretrain/* columns are joined per architecture and therefore repeat across
# datasets - confirm that the 'imdb' slice contains every architecture.
pretrain_metrics = [
    {'col': 'pretrain/train/gpu_mem_peak_gb', 'label': 'Max VRAM [GB]', 'fmt': '{:.2f}'},
    {'col': 'pretrain/avg_epoch_time_min', 'label': 'Czas/epoka [min]', 'fmt': '{:.2f}'},
    {'col': 'pretrain/train/avg_epoch_loss', 'label': 'Min. loss', 'fmt': '{:.3f}'},
]

print("% === Tab 4.8: Pretraining Wikipedia ===")
pretrain_table = generate_latex_table_simple(
    df=final,
    metrics=pretrain_metrics,
    filter_col='dataset',
    filter_val='imdb',
)
print(pretrain_table)
print("\n")


# Tab 4.9 - TAPT metrics (peak VRAM, time per epoch, loss) for all datasets.
# The labels are passed without \textbf{...}: generate_latex_table already
# wraps each metric label in \textbf, so pre-wrapped labels came out as
# \textbf{\textbf{...}} in the generated header.
print("% === Tab 4.9: TAPT wszystkie datasety ===")
print(generate_latex_table(
    df=final,
    metrics=[
        {'col': 'tapt/train/gpu_mem_peak_gb', 'label': 'VRAM [GB]', 'fmt': '{:.2f}'},
        {'col': 'tapt/avg_epoch_time_min', 'label': 'Czas [min]', 'fmt': '{:.2f}'},
        {'col': 'tapt/train/avg_epoch_loss', 'label': 'Min loss', 'fmt': '{:.3f}'},
    ],
))
print("\n")


# Tab 4.10 - fine-tuning metrics (peak VRAM, time per epoch, macro-F1 in
# percent) for all datasets, with the baseline row enabled.
# The labels are passed without \textbf{...}: generate_latex_table already
# wraps each metric label in \textbf (visible in the Tab 4.9 header it
# produces), so pre-wrapped labels end up as \textbf{\textbf{...}}.
print("% === Tab 4.10: Finetune wszystkie datasety ===")
print(generate_latex_table(
    df=final,
    metrics=[
        {'col': 'finetune/train/gpu_mem_peak_gb', 'label': 'VRAM [GB]', 'fmt': '{:.2f}'},
        {'col': 'finetune/avg_epoch_time_min', 'label': 'Czas [min]', 'fmt': '{:.2f}'},
        {'col': 'finetune/test/f1_macro', 'label': r'F1 [\%]', 'fmt': '{:.2f}'},
    ],
    show_baseline=True,
))
print("\n")


# Tab 4.15 - peak VRAM relative to the SDPA ('mha') baseline, in percent.
# Main columns: combined TAPT + fine-tuning peak per dataset.
print("% === Tab 4.15: VRAM % vs SDPA ===")
print(generate_latex_table(
    df=final,
    metrics=[
        {'col': 'finetune_tapt/train/gpu_mem_peak_gb_pct_vs_mha', 'label': '', 'fmt': '{:+.1f}', 'suffix': r'\%'},
    ],
    # The extra "Pretraining" column reads the pre-training delta computed in
    # the cell above. It previously repeated the finetune_tapt column, which
    # duplicated the main columns and left the pretrain/*_pct_vs_mha column
    # unused.
    extra_col={
        'label': 'Pretraining',
        'metrics': [{'col': 'pretrain/train/gpu_mem_peak_gb_pct_vs_mha', 'fmt': '{:+.1f}', 'suffix': r'\%'}]
    },
))
print("\n")


# Tab 4.16 - training time relative to the SDPA ('mha') baseline, in percent.
# Main columns: combined TAPT + fine-tuning duration per dataset.
print("% === Tab 4.16: Czas % vs SDPA ===")
print(generate_latex_table(
    df=final,
    metrics=[
        {'col': 'finetune_tapt/duration_min_pct_vs_mha', 'label': '', 'fmt': '{:+.1f}', 'suffix': r'\%'},
    ],
    # The extra "Pretraining" column reads the pre-training delta computed in
    # the cell above (relative change of the average epoch time). It previously
    # repeated the finetune_tapt column, which duplicated the main columns and
    # left the pretrain/*_pct_vs_mha column unused.
    extra_col={
        'label': 'Pretraining',
        'metrics': [{'col': 'pretrain/avg_epoch_time_min_pct_vs_mha', 'fmt': '{:+.1f}', 'suffix': r'\%'}]
    },
))
print("\n")


# Tab 4.17 - macro-F1 per dataset with baseline and SDPA rows enabled; each
# cell is additionally given the two percentage-point deltas (vs 'mha' and vs
# the baseline) joined with ' / '.
f1_metrics = [
    {'col': 'finetune/test/f1_macro', 'label': '', 'fmt': '{:.2f}'},
]
f1_delta_cell = {
    'cols': ['finetune/test/f1_macro_pp_vs_mha', 'finetune/test/f1_macro_pp_vs_baseline'],
    'fmt': '{:+.1f}',
    'sep': ' / '
}

print("% === Tab 4.17: F1 comparison ===")
f1_table = generate_latex_table(
    df=final,
    metrics=f1_metrics,
    show_baseline=True,
    show_sdpa=True,
    combined_cell=f1_delta_cell,
)
print(f1_table)
print("\n")

% === Tab 4.8: Pretraining Wikipedia ===
\begin{tabular}{@{}lccc@{}}
\toprule
\textbf{Konfiguracja} & \textbf{Max VRAM [GB]} & \textbf{Czas/epoka [min]} & \textbf{Min. loss} \\
\midrule
\textit{SDPA} & 14.44 & 7.88 & 2.157 \\
\midrule
\multicolumn{4}{l}{\textit{LSH}} \\
$N_h{=}2$, $C{=}64$ & 19.09 & 12.75 & 2.345 \\
$N_h{=}2$, $C{=}128$ & 21.34 & 18.15 & 2.278 \\
$N_h{=}4$, $C{=}64$ & 24.38 & 17.94 & 2.286 \\
$N_h{=}4$, $C{=}128$ & 28.88 & 28.39 & 2.248 \\
\midrule
\multicolumn{4}{l}{\textit{FAVOR+}} \\
$N_f{=}0.125$ & 16.62 & 10.99 & 3.069 \\
$N_f{=}0.25$ & 18.40 & 12.12 & 2.694 \\
$N_f{=}0.5$ & 21.97 & 15.29 & 2.666 \\
$N_f{=}1.0$ & 29.10 & 21.73 & 2.579 \\
\bottomrule
\end{tabular}


% === Tab 4.9: TAPT wszystkie datasety ===
\begin{tabular}{@{}lccccccccc@{}}
\toprule
& \multicolumn{3}{c}{\textbf{\textbf{VRAM [GB]}}} & \multicolumn{3}{c}{\textbf{\textbf{Czas [min]}}} & \multicolumn{3}{c}{\textbf{\textbf{Min loss}}} \\
\cmidrule(lr){2-4}
\cmidrule(lr){5-7}
\cmidrule(lr){8-10}
\textbf