In [8]:
# autoreload magic
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import seaborn as sns

paths = [
    'outputs_done/tabPFN_finetune_1k',
    'outputs_done/tabPFN_finetune_10k',
    'outputs_done/tabPFN_scratch',
    'outputs_done/tabPFN_zeroshot_1k',
    'outputs_done/tabPFN_zeroshot_10k',
]

folders = [
    'categorical_classification_TABPFN_FINETUNE_DEFAULT',
    'numerical_classification_TABPFN_FINETUNE_DEFAULT',
    'categorical_regression_TABPFN_FINETUNE_DEFAULT',
    'numerical_regression_TABPFN_FINETUNE_DEFAULT'
]

m = ["RandomForest", "GradientBoostingTree", "XGBoost", "MLP", "Resnet", "SAINT", "FT Transformer", "TabPFN Finetune 1k", "TabPFN Finetune 10k", "TabPFN Scratch", "TabPFN Zeroshot 1k", "TabPFN Zeroshot 10k"]
colors = sns.color_palette('colorblind', len(m))
color_dict = dict(zip(m, colors))

In [10]:
import logging
from pathlib import Path

import pandas as pd
from tabularbench.results.random_sweep_plots import make_combined_dataset_plot_, scores_min_max
from tabularbench.results.reformat_benchmark import get_benchmark_csv_reformatted
from tabularbench.sweeps.sweep_config import create_sweep_config_list_from_main_config
from tabularbench.sweeps.sweep_start import add_device_and_seed_to_cfg, get_config
from tabularbench.sweeps.writer import StandardWriter

import seaborn as sns

df_benchmark = get_benchmark_csv_reformatted()

results_dict = {}

for folder in folders:
        
    bench_name = '_'.join(folder.split('_')[:2])

    results_dict[bench_name] = {}



    for path in paths:
        cfg = get_config(path)
        add_device_and_seed_to_cfg(cfg, gpu=0, seed=0)
        cfg['benchmarks'] = [bench_name]

        writer = StandardWriter()
        logger = logging.getLogger()
        sweep_config = create_sweep_config_list_from_main_config(cfg)[0]

        model_plots_name = cfg['model_plot_names'][0]
        
        results = pd.read_csv( Path(path) / folder / 'default_results.csv', index_col=0)

        def get_normalized_results():
            for dataset_name in results.columns:
                score_min, score_max = scores_min_max(sweep_config, dataset_name)

                r = (results[dataset_name] - score_min) / (score_max - score_min)
                yield max(r[-1], 0)

        results_average = sum(get_normalized_results()) / len(results.columns)

        results_dict[bench_name][model_plots_name] = results_average



results_dict

df = pd.DataFrame(results_dict)
df



Unnamed: 0,categorical_classification,numerical_classification,categorical_regression,numerical_regression
TabPFN Finetune 1k,0.622096,0.707268,0.067706,0.087726
TabPFN Finetune 10k,0.759808,0.805663,0.03534,0.091287
TabPFN Scratch,0.500788,0.605689,0.219901,0.29056
TabPFN Zeroshot 1k,0.445054,0.542058,0.0,0.0
TabPFN Zeroshot 10k,0.598883,0.660935,0.0,0.0


In [11]:
print(df.to_latex(index=True, float_format="{:.4f}".format))  

\begin{tabular}{lrrrr}
\toprule
 & categorical_classification & numerical_classification & categorical_regression & numerical_regression \\
\midrule
TabPFN Finetune 1k & 0.6221 & 0.7073 & 0.0677 & 0.0877 \\
TabPFN Finetune 10k & 0.7598 & 0.8057 & 0.0353 & 0.0913 \\
TabPFN Scratch & 0.5008 & 0.6057 & 0.2199 & 0.2906 \\
TabPFN Zeroshot 1k & 0.4451 & 0.5421 & 0.0000 & 0.0000 \\
TabPFN Zeroshot 10k & 0.5989 & 0.6609 & 0.0000 & 0.0000 \\
\bottomrule
\end{tabular}

