In [41]:
from pathlib import Path
from os import SEEK_CUR, SEEK_END
from datetime import datetime

def readlast(f):
    """Return the last line of an open binary stream without reading all of it.

    Scans backwards from the end of the stream, one byte at a time, until the
    newline that precedes the last line is found, then reads from there to the
    end. The final byte of the stream is never inspected: it is either the last
    line's own terminating newline or part of an unterminated last line.

    Args:
        f: A seekable file-like object opened in binary mode. Its position on
            entry does not matter; on return it is at the end of the stream.

    Returns:
        The bytes of the last line, including its trailing newline if present.
        The whole content is returned when the stream holds a single line, and
        ``b""`` when the stream is empty.
    """
    size = f.seek(0, SEEK_END)

    # Track the scan position explicitly instead of seeking to a negative
    # offset and waiting for OSError: in-memory streams such as io.BytesIO
    # clamp negative positions to 0 rather than raising, which would make a
    # relative backward scan spin forever on single-line input.
    pos = size - 2
    while pos >= 0:
        f.seek(pos)
        if f.read(1) == b"\n":
            # The read left the cursor just past the newline, i.e. on the
            # first byte of the last line.
            return f.read()
        pos -= 1

    # No earlier newline: the stream is empty or holds exactly one line.
    f.seek(0)
    return f.read()


def get_runtimes(path: Path) -> dict[int, dict[str, float]]:
    """Collect run-time statistics per dataset from the logs of a sweep.

    Expected layout: ``path/<dataset id>/<run folder>/log.log``. The duration
    of one run is the difference between the timestamps that start the first
    and the last line of its log.

    Args:
        path: Directory whose sub-folders are named after integer dataset ids.
            Plain files directly inside ``path`` are ignored.

    Returns:
        A mapping from dataset id to a dict with the keys ``'mean'``,
        ``'min'``, ``'max'`` and ``'total'``, all in seconds over that
        dataset's runs. Dataset folders without any run folder are left out
        of the mapping.

    Raises:
        ValueError: If a dataset folder's name is not an integer, if a log is
            empty, or if the first or last line of a log does not start with
            a ``%Y-%m-%d %H:%M:%S.%f`` timestamp.
        FileNotFoundError: If a run folder contains no ``log.log``.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S.%f'

    results = {}

    for sweep_folder in path.iterdir():
        if not sweep_folder.is_dir():
            # We are only interested in the folders, which represent openml datasets
            continue

        dataset_id = int(sweep_folder.name)

        run_times = []

        for run_folder in sweep_folder.iterdir():
            if not run_folder.is_dir():
                # A stray file next to the run folders cannot contain a log;
                # opening '<file>/log.log' would raise NotADirectoryError.
                continue

            log_path = run_folder / 'log.log'

            with open(log_path, 'rb') as f:
                first = f.readline()
                last = readlast(f)

            if not first:
                # Fail with the offending path instead of an opaque strptime error.
                raise ValueError(f'Log file {log_path} is empty')

            # Format is: 2024-03-26 21:00:42.190 | INFO   | etc....
            start_time_str = first.split(b' | ')[0].decode('utf-8')
            end_time_str = last.split(b' | ')[0].decode('utf-8')

            start_time = datetime.strptime(start_time_str, timestamp_format)
            end_time = datetime.strptime(end_time_str, timestamp_format)

            run_times.append((end_time - start_time).total_seconds())

        if not run_times:
            # No runs to aggregate: mean/min/max are undefined, so the dataset
            # is omitted rather than failing with ZeroDivisionError.
            continue

        total = sum(run_times)
        results[dataset_id] = {
            'mean': total / len(run_times),
            'min': min(run_times),
            'max': max(run_times),
            'total': total
        }

    return results




In [42]:
# Run-time statistics for the sweep stored under 'test_categorical_classification'.
# `get_runtimes` returns {dataset id: {'mean', 'min', 'max', 'total'}} in seconds.
# NOTE: `benchmark_sweep_path` is re-bound by each of the sweep cells in this notebook.
benchmark_sweep_path = Path('outputs_done/foundation_mix_600k_finetune/test_categorical_classification')
whytrees_cat = get_runtimes(benchmark_sweep_path)
# Bare last expression so the notebook displays the resulting dict.
whytrees_cat

{44159: {'mean': 526.1546999999999,
  'min': 398.27,
  'max': 611.301,
  'total': 5261.547},
 45036: {'mean': 414.68320000000006,
  'min': 292.876,
  'max': 640.828,
  'total': 4146.832},
 45035: {'mean': 139.1746, 'min': 90.147, 'max': 215.634, 'total': 1391.746},
 45038: {'mean': 371.35970000000003,
  'min': 276.996,
  'max': 464.653,
  'total': 3713.597},
 45039: {'mean': 211.12329999999997,
  'min': 139.973,
  'max': 309.448,
  'total': 2111.2329999999997},
 44156: {'mean': 375.22630000000004,
  'min': 292.177,
  'max': 462.344,
  'total': 3752.2630000000004},
 44157: {'mean': 906.1018, 'min': 489.352, 'max': 1094.059, 'total': 9061.018}}

In [43]:
# Run-time statistics for the sweep stored under 'test_numerical_classification'.
# `get_runtimes` returns {dataset id: {'mean', 'min', 'max', 'total'}} in seconds.
# NOTE: `benchmark_sweep_path` is re-bound by each of the sweep cells in this notebook.
benchmark_sweep_path = Path('outputs_done/foundation_mix_600k_finetune/test_numerical_classification')
whytrees_num = get_runtimes(benchmark_sweep_path)
# Bare last expression so the notebook displays the resulting dict.
whytrees_num

{44130: {'mean': 770.7071, 'min': 520.841, 'max': 961.256, 'total': 7707.071},
 44128: {'mean': 192.35139999999998,
  'min': 126.634,
  'max': 258.731,
  'total': 1923.514},
 45021: {'mean': 214.88209999999998,
  'min': 169.311,
  'max': 301.676,
  'total': 2148.821},
 44089: {'mean': 291.7275, 'min': 190.771, 'max': 366.218, 'total': 2917.275},
 44125: {'mean': 737.7062, 'min': 624.381, 'max': 876.276, 'total': 7377.062},
 44121: {'mean': 402.4937, 'min': 342.648, 'max': 463.837, 'total': 4024.937},
 45020: {'mean': 382.9895, 'min': 281.837, 'max': 524.163, 'total': 3829.895},
 44122: {'mean': 588.3584, 'min': 333.218, 'max': 874.349, 'total': 5883.584},
 44120: {'mean': 324.60529999999994,
  'min': 246.015,
  'max': 467.284,
  'total': 3246.0529999999994},
 44123: {'mean': 464.7566, 'min': 298.085, 'max': 580.832, 'total': 4647.566},
 45022: {'mean': 134.0908, 'min': 91.444, 'max': 195.926, 'total': 1340.908},
 44126: {'mean': 475.5148000000001,
  'min': 324.611,
  'max': 555.224,
  

In [44]:
# Run-time statistics for the sweep stored under 'test_tabzilla_has_completed_runs'.
# `get_runtimes` returns {dataset id: {'mean', 'min', 'max', 'total'}} in seconds.
# NOTE: `benchmark_sweep_path` is re-bound by each of the sweep cells in this notebook.
benchmark_sweep_path = Path('outputs_done/foundation_mix_600k_finetune/test_tabzilla_has_completed_runs')
tabzilla = get_runtimes(benchmark_sweep_path)
# Bare last expression so the notebook displays the resulting dict.
tabzilla

{11: {'mean': 1823.794, 'min': 1823.794, 'max': 1823.794, 'total': 1823.794},
 168911: {'mean': 513.812, 'min': 513.812, 'max': 513.812, 'total': 513.812},
 125920: {'mean': 668.486, 'min': 668.486, 'max': 668.486, 'total': 668.486},
 40: {'mean': 481.437, 'min': 481.437, 'max': 481.437, 'total': 481.437},
 39: {'mean': 946.295, 'min': 946.295, 'max': 946.295, 'total': 946.295},
 9957: {'mean': 516.386, 'min': 516.386, 'max': 516.386, 'total': 516.386},
 146024: {'mean': 602.063, 'min': 602.063, 'max': 602.063, 'total': 602.063},
 9946: {'mean': 440.163, 'min': 440.163, 'max': 440.163, 'total': 440.163},
 3902: {'mean': 425.059, 'min': 425.059, 'max': 425.059, 'total': 425.059},
 45: {'mean': 1121.869, 'min': 1121.869, 'max': 1121.869, 'total': 1121.869},
 146607: {'mean': 965.821, 'min': 965.821, 'max': 965.821, 'total': 965.821},
 3549: {'mean': 2219.822, 'min': 2219.822, 'max': 2219.822, 'total': 2219.822},
 360948: {'mean': 501.722, 'min': 501.722, 'max': 501.722, 'total': 501.722}

In [45]:
# One lookup table over all three sweeps, keyed by dataset id; if an id occurs
# in more than one sweep, the right-most table's entry wins.
results = whytrees_cat | whytrees_num | tabzilla

# Summed run time in seconds per sweep, and over all sweeps together.
whytrees_cat_total = sum(stats['total'] for stats in whytrees_cat.values())
whytrees_num_total = sum(stats['total'] for stats in whytrees_num.values())
tabzilla_total = sum(stats['total'] for stats in tabzilla.values())
all_total = whytrees_cat_total + whytrees_num_total + tabzilla_total

# Display the four totals converted from seconds to hours.
tuple(seconds / 3600 for seconds in (whytrees_cat_total, whytrees_num_total, tabzilla_total, all_total))

(8.177287777777776, 17.372976111111115, 32.544359444444446, 58.09462333333334)

In [46]:
from tabularbench.core.enums import BenchmarkOrigin
from tabularbench.data.metadata import create_metadata
from tabularbench.data.benchmarks import BENCHMARKS, BenchmarkName

# Metadata table for the WHYTREES benchmark origin.
# NOTE(review): `create_metadata` is a project helper not shown here; the code below
# uses its result like a pandas DataFrame indexed by dataset id — confirm.
metadata_whytrees = create_metadata(benchmark_origin=BenchmarkOrigin.WHYTREES)

# Dataset ids registered for the numerical and the categorical classification benchmarks.
ids_used_in_paper_numerical = BENCHMARKS[BenchmarkName.NUMERICAL_CLASSIFICATION].openml_dataset_ids
ids_used_in_paper_categorical = BENCHMARKS[BenchmarkName.CATEGORICAL_CLASSIFICATION].openml_dataset_ids

# Assuming both are lists, `+` builds a new list, so the in-place sort below
# does not reorder the id lists stored in BENCHMARKS — TODO confirm the type.
ids_used_in_paper = ids_used_in_paper_numerical + ids_used_in_paper_categorical
ids_used_in_paper.sort()

# Keep only the rows for those ids, in ascending id order.
metadata_whytrees = metadata_whytrees.loc[ids_used_in_paper]

# Mean run time per dataset, looked up in the merged `results` dict
# (raises KeyError for an id that has no entry there).
metadata_whytrees['runtime'] = [results[dataset_id]['mean'] for dataset_id in metadata_whytrees.index]
# Computed from the still-unrounded mean; rounding happens only afterwards.
metadata_whytrees['runtime_per_cv'] = metadata_whytrees['runtime'] / metadata_whytrees['n_splits']

# Round both columns to one decimal for presentation.
metadata_whytrees['runtime'] = metadata_whytrees['runtime'].round(decimals=1)
metadata_whytrees['runtime_per_cv'] = metadata_whytrees['runtime_per_cv'].round(decimals=1)

# Bare last expression so the notebook displays the table.
metadata_whytrees

Unnamed: 0_level_0,openml_dataset_name,n_observations,n_train,n_val,n_test,n_features,n_splits,n_classes,runtime,runtime_per_cv
openml_dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
44089,credit,16714,10000,2014,4700,10,2,2,291.7,145.9
44120,electricity,38474,10000,8542,19932,7,1,2,324.6,324.6
44121,covertype,566602,10000,50000,50000,10,1,2,402.5,402.5
44122,pol,10082,7057,907,2118,26,3,2,588.4,196.1
44123,house_16H,13488,9441,1214,2833,16,3,2,464.8,154.9
44125,MagicTelescope,13376,9363,1203,2810,10,3,2,737.7,245.9
44126,bank-marketing,10578,7404,952,2222,7,3,2,475.5,158.5
44128,MiniBooNE,72998,10000,18899,44099,50,1,2,192.4,192.4
44129,Higgs,940160,10000,50000,50000,24,1,2,208.1,208.1
44130,eye_movements,7608,5325,684,1599,20,3,2,770.7,256.9


In [47]:
# Dataset ids registered for the TABZILLA_HAS_COMPLETED_RUNS benchmark.
# NOTE: this re-binds `ids_used_in_paper`, which the WHYTREES metadata cell also assigns.
ids_used_in_paper = BENCHMARKS[BenchmarkName.TABZILLA_HAS_COMPLETED_RUNS].openml_dataset_ids

# Metadata table for the TABZILLA benchmark origin, restricted to those ids.
# NOTE(review): `create_metadata` is a project helper not shown here; the code below
# uses its result like a pandas DataFrame indexed by dataset id — confirm.
metadata_tabzilla = create_metadata(BenchmarkOrigin.TABZILLA)
metadata_tabzilla = metadata_tabzilla.loc[ids_used_in_paper]

# Mean run time per dataset, looked up in the merged `results` dict
# (raises KeyError for an id that has no entry there).
metadata_tabzilla['runtime'] = [results[dataset_id]['mean'] for dataset_id in metadata_tabzilla.index]
# Computed from the still-unrounded mean; rounding happens only afterwards.
metadata_tabzilla['runtime_per_cv'] = metadata_tabzilla['runtime'] / metadata_tabzilla['n_splits']

# Round both columns to one decimal for presentation.
metadata_tabzilla['runtime'] = metadata_tabzilla['runtime'].round(decimals=1)
metadata_tabzilla['runtime_per_cv'] = metadata_tabzilla['runtime_per_cv'].round(decimals=1)

# Bare last expression so the notebook displays the table.
metadata_tabzilla

Unnamed: 0_level_0,openml_dataset_name,n_observations,n_train,n_val,n_test,n_features,n_splits,n_classes,runtime,runtime_per_cv
openml_dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
3,kr-vs-kp,3196,2556,320,320,36,10,2,1225.6,122.6
4,labor,57,45,6,6,16,10,2,1926.4,192.6
9,autos,205,163,21,21,25,10,6,739.8,74.0
10,lymph,148,118,15,15,18,10,4,542.7,54.3
11,balance-scale,625,499,63,63,4,10,3,1823.8,182.4
...,...,...,...,...,...,...,...,...,...,...
167141,churn,5000,4000,500,500,20,10,2,1906.0,190.6
167211,Satellite,5100,4080,510,510,36,10,2,695.7,69.6
168911,jasmine,2984,2386,299,299,144,10,2,513.8,51.4
190408,Click_prediction_small,39948,31958,3995,3995,11,10,2,2598.4,259.8


In [48]:
import pandas as pd

# Stack the two metadata tables: WHYTREES rows first, then TabZilla rows.
metadata_total = pd.concat([metadata_whytrees, metadata_tabzilla])

# Largest values over all datasets. Both columns were already rounded to one
# decimal when they were created, so these maxima are rounded values too.
max_runtime = metadata_total['runtime'].max()
max_runtime_per_cv = metadata_total['runtime_per_cv'].max()

# Bare last expression so the notebook displays both maxima.
max_runtime, max_runtime_per_cv

(5992.8, 599.3)

In [50]:
# Sum of the `n_splits` column over every dataset in the combined metadata table.
total_number_of_cv_splits = metadata_total['n_splits'].sum()

# Displays the split count and the summed run time divided by it.
# NOTE(review): `all_total` is assigned in the cell that adds up the per-sweep totals.
# The output recorded for this cell is a NameError because that name was not defined
# in the kernel when the cell ran; re-run the notebook top to bottom
# (Restart Kernel -> Run All) to refresh it.
total_number_of_cv_splits, all_total / total_number_of_cv_splits



NameError: name 'all_total' is not defined