# Initialization. Load previous state. Load modules

In [1]:
import dill
# Restore the entire session
#dill.load_session('PTRMS_tun_class_all.db')

In [2]:
#check last result on disk
#dir()

In [3]:
import json

import pandas as pd
import numpy as np
import numpy as cp
#import cupy as cp

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

from tabpfn import TabPFNRegressor

from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNRegressor
import hyperopt

from finetuning_scripts.finetune_tabpfn_main import fine_tune_tabpfn
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [4]:
def predict_reg_by_label(IDs, y, X, regressor, param_grid=None, cv=5, gpu=False, verbose=True):
    """
    Leave-one-label-out evaluation of a regressor.

    For every distinct value in ``IDs`` the rows carrying that value are held
    out as the test set and a model is trained on all remaining rows. The
    held-out predictions are collected into one vector covering all rows.

    Args:
        IDs: A pandas Series of group labels, one per row of ``X`` / ``y``.
            Rows are matched to ``X`` and ``y`` by position, not by index.
        y: Target values (pandas Series or array-like), same length as ``IDs``.
        X: Features (pandas DataFrame or numpy array), same number of rows as ``IDs``.
        regressor: A scikit-learn style regressor *class* (not an instance).
            It is instantiated as ``regressor()`` for the grid search and as
            ``regressor(**param_grid)`` otherwise.
        param_grid: If ``cv > 1``, the parameter grid handed to GridSearchCV.
            Otherwise the keyword arguments used to build the regressor.
            ``None`` (the default) is treated as an empty dict.
        cv: Number of folds for GridSearchCV. Values <= 1 skip the grid search
            and fit a single model with ``param_grid`` as constructor kwargs.
        gpu: Currently unused; accepted so existing call sites keep working.
        verbose: If True, prints per-label and overall metrics (also forwarded
            to GridSearchCV as its ``verbose`` argument).

    Returns:
        A tuple ``(predictions, final_arv, best_params_per_label)``:
        predictions: Held-out predictions for every row, as floats. A pandas
            Series (same index and name as ``y``) if ``y`` is a Series,
            otherwise a numpy array.
        final_arv: Overall MSE of the held-out predictions divided by ``np.var(y)``.
        best_params_per_label: Dictionary with the parameters used for each label.
    """
    # A mutable default dict would be shared between calls; build a fresh one.
    if param_grid is None:
        param_grid = {}

    # Keep a Series as it is (so the result keeps its index); coerce anything
    # else to an ndarray so boolean masking works on plain sequences too.
    if not isinstance(y, pd.Series):
        y = np.asarray(y)

    # Always collect predictions in a float buffer. Deriving the buffer from
    # ``y`` (``y[:] * 0``) inherits y's dtype, so integer targets would
    # silently truncate every prediction and distort the reported metrics.
    if isinstance(y, pd.Series):
        predictions = pd.Series(np.zeros(len(y), dtype=float), index=y.index, name=y.name)
    else:
        predictions = np.zeros(y.shape, dtype=float)

    best_params_per_label = {}

    for label in IDs.unique():
        # Plain boolean array: selection is positional and does not depend on
        # the pandas indexes of IDs, X and y happening to line up.
        mask = np.asarray(IDs == label)
        train_mask = ~mask
        X_train, X_test = X[train_mask], X[mask]
        y_train, y_test = y[train_mask], y[mask]

        if cv > 1:
            # Use GridSearchCV to find the best parameters on the training rows
            grid_search = GridSearchCV(
                estimator=regressor(),
                param_grid=param_grid,
                scoring='neg_mean_squared_error',
                cv=cv,
                verbose=verbose
            )
            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_

            # Store the best parameters
            best_params_per_label[label] = grid_search.best_params_

        else:
            # Fine tune TabPFN if a non-default model path was selected
            if regressor is TabPFNRegressor and param_grid.get("model_path", "auto") != "auto":
                finetunemymodel(X_train, y_train, param_grid, verbose)

            best_model = regressor(**param_grid)
            best_model.fit(X_train, y_train)
            # Store a copy so the per-label entries do not alias the caller's dict
            best_params_per_label[label] = dict(param_grid)

        # Predict on the held-out rows of this label
        predictions[mask] = best_model.predict(X_test)

        if verbose:
            print(f"Label: {label}, Best Params: {best_params_per_label[label]}")
            print(f"Test Set MSE for label {label}: {round(mean_squared_error(y_test, predictions[mask]), 4)}")

    # Final metrics over all held-out predictions
    final_mse = mean_squared_error(y, predictions)
    final_arv = final_mse / np.var(y)

    if verbose:
        print(f"\nOverall Mean Squared Error: {round(final_mse, 4)}")
        print(f"Overall Adjusted Residual Variance (ARV): {round(final_arv, 4)}")

    return predictions, final_arv, best_params_per_label

# Load datasets from JSON file

In [5]:
# Load every dataset from the JSON file into a dict keyed by dataset name;
# each entry is later read through its "IDs", "y" and "X" keys.
# The encoding is pinned because open() otherwise falls back to the platform
# locale, which can mis-decode a UTF-8 JSON file on some systems.
with open('data_reg.json', 'r', encoding='utf-8') as json_file:
    loaded_datasets = json.load(json_file)


# Train methods

## All datasets

In [8]:
from sklearn.preprocessing import StandardScaler

# Full list of dataset keys, kept for reference; the assignment right after it
# narrows this run to two of them.
datasets = ["Gum1b","Gum2b","Gum3b","Gum1s","Gum2s","Gum3s","Noc_S","Noc_O","Noc_3T"]
datasets = ["Gum2s","Gum3s"]

# results[dataset][model_name] holds the tuple returned by predict_reg_by_label
results = {name: {} for name in datasets}

# Constructor arguments for the AutoTabPFN post-hoc ensemble
auto_tabpfn_params = {
    'device': 'cuda',
    'max_time': 75,
    'preset': 'default',
    'phe_init_args': {
        'n_repeats': 1,
        'holdout_fraction': 0.5,
        'ges_n_iterations': 1,
    },
    'ges_scoring_string': 'rmse',
    'ignore_pretraining_limits': True,
}

# One entry per model to evaluate. cv=0 makes predict_reg_by_label skip the
# grid search and pass "param_grid" straight to the regressor constructor.
models = {
    "auto_tabPFN": {
        "regressor": AutoTabPFNRegressor,
        "param_grid": auto_tabpfn_params,
        "cv": 0,
        "gpu": False,
    },
}

scaler = StandardScaler()

# Evaluate every model on every selected dataset
for dataset in datasets:
    this_data = loaded_datasets[dataset]

    IDs = pd.Series(this_data["IDs"])
    y = np.array(this_data["y"])

    # NOTE(review): the scaler is fit on all rows of the dataset, including the
    # rows that predict_reg_by_label later holds out as test sets. Confirm
    # this is acceptable for the reported scores.
    raw_X = pd.DataFrame(this_data["X"])
    X = pd.DataFrame(scaler.fit_transform(raw_X), columns=raw_X.columns)

    for model_name, model_info in models.items():
        predict = predict_reg_by_label(
            IDs,
            y,
            X,
            model_info["regressor"],
            param_grid=model_info["param_grid"],
            cv=model_info["cv"],
            gpu=model_info["gpu"],
            verbose=False,
        )
        results[dataset][model_name] = predict
        # Element 1 of the returned tuple is the overall ARV
        print(f"{dataset}-{model_name}: {round(predict[1],4)}")

2025-07-01 09:25:06 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-01 09:25:06 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-01 09:25:06 INFO     Using task type: regression
2025-07-01 09:25:06 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-01 09:25:07 INFO     Using 52 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_3', 'random_tabpfn_model_8', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_14', 'random_tabpfn_model_15', 'random_tabpfn_model_17', 'random_tabpfn_model_18', 'random_tabpfn_model_22', 'random_tabpfn_model_24', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_30', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_35', 'random_tabpfn_model_38', 'random_tabpfn_model_40', 'random_tabpfn_model_41', 'random_tabpfn_model_42', 'random_tabpfn_model_44', 'random_tabpfn_mo

2025-07-01 09:29:24 INFO     Yield data for model random_tabpfn_model_35 and split 0 (repeat=1).
2025-07-01 09:29:26 INFO     Likely not enough time left for another model.
2025-07-01 09:29:26 INFO     Stop validation of all models after 19 models in repeat 1.
2025-07-01 09:29:26 INFO     As this is the first repeat, we trim down the models to all so-far run models!
2025-07-01 09:29:26 INFO     Order of selections: [0]
2025-07-01 09:29:26 INFO     Val loss over iterations: [0.6859985521772274]
2025-07-01 09:29:26 INFO     Model losses: [0.68599855 0.72621136 0.74145378 0.7418359  0.74301866 0.74774284
 0.76670063 0.77400446 0.80553062 0.81659838 0.8292659  0.82941349
 0.85300858 0.85569776 0.8605737  0.93860046 0.94988143 0.96822509
 2.60422953]
2025-07-01 09:29:26 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
2025-07-01 09:31:19 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-01 09:31:19 INFO     No categorical_feature_indices given. 

2025-07-01 09:35:07 INFO     Yield data for model random_tabpfn_model_25 and split 0 (repeat=1).
2025-07-01 09:35:09 INFO     Yield data for model random_tabpfn_model_27 and split 0 (repeat=1).
2025-07-01 09:35:11 INFO     Yield data for model random_tabpfn_model_28 and split 0 (repeat=1).
2025-07-01 09:35:15 INFO     Yield data for model random_tabpfn_model_30 and split 0 (repeat=1).
2025-07-01 09:35:18 INFO     Yield data for model random_tabpfn_model_32 and split 0 (repeat=1).
2025-07-01 09:35:28 INFO     Yield data for model random_tabpfn_model_33 and split 0 (repeat=1).
2025-07-01 09:35:30 INFO     Yield data for model random_tabpfn_model_35 and split 0 (repeat=1).
2025-07-01 09:35:31 INFO     Yield data for model random_tabpfn_model_38 and split 0 (repeat=1).
2025-07-01 09:35:35 INFO     Likely not enough time left for another model.
2025-07-01 09:35:35 INFO     Stop validation of all models after 20 models in repeat 1.
2025-07-01 09:35:35 INFO     As this is the first repeat, we

2025-07-01 09:40:44 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 09:40:50 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 09:40:54 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 09:40:58 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 09:41:02 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 09:41:08 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 09:41:10 INFO     Yield data for model random_tabpfn_model_17 and split 0 (repeat=1).
2025-07-01 09:41:13 INFO     Yield data for model random_tabpfn_model_18 and split 0 (repeat=1).
2025-07-01 09:41:19 INFO     Yield data for model random_tabpfn_model_22 and split 0 (repeat=1).
2025-07-01 09:41:22 INFO     Yield data for model random_tabpfn_model_24 and split 0 (repeat=1).
2025-07-01 09:41:25 INFO     Yiel

2025-07-01 09:46:38 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 09:46:38 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 09:46:45 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 09:46:47 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 09:46:54 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 09:46:57 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 09:47:02 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 09:47:06 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 09:47:12 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 09:47:14 INFO     Yield data for model random_tabpfn_model_17 and split 0 (repeat=1).
2025-07-01 09:47:17 INFO     Yi

2025-07-01 09:52:46 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 09:52:46 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 09:52:46 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 09:52:53 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 09:52:55 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 09:53:01 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 09:53:05 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 09:53:09 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 09:53:14 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 09:53:20 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 09:53:21 INFO     Yield da

2025-07-01 09:58:54 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 09:58:54 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 09:58:54 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 09:59:00 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 09:59:02 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 09:59:08 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 09:59:12 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 09:59:15 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 09:59:20 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 09:59:25 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 09:59:27 INFO     Yield da

2025-07-01 10:05:03 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:05:03 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:05:03 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:05:09 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:05:11 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:05:16 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:05:20 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:05:24 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:05:28 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:05:33 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:05:35 INFO     Yield da

2025-07-01 10:11:11 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:11:11 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:11:11 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:11:17 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:11:19 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:11:25 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:11:29 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:11:32 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:11:36 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:11:42 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:11:43 INFO     Yield da

2025-07-01 10:17:21 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:17:21 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:17:21 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:17:28 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:17:29 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:17:36 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:17:40 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:17:44 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:17:48 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:17:54 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:17:56 INFO     Yield da

2025-07-01 10:23:29 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:23:29 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:23:29 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:23:35 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:23:37 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:23:43 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:23:47 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:23:51 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:23:55 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:24:00 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:24:02 INFO     Yield da

2025-07-01 10:29:36 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:29:36 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:29:36 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:29:42 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:29:44 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:29:49 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:29:53 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:29:57 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:30:01 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:30:06 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:30:08 INFO     Yield da

2025-07-01 10:35:51 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:35:51 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:35:51 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:35:57 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:35:59 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:36:04 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:36:08 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:36:12 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:36:16 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:36:21 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:36:23 INFO     Yield da

2025-07-01 10:41:59 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 10:41:59 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 10:41:59 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 10:42:05 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 10:42:07 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 10:42:13 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 10:42:17 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 10:42:20 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 10:42:25 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 10:42:30 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 10:42:32 INFO     Yield da

Gum2s-auto_tabPFN: 0.3893


2025-07-01 10:48:24 INFO     Using 52 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_3', 'random_tabpfn_model_8', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_14', 'random_tabpfn_model_15', 'random_tabpfn_model_17', 'random_tabpfn_model_18', 'random_tabpfn_model_22', 'random_tabpfn_model_24', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_30', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_35', 'random_tabpfn_model_38', 'random_tabpfn_model_40', 'random_tabpfn_model_41', 'random_tabpfn_model_42', 'random_tabpfn_model_44', 'random_tabpfn_model_45', 'random_tabpfn_model_47', 'random_tabpfn_model_49', 'random_tabpfn_model_50', 'random_tabpfn_model_51', 'random_tabpfn_model_52', 'random_tabpfn_model_54', 'random_tabpfn_model_55', 'random_tabpfn_model_59', 'random_tabpfn_model_61', 'random_tabpfn_model_64', 'random_tabpfn_model_68', 'random_tabpfn_m

2025-07-01 10:52:43 INFO     As this is the first repeat, we trim down the models to all so-far run models!
2025-07-01 10:52:43 INFO     Order of selections: [0]
2025-07-01 10:52:43 INFO     Val loss over iterations: [0.18103807107099187]
2025-07-01 10:52:43 INFO     Model losses: [0.18103807 0.19806494 0.20465205 0.20598041 0.21169792 0.22364802
 0.23181231 0.23691794 0.24998522 0.27367097 0.27837043 0.2869519
 0.40688275 0.43984969 0.5695375  0.5732446  0.58810178 0.60840708
 1.44188154]
2025-07-01 10:52:43 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
2025-07-01 10:54:37 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-01 10:54:37 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-01 10:54:37 INFO     Using task type: regression
2025-07-01 10:54:37 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-01 10:54:38 INFO     Using 52 base models: ['default_tabpfn_model_0', 'random_tabpfn

2025-07-01 10:58:35 INFO     Yield data for model random_tabpfn_model_30 and split 0 (repeat=1).
2025-07-01 10:58:38 INFO     Yield data for model random_tabpfn_model_32 and split 0 (repeat=1).
2025-07-01 10:58:48 INFO     Yield data for model random_tabpfn_model_33 and split 0 (repeat=1).
2025-07-01 10:58:50 INFO     Yield data for model random_tabpfn_model_35 and split 0 (repeat=1).
2025-07-01 10:58:52 INFO     Yield data for model random_tabpfn_model_38 and split 0 (repeat=1).
2025-07-01 10:58:56 INFO     Time limit reached.
2025-07-01 10:58:56 INFO     Stop validation of all models after 20 models in repeat 1.
2025-07-01 10:58:56 INFO     As this is the first repeat, we trim down the models to all so-far run models!
2025-07-01 10:58:56 INFO     Order of selections: [0]
2025-07-01 10:58:56 INFO     Val loss over iterations: [0.1299641588865097]
2025-07-01 10:58:56 INFO     Model losses: [0.12996416 0.15049243 0.1514101  0.1532064  0.15675603 0.15941605
 0.16552623 0.16742344 0.16906

2025-07-01 11:04:19 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:04:23 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:04:29 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:04:31 INFO     Yield data for model random_tabpfn_model_17 and split 0 (repeat=1).
2025-07-01 11:04:34 INFO     Yield data for model random_tabpfn_model_18 and split 0 (repeat=1).
2025-07-01 11:04:40 INFO     Yield data for model random_tabpfn_model_22 and split 0 (repeat=1).
2025-07-01 11:04:43 INFO     Yield data for model random_tabpfn_model_24 and split 0 (repeat=1).
2025-07-01 11:04:46 INFO     Yield data for model random_tabpfn_model_25 and split 0 (repeat=1).
2025-07-01 11:04:49 INFO     Yield data for model random_tabpfn_model_27 and split 0 (repeat=1).
2025-07-01 11:04:51 INFO     Yield data for model random_tabpfn_model_28 and split 0 (repeat=1).
2025-07-01 11:04:54 INFO     Y

2025-07-01 11:10:08 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:10:15 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:10:18 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:10:23 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:10:27 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:10:33 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:10:35 INFO     Yield data for model random_tabpfn_model_17 and split 0 (repeat=1).
2025-07-01 11:10:38 INFO     Yield data for model random_tabpfn_model_18 and split 0 (repeat=1).
2025-07-01 11:10:44 INFO     Yield data for model random_tabpfn_model_22 and split 0 (repeat=1).
2025-07-01 11:10:47 INFO     Yield data for model random_tabpfn_model_24 and split 0 (repeat=1).
2025-07-01 11:10:50 INFO     Yiel

2025-07-01 11:16:08 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:16:08 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:16:08 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:16:15 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:16:17 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:16:24 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:16:27 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:16:32 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:16:36 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:16:42 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:16:44 INFO     Yield da

2025-07-01 11:22:16 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:22:16 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:22:16 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:22:22 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:22:24 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:22:30 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:22:34 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:22:37 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:22:42 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:22:47 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:22:49 INFO     Yield da

2025-07-01 11:28:25 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:28:25 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:28:25 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:28:31 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:28:33 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:28:39 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:28:43 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:28:46 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:28:51 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:28:56 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:28:58 INFO     Yield da

2025-07-01 11:34:33 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:34:33 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:34:33 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:34:39 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:34:41 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:34:47 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:34:50 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:34:54 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:34:58 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:35:04 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:35:05 INFO     Yield da

2025-07-01 11:40:42 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:40:42 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:40:42 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:40:49 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:40:51 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:40:58 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:41:01 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:41:06 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:41:10 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:41:16 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:41:18 INFO     Yield da

2025-07-01 11:46:53 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:46:53 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:46:53 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:46:59 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:47:01 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:47:06 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:47:10 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:47:14 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:47:18 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:47:23 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:47:25 INFO     Yield da

2025-07-01 11:53:00 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:53:00 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:53:00 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:53:06 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:53:08 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:53:13 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:53:17 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:53:21 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:53:25 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:53:30 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:53:32 INFO     Yield da

2025-07-01 11:59:05 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 11:59:05 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 11:59:05 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 11:59:11 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 11:59:13 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 11:59:19 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 11:59:23 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 11:59:26 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 11:59:30 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 11:59:36 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 11:59:38 INFO     Yield da

2025-07-01 12:05:12 INFO     Starting 1-repeated holdout validation with holdout_frac=0.5.
2025-07-01 12:05:12 INFO     Set time limit to 75 seconds. We will early stop validation if needed.
2025-07-01 12:05:12 INFO     Yield data for model default_tabpfn_model_0 and split 0 (repeat=1).
2025-07-01 12:05:18 INFO     Yield data for model random_tabpfn_model_1 and split 0 (repeat=1).
2025-07-01 12:05:20 INFO     Yield data for model random_tabpfn_model_3 and split 0 (repeat=1).
2025-07-01 12:05:26 INFO     Yield data for model random_tabpfn_model_8 and split 0 (repeat=1).
2025-07-01 12:05:30 INFO     Yield data for model random_tabpfn_model_9 and split 0 (repeat=1).
2025-07-01 12:05:33 INFO     Yield data for model random_tabpfn_model_12 and split 0 (repeat=1).
2025-07-01 12:05:37 INFO     Yield data for model random_tabpfn_model_14 and split 0 (repeat=1).
2025-07-01 12:05:43 INFO     Yield data for model random_tabpfn_model_15 and split 0 (repeat=1).
2025-07-01 12:05:45 INFO     Yield da

Gum3s-auto_tabPFN: 0.7663


In [None]:
# Save the whole interpreter session to disk so the long training run above
# does not have to be repeated; the commented-out dill.load_session call in the
# first cell is the matching restore step (note it points at a different file name).
dill.dump_session('autotabpfn_reg_1.db')

In [9]:
# Summary table: one row per dataset, one column per model. Each cell shows
# element 1 of the stored result tuple (the overall ARV), rounded to 4 places.

# Print header
header = "Dataset".ljust(12) + " | " + " | ".join(model_name.ljust(15) for model_name in models)
print(header)

# Print separator line, sized from the header itself. The previous fixed
# formula (12 + 3 + len(models) * 18) was three characters wider than the table.
print("-" * len(header))

# Print results
for dataset in datasets:
    result_line = f"{dataset.ljust(12)} | " + " | ".join(f"{round(results[dataset][model_name][1],4):<15}" for model_name in models)
    print(result_line)


Dataset      | auto_tabPFN    
---------------------------------
Gum2s        | 0.3893         
Gum3s        | 0.7663         
