# Initialization. Load previous state. Load modules

In [1]:
import dill
# Restore the entire session
#dill.load_session('PTRMS_tun_class_all.db')

In [2]:
#check last result on disk
#dir()

In [3]:
import json

import pandas as pd
import numpy as np
import numpy as cp
#import cupy as cp

from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix

from tabpfn import TabPFNClassifier

from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier
import hyperopt

from finetuning_scripts.finetune_tabpfn_main import fine_tune_tabpfn
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [4]:
def finetunemymodel(X, y, param_grid, verbose=False, time_limit=60, learning_rate=0.00002, batch_size=5):
    """Fine-tune the default TabPFN base model on (X, y) and save the result.

    Thin wrapper around ``fine_tune_tabpfn``: the whole of ``X``/``y`` is used
    as training data (no internal train/test split is made here).

    Args:
        X: Feature table; wrapped in a ``pandas.DataFrame`` before the call.
        y: Target labels; wrapped in a ``pandas.Series`` before the call.
        param_grid: Mapping read for two optional keys:
            ``"model_path"`` - where the fine-tuned model is saved,
            ``"device"``     - device handed to ``fine_tune_tabpfn``.
            A missing key is forwarded as ``None``.
        verbose: Accepted for call compatibility; not used by this function
            (the logger level passed below is always 0).
        time_limit: Forwarded as ``time_limit`` (presumably seconds - confirm
            against the ``fine_tune_tabpfn`` documentation).
        learning_rate: Forwarded inside ``finetuning_config``.
        batch_size: Forwarded inside ``finetuning_config``.

    Returns:
        None. The return value of ``fine_tune_tabpfn`` is discarded.
    """
    # Resolved first, exactly like the original, so a bad ``param_grid``
    # fails before any data conversion happens.
    output_path = param_grid.get("model_path")

    hyper_params = {"learning_rate": learning_rate, "batch_size": batch_size}
    train_features = pd.DataFrame(X)
    train_targets = pd.Series(y)
    target_device = param_grid.get('device')

    options = dict(
        path_to_base_model="auto",
        save_path_to_fine_tuned_model=output_path,
        # Finetuning HPs
        time_limit=time_limit,
        finetuning_config=hyper_params,
        validation_metric="log_loss",
        # Input data
        X_train=train_features,
        y_train=train_targets,
        categorical_features_index=None,
        device=target_device,
        task_type="multiclass",
        # Optional
        show_training_curve=True,  # Shows a final report after finetuning.
        logger_level=0,  # Shows all logs, higher values show less
        use_wandb=False,  # Init wandb yourself, and set to True
    )
    fine_tune_tabpfn(**options)


In [5]:
def predict_class_by_label(IDs, y, X, classifier, param_grid=None, cv=5, gpu=False, verbose=True):
    """
    Leave-one-group-out evaluation of a classifier.

    For every distinct value in ``IDs`` the rows carrying that value form the
    test set and all remaining rows form the training set. A model is trained
    (optionally tuned with GridSearchCV) and its predictions for the held-out
    rows are written into a single prediction vector covering all of ``y``.

    Args:
        IDs: A pandas Series with one group label per row.
        y: Target labels (pandas Series, numpy array, or any sequence; plain
            sequences are converted to a numpy array). Labels may be numeric
            or strings.
        X: A pandas DataFrame or numpy array containing features.
        classifier: A scikit-learn compatible classifier class (not instance).
        param_grid: If ``cv > 1``, the grid searched by GridSearchCV.
            Otherwise the keyword arguments used to construct the classifier.
            Defaults to an empty dict.
        cv: Number of folds for GridSearchCV. If ``cv <= 1`` no search is done
            and a single model is built directly from ``param_grid``.
        gpu: If True, pass the X splits through ``cp.array`` before training.
        verbose: If True, prints progress and metrics. Also forwarded to
            GridSearchCV and to ``finetunemymodel``.

    Returns:
        predictions: Held-out predictions, same container type as ``y``
            (Series keeps index and name). Rows never held out stay 0 / ''.
        final_error: 1 - accuracy over all rows.
        best_params_per_label: Dictionary with the parameters used for each
            group label (a shallow copy of ``param_grid`` when ``cv <= 1``).
    """
    # Fresh dict per call instead of a shared mutable default argument.
    if param_grid is None:
        param_grid = {}

    # Lists/tuples cannot be indexed with a boolean mask; promote to ndarray.
    if not isinstance(y, (pd.Series, np.ndarray)):
        y = np.asarray(y)

    unique_labels = IDs.unique()
    best_params_per_label = {}

    # Zero-filled buffer with y's container type and dtype. Unlike `y * 0`,
    # this also works for numpy string-label arrays.
    if isinstance(y, pd.Series):
        predictions = pd.Series(np.zeros_like(y.to_numpy()), index=y.index, name=y.name)
    else:
        predictions = np.zeros_like(y)

    for label in unique_labels:
        mask = (IDs == label)

        if gpu:
            X_train, X_test = cp.array(X[~mask]), cp.array(X[mask])
        else:
            X_train, X_test = X[~mask], X[mask]
        y_train, y_test = y[~mask], y[mask]

        if cv > 1:
            # Use GridSearchCV to find the best parameters
            grid_search = GridSearchCV(
                estimator=classifier(),
                param_grid=param_grid,
                scoring='accuracy',
                cv=cv,
                verbose=verbose
            )
            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_

            # Store the best parameters
            best_params_per_label[label] = grid_search.best_params_

        else:
            # Fine-tune TabPFN first when a custom model path is requested;
            # the classifier built below is then given that same path.
            if classifier is TabPFNClassifier and param_grid.get("model_path", "auto") != "auto":
                finetunemymodel(X_train, y_train, param_grid, verbose)
            best_model = classifier(**param_grid)
            best_model.fit(X_train, y_train)
            # Store a copy so entries do not alias the caller's dict.
            best_params_per_label[label] = dict(param_grid)

        # Predict on the test set
        predictions[mask] = best_model.predict(X_test)

        if verbose:
            print(f"Label: {label}, Best Params: {best_params_per_label[label]}")
            print(f"Test Set error for label {label}: {round(1.0-accuracy_score(y_test, predictions[mask]), 4)}")

    # Final metrics
    final_error = 1.0 - accuracy_score(y, predictions)

    if verbose:
        print(f"\nOverall Classification Error: {round(final_error, 4)}")

    return predictions, final_error, best_params_per_label


# Load datasets from a JSON file

In [6]:
# Load the bees classification datasets from JSON.
# NOTE: later load cells in this notebook rebind `loaded_datasets`; run only
# the load cell for the dataset family you want to train on.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open('data_bees_classif.json', 'r', encoding='utf-8') as json_file:
    loaded_datasets = json.load(json_file)


In [7]:
# Load the GC classification datasets from JSON.
# NOTE: this rebinds `loaded_datasets`, replacing whatever an earlier load
# cell stored there; run only the load cell for the dataset family you need.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open('data_gc_classif.json', 'r', encoding='utf-8') as json_file:
    loaded_datasets = json.load(json_file)


In [7]:
# Load the breath classification datasets from JSON.
# NOTE: this rebinds `loaded_datasets`, replacing whatever an earlier load
# cell stored there.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open('data_classif_breath.json', 'r', encoding='utf-8') as json_file:
    loaded_datasets = json.load(json_file)


# Train methods

In [8]:
# The string literal below is a disabled debugging aid: it is never executed.
# If pasted as code it would replace `warnings.showwarning` with a hook that
# prints each warning's message, file and line number plus the current stack.
"""
#Debugge warnings

import warnings
import traceback

def mostrar_warning(message, category, filename, lineno, file=None, line=None):
    print(f"¡Warning Capturado!: {message} ({filename}:{lineno})")
    traceback.print_stack()

warnings.showwarning = mostrar_warning
"""


# Print the name of every dataset in the currently loaded JSON file
# (iterating `loaded_datasets` yields its top-level keys).
for dataset in loaded_datasets:
    print(dataset)

Mentine
Forest
Etnia


In [10]:
#xx=loaded_datasets["live7_n"]
#xxx=xx["X"]
#print(pd.DataFrame(xxx).shape)
#print(np.array(xx["y"]))
#print((pd.Series(xx["IDs"])).unique())

## All datasets

In [9]:
# Evaluate every model in `models` on every dataset in `datasets` using
# predict_class_by_label (one held-out test set per distinct ID).
#
# Alternative dataset selections, one per JSON file; the names must exist as
# keys in the currently loaded `loaded_datasets`.
#PTRMS
#datasets = ["Tea", "Gum2", "Gum3", "Cafe", "Ham", "Pesce", "Spinaci", "Peperoncini", "Funghi13", "Funghi20", "Funghi21","Urine","Smoker","CancerTypes","Lab"]
#GC
#datasets = ["Funghi", "Peperoncini", "Spinaci", "Fragole","Nocciole"]
#Breath
#datasets = ["Mentine","Forest","Etnia"]

datasets = ["Etnia"]

# Dictionary to store results: results[dataset][model_name] holds the tuple
# returned by predict_class_by_label:
# (predictions, final_error, best_params_per_label).
results = {dataset: {} for dataset in datasets}

# Define model parameters.
# Each entry provides the classifier class, its "param_grid", and the `cv` /
# `gpu` flags forwarded to predict_class_by_label.
models = {
    "auto_tabPFN": {
        "classifier": AutoTabPFNClassifier,
        # With cv=0 (below) this dict is not searched: it is passed as
        # constructor keyword arguments, i.e. AutoTabPFNClassifier(**param_grid).
        "param_grid": {
            'device': 'cuda',  # presumably requires a CUDA-capable GPU - confirm
            'max_time':75,  # presumably a time budget in seconds per fit - confirm in the AutoTabPFN docs
            'preset': 'default',
            'phe_init_args': {
                'n_repeats': 1,
                'holdout_fraction': 0.5,
                'ges_n_iterations': 1,
            },
            'ges_scoring_string': 'accuracy',
            'ignore_pretraining_limits':True
        },
        # cv <= 1 skips GridSearchCV inside predict_class_by_label.
        "cv": 0,
        # False: X is passed to the model without the cp.array conversion.
        "gpu": False
    }
}



from sklearn.preprocessing import StandardScaler
# One scaler instance, re-fitted from scratch for each dataset by fit_transform.
scaler = StandardScaler()

# Loop through datasets
for dataset in datasets:
    this_data = loaded_datasets[dataset]

    # Each dataset entry provides "IDs" (group label per row), "y" (targets)
    # and "X" (feature table).
    IDs = pd.Series(this_data["IDs"])
    y = np.array(this_data["y"])
    X = pd.DataFrame(this_data["X"])
    columns_to_keep = X.columns
    # NOTE(review): the scaler is fitted on ALL rows before the per-ID
    # train/test split happens inside predict_class_by_label, so held-out rows
    # contribute to the scaling statistics. Confirm this is acceptable.
    X = pd.DataFrame(scaler.fit_transform(X) , columns=columns_to_keep)

    for model_name, model_info in models.items():
        # `predict` is the full result tuple, not only the predictions.
        predict = predict_class_by_label(
            cv=model_info["cv"], 
            gpu=model_info["gpu"], 
            IDs=IDs, 
            y=y, 
            X=X, 
            classifier=model_info["classifier"], 
            param_grid=model_info["param_grid"], 
            verbose=False
        )
        results[dataset][model_name] = predict
        # predict[1] is the overall classification error (1 - accuracy).
        print(f"{dataset}-{model_name}: {round(predict[1],4)}")

2025-07-16 13:15:41 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:15:41 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:15:41 INFO     Using task type: binary_classification
2025-07-16 13:15:41 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:15:42 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_model_35', 'random_tabpfn_model_37', 'random_tabpfn_model_39', 'random_tabpfn_model_40', 'random_tabpfn_model_42', 'rando

2025-07-16 13:16:15 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:16:31 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:16:31 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:16:31 INFO     Using task type: binary_classification
2025-07-16 13:16:31 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:16:32 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:17:03 INFO     Best weights: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:17:14 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:17:14 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:17:14 INFO     Using task type: binary_classification
2025-07-16 13:17:14 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:17:14 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:17:45 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:18:02 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:18:02 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:18:02 INFO     Using task type: binary_classification
2025-07-16 13:18:02 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:18:02 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:18:33 INFO     Best weights: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:18:49 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:18:49 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:18:49 INFO     Using task type: binary_classification
2025-07-16 13:18:49 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:18:49 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:19:20 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:19:35 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:19:35 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:19:35 INFO     Using task type: binary_classification
2025-07-16 13:19:35 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:19:36 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:20:07 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:20:26 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:20:26 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:20:26 INFO     Using task type: binary_classification
2025-07-16 13:20:26 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:20:26 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:20:58 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:21:13 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:21:13 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:21:13 INFO     Using task type: binary_classification
2025-07-16 13:21:13 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:21:14 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:21:44 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:22:01 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:22:01 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:22:01 INFO     Using task type: binary_classification
2025-07-16 13:22:01 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:22:02 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:22:32 INFO     Best weights: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:22:48 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:22:48 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:22:48 INFO     Using task type: binary_classification
2025-07-16 13:22:48 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:22:48 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:23:19 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:23:35 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:23:35 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:23:35 INFO     Using task type: binary_classification
2025-07-16 13:23:35 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:23:36 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:24:07 INFO     Best weights: [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:24:24 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:24:24 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:24:24 INFO     Using task type: binary_classification
2025-07-16 13:24:24 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:24:24 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:24:55 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:25:12 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:25:12 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:25:12 INFO     Using task type: binary_classification
2025-07-16 13:25:12 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:25:12 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:25:43 INFO     Best weights: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:25:59 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:25:59 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:25:59 INFO     Using task type: binary_classification
2025-07-16 13:25:59 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:26:00 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:26:31 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:26:45 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:26:45 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:26:45 INFO     Using task type: binary_classification
2025-07-16 13:26:45 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:26:45 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:27:16 INFO     Best weights: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:27:30 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:27:30 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:27:30 INFO     Using task type: binary_classification
2025-07-16 13:27:30 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:27:31 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:28:02 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:28:21 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:28:21 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:28:21 INFO     Using task type: binary_classification
2025-07-16 13:28:21 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:28:22 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:28:53 INFO     Best weights: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:29:08 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:29:08 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:29:08 INFO     Using task type: binary_classification
2025-07-16 13:29:08 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:29:09 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:29:40 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:30:00 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:30:00 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:30:00 INFO     Using task type: binary_classification
2025-07-16 13:30:00 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:30:00 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:30:31 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:30:44 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:30:44 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:30:44 INFO     Using task type: binary_classification
2025-07-16 13:30:44 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:30:45 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:31:15 INFO     Best weights: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:31:36 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:31:36 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:31:36 INFO     Using task type: binary_classification
2025-07-16 13:31:36 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:31:37 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:32:08 INFO     Best weights: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:32:23 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:32:23 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:32:23 INFO     Using task type: binary_classification
2025-07-16 13:32:23 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:32:23 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:32:54 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:33:11 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:33:11 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:33:11 INFO     Using task type: binary_classification
2025-07-16 13:33:11 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:33:11 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:33:42 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:33:56 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:33:56 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:33:56 INFO     Using task type: binary_classification
2025-07-16 13:33:56 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:33:57 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:34:27 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:34:44 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:34:44 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:34:44 INFO     Using task type: binary_classification
2025-07-16 13:34:44 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:34:45 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:35:16 INFO     Best weights: [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:35:29 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:35:29 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:35:29 INFO     Using task type: binary_classification
2025-07-16 13:35:29 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:35:30 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:36:00 INFO     Best weights: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:36:20 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:36:20 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:36:20 INFO     Using task type: binary_classification
2025-07-16 13:36:20 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:36:21 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:36:52 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:37:10 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:37:10 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:37:10 INFO     Using task type: binary_classification
2025-07-16 13:37:10 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:37:10 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:37:41 INFO     Best weights: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]
2025-07-16 13:37:58 INFO     Using `default` preset for Post Hoc Ensemble.
2025-07-16 13:37:58 INFO     No categorical_feature_indices given. Assuming no categorical features.
2025-07-16 13:37:58 INFO     Using task type: binary_classification
2025-07-16 13:37:58 INFO     Obtaining TabPFN models from a random portfolio.
2025-07-16 13:37:59 INFO     Using 58 base models: ['default_tabpfn_model_0', 'random_tabpfn_model_1', 'random_tabpfn_model_6', 'random_tabpfn_model_7', 'random_tabpfn_model_9', 'random_tabpfn_model_12', 'random_tabpfn_model_13', 'random_tabpfn_model_17', 'random_tabpfn_model_19', 'random_tabpfn_model_22', 'random_tabpfn_model_25', 'random_tabpfn_model_27', 'random_tabpfn_model_28', 'random_tabpfn_model_29', 'random_tabpfn_model_30', 'random_tabpfn_model_31', 'random_tabpfn_model_32', 'random_tabpfn_model_33', 'random_tabpfn_model_34', 'random_tabpfn_

2025-07-16 13:38:30 INFO     Best weights: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0.]


Etnia-auto_tabPFN: 0.3506


In [10]:
# Persist the whole interpreter session to disk with dill so it can be
# reloaded later via dill.load_session (see the commented-out restore call
# in the first cell of the notebook).
dill.dump_session('autotabpfn_class_breath_3.db')

In [11]:
# Print a plain-text summary table: one row per dataset, one column per model.
# Each cell shows the entry at index 1 of results[dataset][model_name],
# rounded to 4 decimals.

# Build the header first so the separator can be sized from it.
header_line = "Dataset".ljust(12) + " | " + " | ".join(model_name.ljust(15) for model_name in models)
print(header_line)

# Separator line: exactly as wide as the header, so the rule always lines up
# with the columns (also when a model name is longer than its 15-char slot).
print("-" * len(header_line))

# Print results: one aligned row per dataset.
for dataset in datasets:
    result_line = f"{dataset.ljust(12)} | " + " | ".join(f"{round(results[dataset][model_name][1],4):<15}" for model_name in models)
    print(result_line)


Dataset      | auto_tabPFN    
---------------------------------
Etnia        | 0.3506         


In [12]:
print("---------------------------")
print("Confusion matrices")
print("---------------------------")

# For every dataset/model pair, cross-tabulate the true labels against the
# predictions and print the resulting table.
for dataset in datasets:
    this_data = loaded_datasets[dataset]
    y = np.array(this_data["y"])  # true labels for this dataset
    for model_name in models:
        # Entry 0 of results[dataset][model_name] is used as the predictions.
        # The table is deliberately NOT named `confusion_matrix`: that name is
        # the sklearn.metrics function imported at the top of the notebook, and
        # rebinding it here would break any later (or re-run) cell that calls it.
        cm_table = pd.crosstab(y, results[dataset][model_name][0], rownames=['Actual'], colnames=['Predicted'])
        # Print the confusion matrix with a title
        print(f"Confusion Matrix for {dataset}-{model_name}:")
        print(cm_table)
        print("\n")  # Add a newline for better readability


---------------------------
Confusion matrices
---------------------------
Confusion Matrix for Etnia-auto_tabPFN:
Predicted   0   1
Actual           
0          25  14
1          13  25


