In [1]:
import openml
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from tqdm import tqdm
openml.config.cache_directory = os.path.expanduser('/storage/store/work/lgrinszt/openml_cache')


In [2]:
from tabpfn.scripts.transformer_prediction_interface import TabPFNClassifier

model = TabPFNClassifier()

Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cpu device
Using a Transformer with 25.82 M parameters


In [3]:
import openml
from sklearn.preprocessing import LabelEncoder
import pandas as pd
def balance_data(x, y):
    rng = np.random.RandomState(0)
    print("Balancing")
    print(x.shape)
    indices = [(y == i) for i in np.unique(y)]
    sorted_classes = np.argsort(
        list(map(sum, indices)))  # in case there are more than 2 classes, we take the two most numerous

    n_samples_min_class = sum(indices[sorted_classes[-2]])
    print("n_samples_min_class", n_samples_min_class)
    indices_max_class = rng.choice(np.where(indices[sorted_classes[-1]])[0], n_samples_min_class, replace=False)
    indices_min_class = np.where(indices[sorted_classes[-2]])[0]
    total_indices = np.concatenate((indices_max_class, indices_min_class))
    y = y[total_indices]
    indices_first_class = (y == sorted_classes[-1])
    indices_second_class = (y == sorted_classes[-2])
    y[indices_first_class] = 0
    y[indices_second_class] = 1

    return x.iloc[total_indices], y
def import_open_ml_data(dataset_id=None, task_id=None, remove_nans=None, impute_nans=None, categorical=False, regression=False, balance=False, rng=None) -> pd.DataFrame:
    """
    Import data from openML
    :param int openml_task_id:
    :param path_to_file:
    :return:
    """
    if task_id is not None:
        task = openml.tasks.get_task(task_id)  # download the OpenML task
        dataset = task.get_dataset()
    elif dataset_id is not None:
        dataset = openml.datasets.get_dataset(dataset_id)
    # retrieve categorical data for encoding
    X, y, categorical_indicator, attribute_names = dataset.get_data(
        dataset_format="dataframe", target=dataset.default_target_attribute
    )
    categorical_indicator = np.array(categorical_indicator)
    print("{} categorical columns".format(sum(categorical_indicator)))
    print("{} columns".format(X.shape[1]))
    y_encoder = LabelEncoder()

    # Replace categorical values by integers for each categorical column
    for i, categorical in enumerate(categorical_indicator):
        # X.iloc[:, i] = X.iloc[:, i].astype('category')
        # X.iloc[:, i] = X.iloc[:, i].cat.codes
        # X.iloc[:, i] = X.iloc[:, i].astype('int64')
        # prevent warning
        X[X.columns[i]] = X[X.columns[i]].astype('category')
        X[X.columns[i]] = X[X.columns[i]].cat.codes
        X[X.columns[i]] = X[X.columns[i]].astype('int64')

    # remove missing values
    assert remove_nans or impute_nans, "You need to remove or impute nans"
    if remove_nans:
        missing_rows_mask = X.isnull().any(axis=1)
        if sum(missing_rows_mask) > X.shape[0] / 5:
            print("Removed {} rows with missing values on {} rows".format(
                sum(missing_rows_mask), X.shape[0]))
        X = X[~missing_rows_mask]
        y = y[~missing_rows_mask]
        n_rows_non_missing = X.shape[0]
        if n_rows_non_missing == 0:
            print("Removed all rows")
            return None
    elif impute_nans:
        from sklearn.impute import SimpleImputer
        # Impute numerical columns with mean and categorical columns with most frequent
        categorical_imputer = SimpleImputer(strategy="most_frequent")
        numerical_imputer = SimpleImputer(strategy="mean")
        # check that there a > 0 categorical columns
        if sum(categorical_indicator) > 0:
            X.iloc[:, categorical_indicator] = categorical_imputer.fit_transform(X.iloc[:, categorical_indicator])
        # check that there a > 0 numerical columns
        if sum(~categorical_indicator) > 0:
            X.iloc[:, ~categorical_indicator] = numerical_imputer.fit_transform(X.iloc[:, ~categorical_indicator])




    # print("removing {} categorical features among {} features".format(sum(categorical_indicator), X.shape[1]))
    # X = X.to_numpy()[:, ~categorical_indicator]  # remove all categorical columns
    # if X.shape[1] == 0:
    #     print("removed all features, skipping this task")
    #     return None

    y = y_encoder.fit_transform(y)



    if regression:
        y = y.astype(np.float64)
    else:
        y = y.astype(np.int64)

    if balance:
        X, y = balance_data(X, y)

    X = X.to_numpy()

    if categorical:
        return X, y, categorical_indicator

    return X, y, None

In [4]:
# Create a scikit-learn compatible classifier called TabPFNClassifierEnsemble
#TODO: use sklearn ensemble classes
from sklearn.base import BaseEstimator, ClassifierMixin
# Import decision tree classifier
from sklearn.tree import DecisionTreeClassifier
import copy
import warnings
class TabPFNClassifierEnsemble(BaseEstimator, ClassifierMixin):
    def __init__(self, subsample_size=10, n_ensemble=10, device="cpu"):
        super().__init__()
        self.base_model_class = TabPFNClassifier
        self.kwargs = {"device": device, "N_ensemble_configurations": 32}
        self.subsamble_size = subsample_size
        self.n_ensemble = n_ensemble

    def fit(self, X, y):
        # for each ensemble member, fit a model on a random subsample of the data
        self.ensemble = []
        if self.subsamble_size > X.shape[0]:
            subsamble_size = X.shape[0]
            warnings.warn("subsample_size is larger than the number of samples, using all samples and 1 ensemble member")
            n_ensembles = 1
        else:
            subsamble_size = self.subsamble_size
            n_ensembles = self.n_ensemble
        for i in range(n_ensembles):
            idx = np.random.choice(X.shape[0], subsamble_size, replace=False)
            new_model = self.base_model_class(**self.kwargs)
            self.ensemble.append(new_model.fit(X[idx], y[idx]))
        return self
    def predict(self, X, n_ensemble=None):
        # majority vote
        if n_ensemble is None:
            n_ensemble = len(self.ensemble)
        if n_ensemble > len(self.ensemble):
            warnings.warn("n_ensemble is larger than the number of ensemble members, using all ensemble members")
            n_ensemble = len(self.ensemble)
        predictions = np.zeros((X.shape[0],n_ensemble))
        for i, model in enumerate(self.ensemble[:n_ensemble]):
            predictions[:, i] = model.predict(X)
        return np.mean(predictions, axis=1) > 0.5
    def return_all_predictions(self, X):
        predictions = np.zeros((X.shape[0], len(self.ensemble)))
        for i, model in enumerate(self.ensemble):
            predictions[:, i] = model.predict(X)
        return predictions
    def return_all_predictions_proba(self, X):
        predictions = np.zeros((X.shape[0], len(self.ensemble)))
        for i, model in enumerate(self.ensemble):
            predictions[:, i] = model.predict_proba(X)[:, 1]
        return predictions
    

In [5]:
suite = openml.study.get_suite(337)
tasks = suite.tasks

n_repeat = 3

train_size = [100, 1000, 5000, 10000]
n_ensemble_max = 10
max_test_size = 10000

models = {
    "tabpfn_ensemble": TabPFNClassifierEnsemble(n_ensemble=n_ensemble_max, subsample_size=1024, device="cuda:0"),
    "rf": RandomForestClassifier(),
}

results = pd.DataFrame(columns=["task_id", "accuracy", "model"])
for task_id in tqdm(tasks):
    print("Task id: {}".format(task_id))
    X, y, _ = import_open_ml_data(task_id=task_id, remove_nans=True, impute_nans=False, categorical=False, regression=False, balance=True, rng=None)
    print("X shape: {}".format(X.shape))
    for n in train_size:
        for i in range(n_repeat):
            for model_name, model in models.items():
                X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=n, random_state=i)
                # Truncate test set
                if X_test.shape[0] > max_test_size:
                    X_test = X_test[:max_test_size]
                    y_test = y_test[:max_test_size]
                model.fit(X_train, y_train)
                if model_name == "tabpfn_ensemble":
                    predictions_all_ensemble = model.return_all_predictions(X_test)
                    #print("predictions_all_ensemble", predictions_all_ensemble)
                    # print("predictions_all_ensemble shape: {}".format(predictions_all_ensemble.shape))
                    # # print accuracy for each model and for the ensemble
                    # for k in range(predictions_all_ensemble.shape[1]):
                    #     y_pred = predictions_all_ensemble[:, k] > 0.5
                    #     print("First 10 predictions for model {}: {}".format(k, y_pred[:10]))
                    #     print("Accuracy for model {}: {}".format(k, np.mean(y_pred == y_test)))
                    # # print accuracy for the ensemble
                    # y_pred = np.mean(predictions_all_ensemble, axis=1) > 0.5
                    # # Print number of disagreements
                    # for k in range(predictions_all_ensemble.shape[1]):
                    #     for l in range(k+1, predictions_all_ensemble.shape[1]):
                    #         print("Number of disagreements for models {} and {}: {}".format(k, l, np.sum(predictions_all_ensemble[:, k] != predictions_all_ensemble[:, l])))
                    # print("Accuracy for ensemble: {}".format(np.mean(y_pred == y_test)))
                    # print("Prop class 1", np.mean(y_pred))
                    #for k in [1, predictions_all_ensemble.shape[1] - 1]:
                    for k in range(predictions_all_ensemble.shape[1]):
                        y_pred = np.mean(predictions_all_ensemble[:, :k+1], axis=1) > 0.5
                        results = pd.concat([results, pd.DataFrame({"task_id": [task_id], "accuracy": [np.mean(y_pred == y_test)], "n_train":n, "n_ensemble":k + 1, "model": [model_name]})], ignore_index=True)
                else:
                    y_pred = model.predict(X_test)
                    results = pd.concat([results, pd.DataFrame({"task_id": [task_id], "accuracy": [np.mean(y_pred == y_test)], "n_train":n, "n_ensemble":1, "model": [model_name]})], ignore_index=True)


    

  0%|          | 0/16 [00:00<?, ?it/s]

Task id: 361055
0 categorical columns
10 columns
Balancing
(16714, 10)
n_samples_min_class 8357
X shape: (16714, 10)




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

  6%|▋         | 1/16 [03:08<47:03, 188.20s/it]

Task id: 361060
0 categorical columns
7 columns
Balancing
(38474, 7)
n_samples_min_class 19237
X shape: (38474, 7)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 12%|█▎        | 2/16 [06:11<43:16, 185.48s/it]

Task id: 361061
0 categorical columns
10 columns
Balancing
(566602, 10)
n_samples_min_class 283301
X shape: (566602, 10)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 19%|█▉        | 3/16 [09:46<43:03, 198.74s/it]

Task id: 361062
0 categorical columns
26 columns
Balancing
(10082, 26)
n_samples_min_class 5041
X shape: (10082, 26)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 25%|██▌       | 4/16 [11:34<32:36, 163.02s/it]

Task id: 361063
0 categorical columns
16 columns
Balancing
(13488, 16)
n_samples_min_class 6744
X shape: (13488, 16)




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 31%|███▏      | 5/16 [14:15<29:44, 162.26s/it]

Task id: 361065
0 categorical columns
10 columns
Balancing
(13376, 10)
n_samples_min_class 6688
X shape: (13376, 10)




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 38%|███▊      | 6/16 [16:51<26:41, 160.10s/it]

Task id: 361066
0 categorical columns
7 columns
Balancing
(10578, 7)
n_samples_min_class 5289
X shape: (10578, 7)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 44%|████▍     | 7/16 [18:31<21:03, 140.34s/it]

Task id: 361068
0 categorical columns
50 columns
Balancing
(72998, 50)
n_samples_min_class 36499
X shape: (72998, 50)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 50%|█████     | 8/16 [22:38<23:15, 174.46s/it]

Task id: 361069
0 categorical columns
24 columns
Balancing
(940160, 24)
n_samples_min_class 470080
X shape: (940160, 24)




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 56%|█████▋    | 9/16 [26:37<22:42, 194.62s/it]

Task id: 361070
0 categorical columns
20 columns
Balancing
(7608, 20)
n_samples_min_class 3804
X shape: (7608, 20)
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device




Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters




Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using style prior: True
{'recompute_attn': True}
<module 'wandb' (namespace)>
Using cuda:0 device
Using a Transformer with 25.82 M parameters
Using 

 56%|█████▋    | 9/16 [27:39<21:30, 184.36s/it]


ValueError: train_size=10000 should be either positive and smaller than the number of samples 7608 or a float in the (0, 1) range

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
# stripplot
# # Create a new result df with n_ensemble in model_name
# n_ensemble_clean = results.n_ensemble
# # Convert to str and replace nan with empty string
# n_ensemble_clean = n_ensemble_clean.astype(str).replace("nan", "")
# # Concatenate model_name and n_ensemble
# results["model_name"] = results.model + "_" + n_ensemble_clean

# Convert n_ensemble to int
results.n_ensemble = results.n_ensemble.astype(float)

for task in results.task_id.unique():
    # Use a continuous color palette
    g = sns.catplot(x="n_train", y="accuracy", hue="n_ensemble", col="model", data=results[results.task_id == task], dodge=True, palette="Blues", kind="point", legend_out=True)
    # fig sizer
    g.fig.set_size_inches(20, 5)
    g.fig.suptitle("Task {}".format(task))
    g.fig.tight_layout()
    g.fig.subplots_adjust(top=0.9)
    # g.fig.savefig("task_{}.png".format(task))
    # plt.close(g.fig)
# ax = sns.stripplot(x="n_train", y="accuracy", hue="model", data=results, jitter=0.05, dodge=True)
# Plot
plt.show()

In [None]:
# boxplot
ax = sns.boxplot(x="n_train", y="accuracy", hue="model", data=results)


In [None]:
results[(results["model"] == "tabpfn_ensemble") & (results["n_train"] == 5000)]