***Note: remember to restart the runtime after the installation.***

In [34]:
# Standard Library Imports

# TabPFN and Extensions

try:
    from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import (
        AutoTabPFNClassifier,
    )

    from tabpfn import TabPFNClassifier, TabPFNRegressor
except ImportError:
    raise ImportError(
        "Warning: Could not import TabPFN / TabPFN extensions. Please run installation above and restart the session afterwards (Runtime > Restart Session)."
    )

# Data Science & Visualization
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# Other ML Models
from catboost import CatBoostClassifier, CatBoostRegressor

# Notebook UI/Display
from IPython.display import Markdown, display
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Prompt
from rich.rule import Rule
from sklearn.compose import make_column_selector, make_column_transformer

# Scikit-Learn: Data & Preprocessing
from sklearn.datasets import fetch_openml, load_breast_cancer

# Scikit-Learn: Models
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
    cross_val_score,
    train_test_split,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from xgboost import XGBClassifier, XGBRegressor

# Shared preprocessing for the baseline models: object/category columns are
# ordinal-encoded, every other column is forwarded unchanged.
_categorical_columns = make_column_selector(dtype_include=["object", "category"])
# Categories that were not seen during fit are encoded as -1 instead of raising.
_categorical_encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
column_transformer = make_column_transformer(
    (_categorical_encoder, _categorical_columns),
    remainder="passthrough",
)

In [35]:
# Backend selection: decides which TabPFN implementation the rest of the
# notebook uses and binds `TabPFNClassifier` / `TabPFNRegressor` to it.
#
# The choice is normally made interactively. Setting the environment variable
# TABPFN_BACKEND to "client" or "local" skips the prompt, so the notebook can
# be executed non-interactively (e.g. Restart & Run All, nbconvert).
import os

console = Console()

console.print(Panel.fit("[bold magenta]TabPFN Demo: Backend Selection[/bold magenta]"))
console.print("\nThis script can run TabPFN using one of two backends:")
console.print("  [bold]1. local:[/bold] Uses a local GPU (NVIDIA). Requires CUDA.")
console.print(
    "  [bold]2. client:[/bold] Uses the TabPFN API. Requires an internet connection and a free account."
)

BACKEND_CHOICES = ["client", "local"]

# Optional override; anything other than a valid choice falls back to the prompt.
backend = os.environ.get("TABPFN_BACKEND", "").strip().lower()
if backend and backend not in BACKEND_CHOICES:
    console.print(
        f"[yellow]Ignoring TABPFN_BACKEND='{backend}' (expected one of {BACKEND_CHOICES}).[/yellow]"
    )
if backend not in BACKEND_CHOICES:
    backend = Prompt.ask(
        "\n[bold]Choose your backend[/bold] - If no input field is shown, restart the cell.",
        choices=BACKEND_CHOICES,
        default="client",
    )

console.print(
    f"\n✅ You have selected the '[bold green]{backend}[/bold green]' backend."
)

console.print(Rule(f"[bold]Setting up [cyan]{backend}[/cyan] backend[/bold]"))

if backend == "local":
    console.print("Attempting local backend setup...")
    import torch

    # The local backend is only set up when a CUDA device is visible to torch.
    if not torch.cuda.is_available():
        console.print(
            "[bold red]Error:[/bold red] GPU device not found. For fast training, please enable GPU.",
            style="red",
        )
        console.print(
            "In Colab: Go to [bold]Runtime -> Change runtime type -> Hardware accelerator -> GPU.[/bold]",
            style="yellow",
        )
        raise SystemError("GPU device not found.")
    console.print(
        "[bold green]✅ GPU is available.[/bold green] Importing local TabPFN library..."
    )
    from tabpfn import TabPFNClassifier, TabPFNRegressor

    console.print("[bold green]✅ TabPFN (local) imported successfully.[/bold green]")
elif backend == "client":
    console.print("Attempting client backend setup...")
    console.print("Importing TabPFN client library...")
    # Rebinds TabPFNClassifier / TabPFNRegressor to the API-backed versions.
    from tabpfn_client import TabPFNClassifier, TabPFNRegressor, init

    init()
    console.print("[bold green]✅ TabPFN (client) initialized.[/bold green]")

# Classification with TabPFN <a name="classification"></a>

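Now, let's dive into a practical example of using TabPFN for a classification task. We will use the freMTPL2freq dataset (French motor third-party liability insurance policies), loaded below from a local CSV file. The goal is to predict the number of claims (`ClaimNb`) recorded for each policy from the policy, driver, and vehicle attributes, treating each distinct claim count as a separate class.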

We will compare TabPFN's performance against other popular machine learning models: RandomForest, XGBoost, and CatBoost. The performance metric we will use is the [ROC AUC](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html) score.


In [36]:
# Cell 1: core imports and config
import logging
from pathlib import Path

import numpy as np
import pandas as pd

# Seed NumPy's global generator so NumPy-based randomness repeats across runs.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Let INFO-level (and above) records through the root logger.
logging.getLogger().setLevel(level=logging.INFO)

print('imports OK')


imports OK


In [37]:
# Reloadable data-load cell.
#
# Reads the CSV into `df` (optionally down-sampled), then derives the feature
# frame `X` and target series `y`. The CSV is re-read only when it has not been
# loaded yet or when any setting that influences the loaded rows has changed
# (path, SAMPLE_LIMIT, SAMPLE_MODE, or the sampling seed).
import os
from pathlib import Path
from IPython.display import Markdown, display

SAMPLE_LIMIT = 10000         # set to an int (max rows) or None to load all rows
SAMPLE_MODE = "sample"       # "sample" or "head"
# NOTE(review): this rebinds the notebook-wide RANDOM_SEED (42 in the config
# cell), so later cells that read RANDOM_SEED see 3. Kept as-is so downstream
# results do not change; consider a dedicated sampling-seed name.
RANDOM_SEED = 3

if SAMPLE_MODE not in ("sample", "head"):
    raise ValueError(f"SAMPLE_MODE must be 'sample' or 'head', got {SAMPLE_MODE!r}")

# The default location is machine-specific; set FREMTPL2FREQ_CSV to point at
# the file on other machines without editing the notebook.
csv_path = Path(
    os.environ.get(
        "FREMTPL2FREQ_CSV",
        "/Users/Scott/Documents/Data Science/ADSWP/TabPFN/BaselineExperiments/data/freMTPL2freq.csv",
    )
)
if not csv_path.exists():
    raise FileNotFoundError(f"CSV not found: {csv_path}")

# Everything that determines which rows end up in `df`. Comparing the whole
# tuple means a changed seed or path also triggers a reload, not just a
# changed limit or mode.
_load_signature = (str(csv_path), SAMPLE_LIMIT, SAMPLE_MODE, RANDOM_SEED)

reload_needed = (
    not globals().get("DATA_LOADED", False)
    or "df" not in globals()
    or globals().get("LAST_LOAD_SIGNATURE") != _load_signature
)

if reload_needed:
    df = pd.read_csv(csv_path)
    if SAMPLE_LIMIT is not None:
        if SAMPLE_MODE == "sample":
            # Random subset, capped at the number of available rows.
            df = df.sample(n=min(SAMPLE_LIMIT, len(df)), random_state=RANDOM_SEED).reset_index(drop=True)
        else:
            df = df.head(SAMPLE_LIMIT).copy().reset_index(drop=True)
    DATA_LOADED = True
    LAST_SAMPLE_LIMIT = SAMPLE_LIMIT
    LAST_SAMPLE_MODE = SAMPLE_MODE
    LAST_LOAD_SIGNATURE = _load_signature
    display(Markdown(f"### Loaded dataset: {csv_path.name} (SAMPLE_LIMIT={SAMPLE_LIMIT}, SAMPLE_MODE='{SAMPLE_MODE}')"))
else:
    display(Markdown(f"### Using already-loaded dataset in memory (SAMPLE_LIMIT={LAST_SAMPLE_LIMIT}, SAMPLE_MODE='{LAST_SAMPLE_MODE}')"))

display(Markdown(f"- shape: {df.shape}"))
display(Markdown("**Columns:**"))
print(list(df.columns))
display(Markdown("**First rows:**"))
display(df.head())

# Fall back to the default target when none is set or the current one is not a column.
if "TARGET_COLUMN" not in globals() or TARGET_COLUMN is None or TARGET_COLUMN not in df.columns:
    TARGET_COLUMN = "ClaimNb"

if TARGET_COLUMN is None or TARGET_COLUMN not in df.columns:
    raise ValueError(
        "Please set TARGET_COLUMN to the column name you want to predict. "
        "Available columns printed above."
    )

# NOTE(review): X keeps every non-target column, including `IDpol`, which looks
# like a policy identifier rather than a predictive feature - confirm whether
# it should be dropped before modelling.
X, y = df.drop(columns=[TARGET_COLUMN]), df[TARGET_COLUMN]

display(Markdown("### Feature preview"))
display(X.head())
display(Markdown("### Target preview"))
# Class counts for object/categorical targets, summary statistics otherwise.
display(y.value_counts() if y.dtype.kind == "O" or y.dtype.name == "category" else y.describe())
print('data load OK')

### Using already-loaded dataset in memory (SAMPLE_LIMIT=10000, SAMPLE_MODE='sample')

- shape: (10000, 12)

**Columns:**

['IDpol', 'ClaimNb', 'Exposure', 'Area', 'VehPower', 'VehAge', 'DrivAge', 'BonusMalus', 'VehBrand', 'VehGas', 'Density', 'Region']


**First rows:**

Unnamed: 0,IDpol,ClaimNb,Exposure,Area,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,Density,Region
0,1117193.0,0,0.62,B,5,5,38,50,B1,Diesel,91,R24
1,3133836.0,0,1.0,D,6,11,50,50,B3,Regular,1485,R82
2,4053839.0,0,0.57,C,8,1,42,50,B12,Diesel,116,R93
3,4146268.0,0,1.0,D,7,16,45,50,B4,Regular,1203,R54
4,85539.0,0,0.85,B,6,8,49,50,B1,Diesel,78,R82


### Feature preview

Unnamed: 0,IDpol,Exposure,Area,VehPower,VehAge,DrivAge,BonusMalus,VehBrand,VehGas,Density,Region
0,1117193.0,0.62,B,5,5,38,50,B1,Diesel,91,R24
1,3133836.0,1.0,D,6,11,50,50,B3,Regular,1485,R82
2,4053839.0,0.57,C,8,1,42,50,B12,Diesel,116,R93
3,4146268.0,1.0,D,7,16,45,50,B4,Regular,1203,R54
4,85539.0,0.85,B,6,8,49,50,B1,Diesel,78,R82


### Target preview

count    10000.000000
mean         0.056600
std          0.242079
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          2.000000
Name: ClaimNb, dtype: float64

data load OK


In [38]:
# Alternative datasets (commented for reference):

# German Credit Fraud (ID: 31)
# Samples: 1,000
# Features: 20 (account info, credit history, employment)
# Target: Good/Bad credit risk
# df = fetch_openml(data_id=31)

# Cholesterol dataset: Predict cholesterol levels
# Features: Patient characteristics, medical measurements
# Samples: 303 patients
# Target: Cholesterol levels in mg/dl
# df = fetch_openml('cholesterol', version=2, as_frame=True)

# Heart Disease dataset (Statlog): Predict presence of heart disease
# Features: Clinical and test measurements
# Samples: 270 patients
# Target: Binary heart disease diagnosis
# df = fetch_openml("heart-statlog", version=1)

# Diabetes dataset: Predict diabetes presence
# Features: Medical measurements, patient history
# Samples: 768 patients
# Target: Binary diabetes diagnosis
# df = fetch_openml("diabetes", version=1)

In [39]:
# Split the data into training and testing sets
# (fixed random_state so the split, and therefore the scores below, repeat).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=45
)

# Train and evaluate the TabPFN classifier
tabpfn_classifier = TabPFNClassifier(random_state=42)
tabpfn_classifier.fit(X_train, y_train)
y_pred_proba = tabpfn_classifier.predict_proba(X_test)

# Calculate the ROC AUC score.
# For a binary target roc_auc_score expects a 1-D score for the class with the
# greater label, so the second probability column is passed; with more than
# two classes the full matrix is scored one-vs-rest, weighted by class support.
# This mirrors the handling used for the baseline models.
if y_pred_proba.shape[1] == 2:
    roc_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
else:
    roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class="ovr", average="weighted")
print(f"TabPFN ROC AUC Score: {roc_auc:.4f}")

Processing: 100%|██████████| [00:03<00:00]

TabPFN ROC AUC Score: 0.7833





In [40]:
# New comparison cell: train baseline models and compare ROC AUCs
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Ensure target is numeric for ROC AUC (classes become 0..K-1 in sorted order)
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

# Each pipeline gets its own copy of the preprocessing step: make_pipeline does
# not copy its steps, so passing the same `column_transformer` object to every
# pipeline would make each fit() refit the one shared instance.
# `use_label_encoder` is not passed to XGBClassifier: current XGBoost releases
# ignore it and only emit a "Parameters ... are not used" warning.
models = {
    "RandomForest": make_pipeline(
        clone(column_transformer),
        RandomForestClassifier(n_estimators=200, random_state=RANDOM_SEED, n_jobs=-1),
    ),
    "XGBoost": make_pipeline(
        clone(column_transformer),
        XGBClassifier(n_estimators=200, random_state=RANDOM_SEED, eval_metric="logloss", n_jobs=-1),
    ),
    "CatBoost": make_pipeline(
        clone(column_transformer),
        CatBoostClassifier(iterations=500, random_state=RANDOM_SEED, verbose=0, thread_count=4),
    ),
}

results = []
for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train_enc)
    proba = model.predict_proba(X_test)
    # If binary classification, use proba[:, 1], else use multiclass ROC AUC
    if proba.shape[1] == 2:
        auc = roc_auc_score(y_test_enc, proba[:, 1])
    else:
        auc = roc_auc_score(y_test_enc, proba, multi_class="ovr", average="weighted")
    results.append({"model": name, "roc_auc": float(auc)})

# include TabPFN result computed earlier (variable `roc_auc`)
results.append({"model": "TabPFN", "roc_auc": float(roc_auc)})

# Best model first
results_df = pd.DataFrame(results).sort_values("roc_auc", ascending=False).reset_index(drop=True)
print("\nModel comparison (sorted by ROC AUC):")
print(results_df)

Training RandomForest...
Training XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Training CatBoost...

Model comparison (sorted by ROC AUC):
          model   roc_auc
0        TabPFN  0.783305
1  RandomForest  0.740722
2       XGBoost  0.737317
3      CatBoost  0.735375


In [33]:
# Fine-tune (best-effort single step) and recompute TabPFN ROC AUC
#
# No gradient-based fine-tuning happens in this cell: as a proxy, a TabPFN
# classifier is fitted on a small random subset of the training data and
# scored on the same test split. The result is stored in `roc_auc_post`.
#
# The subset model is a separate estimator, so the full-data
# `tabpfn_classifier`, its `y_pred_proba` and its `roc_auc` are left untouched.
# Overwriting them would make a re-run of the comparison cell report the
# subset score under the "TabPFN" label.
import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Not read anywhere in this cell; kept as the intended settings for a real
# fine-tuning call.
FINETUNE_CONFIG = dict(epochs=1, learning_rate=1e-5, meta_batch_size=1, batch_size=256, device="cpu")
N_SMALL = min(256, len(X_train))

# small random subset for quick finetune
X_small = X_train.sample(n=N_SMALL, random_state=RANDOM_SEED)
y_small = y_train.loc[X_small.index]

print(f"Attempting finetune on {len(X_small)} samples (best-effort)...")
finetuned = False

# Best-effort: a failure is reported and the rest of the notebook keeps working.
try:
    tabpfn_small_classifier = TabPFNClassifier(random_state=42)
    tabpfn_small_classifier.fit(X_small, y_small)
    finetuned = True
    print("Retrained on small subset as proxy finetune.")
except Exception as e:
    print("Retrain failed:", e)

# Recompute predictions & ROC AUC if finetune occurred
if finetuned:
    try:
        y_pred_proba_post = tabpfn_small_classifier.predict_proba(X_test)
        # Binary targets need the 1-D score of the greater label; otherwise the
        # full matrix is scored one-vs-rest. If the subset is missing a class
        # that occurs in y_test, roc_auc_score raises and the error is printed.
        if y_pred_proba_post.shape[1] == 2:
            roc_auc_post = roc_auc_score(y_test, y_pred_proba_post[:, 1])
        else:
            roc_auc_post = roc_auc_score(y_test, y_pred_proba_post, multi_class="ovr", average="weighted")
        print(f"Post-finetune TabPFN ROC AUC: {roc_auc_post:.4f}")
    except Exception as e:
        print("Predict after finetune failed:", e)
else:
    print("No finetune performed (no supported API found).")

Attempting finetune on 256 samples (best-effort)...
Retrained on small subset as proxy finetune.


Processing: 100%|██████████| [00:01<00:00]

Post-finetune TabPFN ROC AUC: 0.6897



