In [None]:
%pip uninstall -y lightgbm pyarrow cffi pycparser autogluon autogluon.tabular
%pip install -U pip setuptools wheel
%pip install -U --no-cache-dir cffi pycparser
%pip install -U --no-cache-dir pyarrow lightgbm
%pip install -U --no-cache-dir autogluon.tabular
!pip install autogluon.tabular[realmlp]==1.5.0

Collecting cffi
  Downloading cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.6 kB)
Collecting pycparser
  Downloading pycparser-3.0-py3-none-any.whl.metadata (8.2 kB)
Downloading cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (219 kB)
Downloading pycparser-3.0-py3-none-any.whl (48 kB)
Installing collected packages: pycparser, cffi
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [cffi]
[1A[2KSuccessfully installed cffi-2.0.0 pycparser-3.0
Collecting pyarrow
  Downloading pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.0 kB)
Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl.metadata (17 kB)
Downloading pyarrow-23.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.6/47.6 MB[0m [31m86.4 MB/s[0m  [33m0:00:00[0m
[?25hDownloading lightgbm-4.6.0-py3-none-manylinux_2_28_x86_64.whl (3.6 MB)
[2

In [None]:
import pandas as pd

In [None]:
# Load the synthetic train / validation / test splits from PATH.
import os

PATH = '/data/processed'

# os.path.join supplies the directory separator; plain `PATH + name` produced
# '/data/processedsynt-df_train.csv' because PATH has no trailing slash.
df_train = pd.read_csv(os.path.join(PATH, 'synt-df_train.csv'))
df_val = pd.read_csv(os.path.join(PATH, 'synt-df_val.csv'))
df_test = pd.read_csv(os.path.join(PATH, 'synt-df_test.csv'))

In [None]:
# Expose the 'Unnamed: 0' column as 'id' in every split, editing each frame in place.
# The default rename() ignores the mapping when the column is absent.
index_to_id = {'Unnamed: 0': 'id'}
for split_frame in (df_train, df_val, df_test):
    split_frame.rename(columns=index_to_id, inplace=True)

In [None]:
# Data types: store the identifier-like columns as pandas categoricals in all three splits.
for split_frame in (df_train, df_val, df_test):
    for cat_col in ('card_id', 'merchant_id', 'merchant_city'):
        split_frame[cat_col] = split_frame[cat_col].astype('category')

In [None]:
# Label columns: removed from the feature frames and used one at a time as the
# prediction target.
tasks = [
    'unique_merchant_id',
    'count_gt_12_card_id',
    'count_eq_3_card_id',
    'double_card_id_merchant_city_ONLINE',
    'duplicate_card_id_merchant_city',
]

In [None]:
def scores_dict_to_df(scores_dict):
    """
    Flatten per-task lists of metric dictionaries into one table.

    Parameters
    ----------
    scores_dict : dict
        Maps a task name to a list of metric dictionaries, one per run::

            {"task_name": [{metric: value, ...}, {metric: value, ...}], ...}

    Returns
    -------
    pd.DataFrame
        One row per (task, run). Columns are ``task``, ``run`` (the position of
        the metric dictionary within its list) and one column per metric, with
        every metric value converted through ``float``.
    """
    return pd.DataFrame([
        {
            "task": task_name,
            "run": run_number,
            # float() turns numpy scalars into plain Python floats
            **{metric_name: float(metric_value)
               for metric_name, metric_value in run_metrics.items()},
        }
        for task_name, run_list in scores_dict.items()
        for run_number, run_metrics in enumerate(run_list)
    ])


# Training models

In [None]:
# Feature-only copy of each split: every label column listed in `tasks` is removed.
df_train_task, df_val_task, df_test_task = (
    split_frame.drop(columns=tasks)
    for split_frame in (df_train, df_val, df_test)
)

In [None]:
col = "merchant_city"

# Gather every non-null value of `col` seen in any split, so that all three
# frames can later be encoded against one shared category vocabulary.
_col_as_text = [
    split_frame[col].astype("string")
    for split_frame in (df_train_task, df_val_task, df_test_task)
]
all_cats = pd.Index(pd.concat(_col_as_text, axis=0).dropna().unique())

def set_shared_category(df):
    """Re-encode ``df[col]`` as a categorical over the shared ``all_cats`` vocabulary.

    Reads the notebook-level ``col`` and ``all_cats``. The frame is modified in
    place and also returned.
    """
    as_text = df[col].astype("string")
    df[col] = pd.Categorical(as_text, categories=all_cats)
    return df

# Give all three splits the same category vocabulary for `col`.
df_train_task, df_val_task, df_test_task = (
    set_shared_category(split_frame)
    for split_frame in (df_train_task, df_val_task, df_test_task)
)

## Train LightGBM and RealMLP, using AutoGluon

In [None]:
from autogluon.tabular import TabularPredictor
from autogluon.features.generators import IdentityFeatureGenerator

OUTPUT_PATH_RESULTS = '/runs/'

# Target column for this run; must be one of the names listed in `tasks`.
t = 'unique_merchant_id'

# Attach the label to copies of the feature frames. The shared *_task frames stay
# label-free, and the labelled test frame is still available for evaluation below
# (previously the label was dropped from the test frame before evaluate() was called).
train_labeled = df_train_task.assign(**{t: df_train[t]})
val_labeled = df_val_task.assign(**{t: df_val[t]})
test_labeled = df_test_task.assign(**{t: df_test[t]})

predictor_ag = TabularPredictor(
    label=t,
    eval_metric="f1" #or roc_auc
).fit(
    train_data=train_labeled,
    tuning_data=val_labeled,
    time_limit=300,
    hyperparameters={
        "GBM": {},       # LightGBM
        "REALMLP": {},   # RealMLP
    },
    presets=None,
)

lb_ag = predictor_ag.leaderboard(test_labeled, silent=True, extra_metrics=["precision", "roc_auc"])

# Score each trained model on the labelled test split.
scores_lgbm = predictor_ag.evaluate(test_labeled, "LightGBM")
scores_mlp = predictor_ag.evaluate(test_labeled, "RealMLP")

# Run 0 is LightGBM and run 1 is RealMLP in the resulting table.
scores_dict = {"test": [scores_lgbm, scores_mlp]}

results = scores_dict_to_df(scores_dict)

results.to_csv(OUTPUT_PATH_RESULTS+f"Synthetic-Tabular_{t}.csv")



No path specified. Models will be saved in: "AutogluonModels/ag-20260124_164216"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.5.0
Python Version:     3.12.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Thu Oct  2 10:42:05 UTC 2025
CPU Count:          2
Pytorch Version:    2.9.0+cpu
CUDA Version:       CUDA is not available
Memory Avail:       10.92 GB / 12.67 GB (86.2%)
Disk Space Avail:   85.44 GB / 107.72 GB (79.3%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets. Defaulting to `'medium'`...
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='extreme'  : New in v1.5: The state-of-the-art for tabular data. Massively better than 'best' on datasets <100000 samples by using new Tabular Foundation Models (TFMs) meta-learned on https://tabarena.ai: TabPFNv2, TabICL, Mitra, TabDPT, and TabM. Require

Unnamed: 0,task,run,f1,accuracy,balanced_accuracy,mcc,roc_auc,precision,recall
0,test_0,0,0.160074,0.087,0.5,0.0,0.22298,0.087,1.0
1,test_0,1,0.05598,0.629,0.401663,-0.120282,0.290176,0.035948,0.126437


## TabPFN

In [None]:
## Base library Installation
# NOTE(review): the recorded output of this cell shows autogluon-* 1.4.0 packages being
# installed during the second install step, whereas the AutoGluon training cell logged
# version 1.5.0 — confirm this change is acceptable before re-running the AutoGluon section.

# Install the TabPFN Client library
!uv pip install tabpfn-client # To use TabPFN GPUs (account needed)

# Install TabPFN extensions for additional functionalities
!uv pip install 'tabpfn-extensions[all]'

# Install tabpfn
!uv pip install tabpfn

[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m33 packages[0m [2min 318ms[0m[0m
[2K[2mPrepared [1m4 packages[0m [2min 65ms[0m[0m
[2K[2mInstalled [1m4 packages[0m [2min 4ms[0m[0m
 [32m+[39m [1mbackoff[0m[2m==2.2.1[0m
 [32m+[39m [1mpassword-strength[0m[2m==0.0.3.post2[0m
 [32m+[39m [1msseclient-py[0m[2m==1.8.0[0m
 [32m+[39m [1mtabpfn-client[0m[2m==0.2.8[0m
[2mUsing Python 3.12.12 environment at: /usr[0m
[2K[2mResolved [1m96 packages[0m [2min 700ms[0m[0m
[2K[2mPrepared [1m19 packages[0m [2min 538ms[0m[0m
[2mUninstalled [1m1 package[0m [2min 2ms[0m[0m
[2K[2mInstalled [1m19 packages[0m [2min 76ms[0m[0m
 [32m+[39m [1mautogluon-common[0m[2m==1.4.0[0m
 [32m+[39m [1mautogluon-core[0m[2m==1.4.0[0m
 [32m+[39m [1mautogluon-features[0m[2m==1.4.0[0m
 [32m+[39m [1mautogluon-tabular[0m[2m==1.4.0[0m
 [32m+[39m [1mboto3[0m[2m==1.42.35[0m
 [32m+[39m [1mbotocore[0m[2m==1.42.35

In [None]:
# TabPFN client classifier plus the scikit-learn helpers imported for evaluation.
from tabpfn_client import TabPFNClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, f1_score

# Printed only once the imports above have succeeded.
print("TabPFNClassifier imported successfully.")

TabPFNClassifier imported successfully.


In [None]:
# Feature matrices for TabPFN: the train and test splits without any label column from `tasks`.
X_train, X_test = (
    split_frame.drop(columns=tasks)
    for split_frame in (df_train, df_test)
)

In [None]:
# Target column for the TabPFN run, and its train / test label vectors.
t = 'double_card_id_merchant_city_ONLINE'
y_train, y_test = df_train[t], df_test[t]

# Fixed random_state for the classifier.
tabpfn_classifier = TabPFNClassifier(random_state=42)

In [None]:
# Fit on the training features and the labels of the currently selected target `t`.
tabpfn_classifier.fit(X_train, y_train)

In [None]:
# Hard labels are scored with F1; column 1 of predict_proba (assumed to be the
# positive class) is scored with ROC AUC.
preds = tabpfn_classifier.predict(X_test)
preds_proba = tabpfn_classifier.predict_proba(X_test)

f1_tabpfn = f1_score(y_test, preds)
roc_auc_tabpfn = roc_auc_score(y_test, preds_proba[:,1])

for metric_label, metric_value in (("F1 score", f1_tabpfn), ("roc_auc", roc_auc_tabpfn)):
    print(metric_label, metric_value)

Processing: 100%|██████████| [00:07<00:00]
Processing: 100%|██████████| [00:01<00:00]


In [None]:
# Single-row summary of the TabPFN run, saved next to the other result files.
tabpfn_row = {
    "model": "TabPFN2.5",
    "f1_score": f1_tabpfn,
    "roc_auc": roc_auc_tabpfn,
}
results_tabpfn = pd.DataFrame([tabpfn_row])

results_tabpfn.to_csv(OUTPUT_PATH_RESULTS+f"Synthetic-TABPFN_{t}.csv")

## Optional: finetuning

In [None]:
# Requires agreeing to T&C from TabPFN
import os

from huggingface_hub import login

# Read the token from the HF_TOKEN environment variable so that no secret is stored
# in the notebook. With the previous hard-coded `None`, the login branch could never run.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(hf_token)
    print("Successfully logged in to Hugging Face!")
else:
    print("Token is not set. Please save the token first.")

Successfully logged in to Hugging Face!


In [None]:
import logging
import warnings

import numpy as np
import sklearn.datasets
import torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split

from tabpfn_client import TabPFNClassifier # Or from tabpfn if you want to use your own GPU
from tabpfn.finetuning.finetuned_classifier import (
    FinetunedTabPFNClassifier,
)

# Ignore FutureWarning raised from modules matching the google.api_core
# Python-version-support pattern.
_ignored_module = r"google\.api_core\._python_version_support"
warnings.filterwarnings("ignore", category=FutureWarning, module=_ignored_module)

# INFO-level logging with a timestamp and level prefix on every message.
_log_format = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_log_format)

In [None]:
# Training hyperparameters
# Passed to FinetunedTabPFNClassifier as `epochs` and `learning_rate` in main().
NUM_EPOCHS = 30
LEARNING_RATE = 2e-5


# Passed as `n_estimators_finetune`.
NUM_ESTIMATORS_FINETUNE = 2

# Passed as `n_estimators_validation`.
NUM_ESTIMATORS_VALIDATION = 2

# Passed as `n_estimators_final_inference`, and as `n_estimators` of the baseline classifier.
NUM_ESTIMATORS_FINAL_INFERENCE = 8

# Reproducibility
# NOTE(review): RANDOM_STATE is not referenced by any code visible in this notebook —
# confirm whether it should be passed to the classifiers.
RANDOM_STATE = 0

In [None]:
from sklearn.metrics import precision_recall_curve

def calculate_roc_auc(y_true: np.ndarray, y_pred_proba: np.ndarray) -> float:
    """Return the ROC AUC for binary or multiclass predictions.

    When ``y_true`` holds exactly two distinct values, column 1 of
    ``y_pred_proba`` is scored; otherwise the full probability matrix is
    scored one-vs-rest.
    """
    distinct_labels = np.unique(y_true).size
    if distinct_labels != 2:
        return roc_auc_score(y_true, y_pred_proba, multi_class="ovr")  # pyright: ignore[reportReturnType]
    second_column = y_pred_proba[:, 1]
    return roc_auc_score(y_true, second_column)  # pyright: ignore[reportReturnType]


def main() -> tuple[float, float]:
    """Compare the default TabPFN classifier with a fine-tuned one on the test split.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    as well as the ``NUM_*`` / ``LEARNING_RATE`` hyperparameter constants.

    Returns
    -------
    tuple[float, float]
        ``(roc_auc, f1)`` of the fine-tuned model on the test split.
    """
    # 1. Baseline: default TabPFN classifier, no fine-tuning
    base_clf = TabPFNClassifier(
        #device=[f"cuda:{i}" for i in range(torch.cuda.device_count())],
        n_estimators=NUM_ESTIMATORS_FINAL_INFERENCE,
        ignore_pretraining_limits=True,
        inference_config={"SUBSAMPLE_SAMPLES": 50_000},
    )
    base_clf.fit(X_train, y_train)

    base_pred_proba = base_clf.predict_proba(X_test)
    base_roc_auc = calculate_roc_auc(y_test, base_pred_proba)

    print(f"📊 Default TabPFN Test ROC: {base_roc_auc:.4f}")

    # 2. Initialize and run fine-tuning
    print("--- 2. Initializing and Fitting Model ---\n")

    # Instantiate the wrapper with the hyperparameters from the config cell
    finetuned_clf = FinetunedTabPFNClassifier(
        device="cuda",
        epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE,
        n_estimators_finetune=NUM_ESTIMATORS_FINETUNE,
        n_estimators_validation=NUM_ESTIMATORS_VALIDATION,
        n_estimators_final_inference=NUM_ESTIMATORS_FINAL_INFERENCE,
    )

    # Calling .fit() starts the fine-tuning process on the training data
    finetuned_clf.fit(X_train, y_train)
    print("\n")

    # 3. Evaluate the fine-tuned model
    print("--- 3. Evaluating Model on Held-out Test Set ---\n")
    y_pred_proba = finetuned_clf.predict_proba(X_test)
    y_pred = finetuned_clf.predict(X_test)

    # Score the fine-tuned model's own probabilities; the baseline's ROC AUC was
    # previously reported here instead.
    finetuned_roc_auc = calculate_roc_auc(y_test, y_pred_proba)
    f1 = f1_score(y_test, y_pred)

    print(f"📊 Finetuned TabPFN Test ROC: {finetuned_roc_auc:.4f}")
    print(f"📊 Finetuned TabPFN Test F1: {f1:.4f}")

    return finetuned_roc_auc, f1


In [None]:
from sklearn.metrics import precision_recall_curve

# main() returns a (roc_auc, f1) pair; both values are kept for the results table.
roc_auc_tabpfnFT, f1_tabpfnFT = main()

Processing: 100%|██████████| [00:02<00:00]
  super().fit(X, y, X_val=X_val, y_val=y_val, output_dir=output_dir)


📊 Default TabPFN Test ROC: 0.8261
📊 Default TabPFN Test Log Loss: 1.6451

--- 2. Initializing and Fitting Model ---



Finetuning Epoch 1/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 2/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 3/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 4/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 5/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 6/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 7/30:   0%|          | 0/1 [00:00<?, ?it/s]

Finetuning Epoch 8/30:   0%|          | 0/1 [00:00<?, ?it/s]



--- 3. Evaluating Model on Held-out Test Set ---





📊 Finetuned TabPFN Test ROC: 0.8261
📊 Finetuned TabPFN Test F1: 0.6684


In [None]:
# Single-row summary of the values returned by main(), saved to its own results file.
tabpfn_ft_row = {
    "model": "TabPFN2.5",
    "f1_score": f1_tabpfnFT,
    "roc_auc": roc_auc_tabpfnFT,
}
results_tabpfnFT = pd.DataFrame([tabpfn_ft_row])

results_tabpfnFT.to_csv(OUTPUT_PATH_RESULTS+f"Synthetic-TABPFN-FT_{t}.csv")