
# 🔍 Tabular Classification Benchmark: TabPFN vs Baselines

This notebook evaluates **TabPFN** against four popular tabular classification models:
- AutoGluon
- CatBoost
- LightGBM
- XGBoost

### 📊 Datasets:
- **Heart Disease** dataset (starter)
- Three additional benchmark datasets from the TabPFN paper (page 20).

### ⚙️ Features:
- Toggle for preprocessing (on/off)
- Uniform evaluation pipeline
- Dataset-agnostic design: simply plug in your dataset (must have a target column).

---


In [None]:
# Kaggle starter cell: import the core data libraries and list the available input files.
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# The loop below walks /kaggle/input recursively and prints the full path of every file found.

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Installing Packages

In [None]:
# Install the notebook's dependencies with pip and show the combined logs in a scrollable box.
import html
import shlex
import subprocess
import sys

from IPython.display import display, HTML
from tqdm.notebook import tqdm

# List of packages to install. Each entry is one `pip install` invocation and may
# carry its own flags (see the "--upgrade git+..." entry).
packages = [
    "scikit-learn==1.5.2",
    "tabpfn",
    "catboost",
    "xgboost",
    "autogluon",
    "datasets",
    "--upgrade git+https://github.com/automl/TabPFN.git",
    "ucimlrepo"
]

# Store installation logs and the entries whose install returned a non-zero exit code
logs = []
failed = []

# Install packages with a progress bar
for pkg in tqdm(packages, desc="Installing Packages", unit="pkg"):
    # shlex.split turns "--upgrade git+https://..." into two argv entries; handed
    # over as one string, pip receives a single malformed option and the install fails.
    # `sys.executable -m pip` installs into the interpreter that runs this kernel.
    log = subprocess.run(
        [sys.executable, "-m", "pip", "install", *shlex.split(pkg)],
        capture_output=True,
        text=True,
    )
    logs.append(log.stdout + log.stderr)
    if log.returncode != 0:
        failed.append(pkg)

# Create a scrollable output box for installation logs.
# The log text is escaped so characters such as "<" in pip output are shown literally
# instead of being parsed as HTML.
scrollable_logs = "<pre style='max-height: 300px; overflow-y: scroll; border: 1px solid #ccc; padding: 10px;'>" \
                  + html.escape("\n".join(logs)) + "</pre>"

display(HTML(scrollable_logs))

# Surface failures explicitly; otherwise they are only visible inside the scroll box.
if failed:
    print("pip install failed for: " + ", ".join(failed))

In [None]:
# --- Code with Comments ---
# !pip install --upgrade --force-reinstall --no-cache-dir tabpfn

In [None]:
# --- Code with Comments ---
import pandas as pd  # Import necessary library
import numpy as np  # Import necessary library
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt  # Import necessary library
from tabpfn import TabPFNClassifier
from ucimlrepo import fetch_ucirepo

## Importing datasets

In [None]:
# Fetch dataset id=144 from the UCI ML repository and split it into features and targets.
statlog_german_credit_data = fetch_ucirepo(id=144)

X = statlog_german_credit_data.data.features
y = statlog_german_credit_data.data.targets

In [None]:
# Preview the first five rows of the feature table.
X.head(5)

## Data Processing

### Convert all columns to numeric codes for TabPFN

In [None]:
# Step 1: Replace every non-numeric feature column with integer codes.
# Numeric columns are copied through unchanged.
X_enc = X.copy()
for col in X_enc.columns:
    if not pd.api.types.is_numeric_dtype(X_enc[col]):
        # Values are cast to str first; each column gets its own, freshly fitted LabelEncoder.
        X_enc[col] = LabelEncoder().fit_transform(X_enc[col].astype(str))

In [None]:
# Step 2: Build a 1-D target vector, label-encoding it when it is object/category typed.
if isinstance(y, pd.DataFrame):
    y_vec = y.iloc[:, 0]  # a target DataFrame contributes its first column
else:
    y_vec = y
if y_vec.dtype == "object" or str(y_vec.dtype).startswith("category"):
    y_enc = LabelEncoder().fit_transform(y_vec)  # labels -> integer codes
else:
    y_enc = np.asarray(y_vec).ravel()  # any other dtype: flatten to a 1-D array as-is

### Fit TabPFN and extract embeddings

In [None]:
# Step 3: Fit the TabPFN classifier on the encoded data (CUDA when available, otherwise CPU).
import torch

# A hard-coded device="cuda" fails on CPU-only sessions; pick the device at run time,
# the same way the benchmark cells further down do.
device = "cuda" if torch.cuda.is_available() else "cpu"
clf = TabPFNClassifier(device=device)
clf.fit(X_enc.values, y_enc)

In [None]:
# --- Code with Comments ---
# ! pip install --upgrade tabpfn

In [None]:
# Print the version of the installed tabpfn package.
import tabpfn
print(tabpfn.__version__)

In [None]:
# Step 4: Use predict_proba() as a proxy for embeddings.
# Note: the probabilities are computed for X_enc, i.e. the same rows `clf` was fitted on,
# not for held-out data.
X_tabpfn_embed = clf.predict_proba(X_enc.values)

### Plot PCA comparison

In [None]:
# --- Code with Comments ---
import seaborn as sns  # Import necessary library

def _pca_project_2d(features):
    """Standardize ``features`` and project them onto two principal components.

    Returns:
        (pca, projected): the fitted ``PCA`` object and its two-column projection.
    """
    scaled = StandardScaler().fit_transform(features)
    pca = PCA(n_components=2)
    return pca, pca.fit_transform(scaled)


def _draw_light_panel(ax, points, labels, unique_labels, colors, title, panel_tag):
    """Draw one class-colored scatter panel with the light-theme styling and its legend."""
    for i, label in enumerate(unique_labels):
        mask = labels == label
        ax.scatter(points[mask, 0], points[mask, 1],
                   c=colors[i % len(colors)], alpha=0.8, s=60,
                   edgecolors='white', linewidth=0.5, label=f'Class {label}')

    # Title and axis labels
    ax.set_title(title, fontsize=16, fontweight='bold', pad=20, color='#2C3E50')
    ax.set_xlabel('First Principal Component (PC1)', fontsize=13, fontweight='500', color='#34495E')
    ax.set_ylabel('Second Principal Component (PC2)', fontsize=13, fontweight='500', color='#34495E')

    # Grid, background and spines
    ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.8)
    ax.set_facecolor('#FAFAFA')
    for spine in ax.spines.values():
        spine.set_color('#BDC3C7')
        spine.set_linewidth(1.2)

    # Tick styling
    ax.tick_params(axis='both', which='major', labelsize=11, colors='#34495E')
    ax.tick_params(axis='both', which='minor', labelsize=9, colors='#7F8C8D')

    # Panel label ("A" / "B") in the top-left corner
    ax.text(0.02, 0.98, panel_tag, transform=ax.transAxes, fontsize=18,
            fontweight='bold', va='top', ha='left',
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

    # Legend placed outside the axes, to the right
    legend = ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left',
                       frameon=True, fancybox=True, shadow=True,
                       fontsize=10, title='Classes', title_fontsize=11)
    legend.get_frame().set_facecolor('white')
    legend.get_frame().set_alpha(0.9)


def create_tabpfn_style_plot(X_original, X_embedded, y, figsize=(12, 10)):
    """
    Create an enhanced PCA comparison plot with improved visual styling.

    Both inputs are standardized and reduced to two principal components. The
    projection of ``X_embedded`` is drawn in the top panel ("A"), the projection
    of ``X_original`` in the bottom panel ("B"); points are colored by class.
    The figure is shown and the explained-variance ratios are printed.

    Args:
        X_original: original feature matrix (array-like accepted by StandardScaler).
        X_embedded: embedding matrix with the same number of rows.
        y: class label per row; list, array, Series or single-column frame.
        figsize: figure size passed to ``plt.subplots``.

    Returns:
        ``(fig, (ax1, ax2), (pca_orig, pca_emb))``.
    """
    # Set the style (these calls change global matplotlib/seaborn settings)
    plt.style.use('default')
    sns.set_palette("husl")

    # Normalise the labels to a flat array. With a plain list, `y == label` is a single
    # bool rather than a per-row mask; with a single-column frame the mask is 2-D.
    labels = np.asarray(y).ravel()
    unique_labels = np.unique(labels)

    # Standardize + PCA, independently for each representation
    pca_orig, X_pca_orig = _pca_project_2d(X_original)
    pca_emb, X_pca_emb = _pca_project_2d(X_embedded)

    # Create figure with enhanced styling
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, facecolor='white')
    fig.patch.set_facecolor('white')

    # Enhanced color palette with better contrast (cycled when there are more classes)
    colors = [
        '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
        '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9'
    ]

    _draw_light_panel(ax1, X_pca_emb, labels, unique_labels, colors,
                      'TabPFN Embeddings + PCA', 'A')
    _draw_light_panel(ax2, X_pca_orig, labels, unique_labels, colors,
                      'Original Features + PCA', 'B')

    # Adjust layout; right=0.85 leaves room for the legends outside the axes
    plt.tight_layout()
    plt.subplots_adjust(hspace=0.4, right=0.85)

    # Add a subtle border around the entire figure
    fig.patch.set_edgecolor('#BDC3C7')
    fig.patch.set_linewidth(2)

    plt.show()

    # Enhanced variance explanation output
    print("=" * 60)
    print("📊 PCA VARIANCE EXPLANATION")
    print("=" * 60)
    print(f"🔹 TabPFN Embeddings:")
    print(f"   PC1: {pca_emb.explained_variance_ratio_[0]:.2%} | PC2: {pca_emb.explained_variance_ratio_[1]:.2%}")
    print(f"   Total: {sum(pca_emb.explained_variance_ratio_[:2]):.2%}")
    print()
    print(f"🔹 Original Features:")
    print(f"   PC1: {pca_orig.explained_variance_ratio_[0]:.2%} | PC2: {pca_orig.explained_variance_ratio_[1]:.2%}")
    print(f"   Total: {sum(pca_orig.explained_variance_ratio_[:2]):.2%}")
    print("=" * 60)

    return fig, (ax1, ax2), (pca_orig, pca_emb)

# Alternative version with dark theme
def create_tabpfn_style_plot_dark(X_original, X_embedded, y, figsize=(12, 10)):
    """
    Create a dark-themed PCA comparison plot.

    Both inputs are standardized and reduced to two principal components. The
    projection of ``X_embedded`` is drawn in the top panel, the projection of
    ``X_original`` in the bottom panel; every class is drawn twice (a large
    translucent marker under a smaller opaque one) to give a glow effect.

    The dark style is applied only while this figure is built, so plots created
    afterwards keep the style that was active before the call.

    Args:
        X_original: original feature matrix (array-like accepted by StandardScaler).
        X_embedded: embedding matrix with the same number of rows.
        y: class label per row; list, array, Series or single-column frame.
        figsize: figure size passed to ``plt.subplots``.

    Returns:
        ``(fig, (ax1, ax2), (pca_orig, pca_emb))``.
    """
    # Flat label array: with a plain list, `y == label` is a single bool, not a row mask.
    labels = np.asarray(y).ravel()
    unique_labels = np.unique(labels)

    # Standardize the data, then apply PCA (independently per representation)
    pca_orig = PCA(n_components=2)
    pca_emb = PCA(n_components=2)
    X_pca_orig = pca_orig.fit_transform(StandardScaler().fit_transform(X_original))
    X_pca_emb = pca_emb.fit_transform(StandardScaler().fit_transform(X_embedded))

    # Neon-inspired color palette (cycled when there are more classes than colors)
    colors = [
        '#FF0080', '#00FF80', '#0080FF', '#FF8000', '#8000FF',
        '#00FFFF', '#FFFF00', '#FF0040', '#40FF00', '#4000FF'
    ]

    # style.context restores the previous rcParams on exit; plt.style.use() would
    # switch every later plot in the notebook to the dark theme as well.
    with plt.style.context('dark_background'):
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, facecolor='#1E1E1E')

        panels = (
            (ax1, X_pca_emb, 'TabPFN Embeddings + PCA'),
            (ax2, X_pca_orig, 'Original Features + PCA'),
        )
        for ax, points, title in panels:
            for i, label in enumerate(unique_labels):
                mask = labels == label
                color = colors[i % len(colors)]
                # Glow layer: larger, translucent, no edge
                ax.scatter(points[mask, 0], points[mask, 1],
                           c=color, alpha=0.3, s=80, edgecolors='none')
                # Core marker; only this layer carries the legend label
                ax.scatter(points[mask, 0], points[mask, 1],
                           c=color, alpha=0.9, s=40,
                           edgecolors='white', linewidth=0.5, label=f'Class {label}')

            ax.set_title(title, fontsize=16, fontweight='bold',
                         pad=20, color='white')
            ax.set_xlabel('PC1', fontsize=13, color='#CCCCCC')
            ax.set_ylabel('PC2', fontsize=13, color='#CCCCCC')
            ax.grid(True, alpha=0.2, color='white')
            ax.set_facecolor('#0A0A0A')

        plt.tight_layout()
        plt.subplots_adjust(hspace=0.4)
        plt.show()

    return fig, (ax1, ax2), (pca_orig, pca_emb)

In [None]:
# Plot the PCA comparison: TabPFN probability outputs vs. the encoded original features,
# colored by the encoded target.
create_tabpfn_style_plot(X_enc, X_tabpfn_embed, y_enc)

While working with the TabPFN model, an error was encountered when attempting to extract embeddings using `clf.transform(X_enc.values)`. This resulted in an `AttributeError` because the `TabPFNClassifier` does not implement a `.transform()` method. Unlike typical scikit-learn models or some transformer-based models that expose intermediate representations, TabPFN focuses on end-to-end prediction and does not provide direct access to internal embeddings. To address this, a workaround was used by leveraging the model’s `predict_proba()` output as a proxy for embeddings. Although not true internal features, the probability vectors still reflect the model’s learned structure and were suitable for visualization via PCA.

# **Comparing Different Models**

In [None]:
# 📥 Load dataset (user must specify path and target column)
import pandas as pd
DATA_PATH = '/kaggle/input/your-dataset.csv'  # <-- Change this
TARGET_COL = 'target'  # <-- Change this
df = pd.read_csv(DATA_PATH)

# Fail early with a clear message instead of a KeyError deep inside a later cell.
if TARGET_COL not in df.columns:
    raise KeyError(f"Target column {TARGET_COL!r} not found in {DATA_PATH}")

# The cells below read a frame named `df_2` whose label column is called "target",
# but no cell defines it. Build it here from the loaded data so the notebook runs
# top to bottom and TARGET_COL is actually honoured.
df_2 = df.rename(columns={TARGET_COL: 'target'})

print(f'Dataset loaded: {df.shape[0]} rows, {df.shape[1]} columns')
df.head()

## **Data Processing**

In [None]:
# --- Code with Comments ---
import pandas as pd  # Import necessary library
import numpy as np  # Import necessary library
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

def preprocess_dataframe(df_2, target_col="target"):
    """
    Preprocess a DataFrame for the benchmark models.

    Steps:
    - Label-encode the target column (if present) and remove it from the features
    - Numerical features: mean imputation, then min-max scaling
    - Categorical features (object / category dtype): most-frequent imputation,
      then one-hot encoding

    Columns that are neither numeric nor object/category typed (for example bool
    or datetime columns) are not listed in any transformer and are therefore
    dropped by the ColumnTransformer.

    Args:
        df_2: input DataFrame; it is not modified.
        target_col: name of the label column. Defaults to "target".

    Returns:
        - Processed feature DataFrame X (columns named by the fitted transformer)
        - Processed target Series y named "target", or None when ``target_col``
          is not a column of ``df_2``
    """

    # Separate target if present
    y_processed = None
    if target_col in df_2.columns:
        le = LabelEncoder()
        # The Series keeps the name "target" whatever the source column was called,
        # because downstream cells rely on that name.
        y_processed = pd.Series(le.fit_transform(df_2[target_col]), name="target")
        df_2 = df_2.drop(columns=[target_col])

    # Identify categorical and numerical columns.
    # "number" matches every integer/float width. The former ["int64", "float64"]
    # filter skipped int32/float32/... columns, which were then silently dropped.
    categorical_cols = df_2.select_dtypes(include=["object", "category"]).columns.tolist()
    numerical_cols = df_2.select_dtypes(include="number").columns.tolist()

    # Define transformations
    numeric_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", MinMaxScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
    ])

    # Create the preprocessor
    preprocessor = ColumnTransformer(transformers=[
        ("num", numeric_transformer, numerical_cols),
        ("cat", categorical_transformer, categorical_cols)
    ])

    # Fit on the full frame and transform it in one step
    X_processed = preprocessor.fit_transform(df_2)

    # Get transformed column names
    feature_names = preprocessor.get_feature_names_out()

    # Convert to DataFrame
    X_processed_df = pd.DataFrame(X_processed, columns=feature_names)

    return X_processed_df, y_processed

In [None]:
# Run the preprocessing on `df_2` and preview the processed features.
# NOTE(review): `df_2` must already exist in the kernel with a "target" column —
# confirm which earlier cell provides it.
X_processed_df, y_processed_series = preprocess_dataframe(df_2)
print("Preprocessing complete!")
X_processed_df.head()

In [None]:
# Preview the encoded target (skipped when the frame had no "target" column).
if y_processed_series is not None:
    print(y_processed_series.head())

In [None]:
# --- Code with Comments ---
%matplotlib inline

In [None]:
# Plot the class balance of the raw target column.
import seaborn as sns
import matplotlib.pyplot as plt

x = df_2.target.value_counts()  # per-class row counts; not used further in this cell
p  = sns.countplot(data=df_2, x='target')
plt.show()

## **Train-Test Splitting**

In [None]:
# Hold out 33% of the processed rows as a test set, with a fixed seed.
# NOTE(review): the split is not stratified (no `stratify=` argument) — consider
# stratifying on the target for imbalanced classes.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_processed_df, y_processed_series, test_size=0.33, random_state=42)

#### ***Distribution on Original Data***

In [None]:
# Histograms (50 bins) of the raw frame `df_2`.
df_2.hist(bins=50, figsize=(20,15))
plt.show()

#### ***Distribution on Processed Data***

In [None]:
# Histograms (50 bins) of the processed feature frame.
X_processed_df.hist(bins=50, figsize=(20,15))
plt.show()

## **Defining Models**

In [None]:
# --- Code with Comments ---
import pandas as pd  # Import necessary library
import numpy as np  # Import necessary library
import torch  # Import necessary library
import matplotlib.pyplot as plt  # Import necessary library
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score
from autogluon.tabular import TabularPredictor
from tabpfn import TabPFNClassifier

In [None]:
# Train TabPFN on the training split and score it on the held-out test split.
from sklearn.metrics import roc_auc_score

y_pred = TabPFNClassifier(random_state=42).fit(X_train, y_train).predict_proba(X_test)

# Calculate ROC AUC (handles both binary and multiclass).
# The class count comes from this split's labels. The previous check used `y`, which
# is the target table of the dataset loaded in the embedding section, not this data.
if len(np.unique(y_train)) > 2:
    # For multiclass targets roc_auc_score needs an explicit multi_class strategy.
    score = roc_auc_score(y_test, y_pred, multi_class='ovr')
else:
    # Binary: score the probability of the second (positive) class.
    score = roc_auc_score(y_test, y_pred[:, 1])
print(f"TabPFN ROC AUC: {score:.4f}")

In [None]:
# --- Code with Comments ---
# # Define models
# models = [
#     ('TabPFN', TabPFNClassifier(random_state=42)),
#     ('RandomForest', RandomForestClassifier(random_state=42)),
#     ('XGBoost', XGBClassifier(random_state=42)),
#     ('CatBoost', CatBoostClassifier(random_state=42, verbose=0)),
#     ('LightGBM', LGBMClassifier(random_state=42)),  # Adding LightGBM
# ]

# # Convert y to Series
# y_series = pd.Series(y_train, name='target')

# # Prepare training data
# train_data = pd.concat([X_train, y_train], axis=1)

# # Fit AutoGluon
# autogluon_model = TabularPredictor(label='target').fit(train_data)  # Train the model on training data

# # Calculate scores
# scoring = 'roc_auc_ovr' if len(np.unique(y)) > 2 else 'roc_auc'
# scores = {name: cross_val_score(model, X_train, y_train, cv=5, scoring=scoring, n_jobs=1, verbose=1).mean()
#           for name, model in models}

# # Evaluate AutoGluon
# autogluon_score = autogluon_model.evaluate(train_data)
# scores['AutoGluon'] = autogluon_score.get(scoring, None)

# # Plot results
# df = pd.DataFrame(list(scores.items()), columns=['Model', 'ROC AUC'])
# ax = df.plot(x='Model', y='ROC AUC', kind='bar', figsize=(10, 6))
# ax.set_ylim(df['ROC AUC'].min() * 0.995, min(1.0, df['ROC AUC'].max() * 1.005))
# ax.set_title('Model Comparison - 5-fold Cross-validation')

In [None]:
# Benchmark: ROC AUC of the classical models, AutoGluon, TabPFN and a small TabPFN ensemble.
# Classical models and TabPFN are scored with the SAME 5 stratified folds of the training
# split; AutoGluon is fitted once on the training split and scored on the held-out test split.

def _cv_fold_auc(y_true, probas):
    """Return ROC AUC from class probabilities (binary: positive column; else one-vs-rest)."""
    probas = np.asarray(probas)
    if probas.shape[1] == 2:
        return roc_auc_score(y_true, probas[:, 1])
    return roc_auc_score(y_true, probas, multi_class='ovr')


# Define classical models
models = [
    ('RandomForest', RandomForestClassifier(random_state=42)),
    ('XGBoost', XGBClassifier(random_state=42)),
    ('CatBoost', CatBoostClassifier(random_state=42, verbose=0)),
    ('LightGBM', LGBMClassifier(random_state=42)),
]

# Scoring metric
scoring = 'roc_auc_ovr' if len(np.unique(y_train)) > 2 else 'roc_auc'
scores = {}

# One splitter for every cross-validated model. With cv=5 the classical models were
# scored on unshuffled folds while TabPFN used these shuffled ones.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# ---------------------- Classical Models ----------------------
for name, model in models:
    score = cross_val_score(model, X_train, y_train, cv=kf, scoring=scoring, n_jobs=1, verbose=1).mean()
    scores[name] = score

# ---------------------- AutoGluon ----------------------
train_data = X_train.copy()
train_data['target'] = y_train.values

test_data = X_test.copy()
test_data['target'] = y_test.values

autogluon_model = TabularPredictor(label='target').fit(train_data)
autogluon_score = autogluon_model.evaluate(test_data)
# Compute ROC AUC from the predicted probabilities instead of looking up the sklearn
# scorer name in AutoGluon's metric dict: a missing key (e.g. 'roc_auc_ovr') stored
# None in `scores` and broke the chart below.
autogluon_probas = autogluon_model.predict_proba(test_data.drop(columns=['target']))
scores['AutoGluon'] = _cv_fold_auc(y_test, autogluon_probas)

# ---------------------- TabPFN (Standard) ----------------------
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tabpfn_model = TabPFNClassifier(device=device)

tabpfn_scores = []

for train_idx, test_idx in kf.split(X_train, y_train):
    X_fold_train, X_fold_test = X_train.iloc[train_idx], X_train.iloc[test_idx]
    y_fold_train, y_fold_test = y_train.iloc[train_idx], y_train.iloc[test_idx]

    tabpfn_model.fit(X_fold_train.values, y_fold_train)
    probas = tabpfn_model.predict_proba(X_fold_test.values)

    tabpfn_scores.append(_cv_fold_auc(y_fold_test, probas))

scores['TabPFN'] = np.mean(tabpfn_scores)

# ---------------------- TabPFN (PHE-style) ----------------------
# Ensemble of 3 TabPFN models per fold: the members' predicted probabilities are averaged
# and the averaged prediction is scored. Each member gets its own seed; averaging the AUCs
# of identically configured models is not an ensemble and can simply repeat the plain
# TabPFN score.
phe_scores = []
for train_idx, test_idx in kf.split(X_train, y_train):
    member_probas = []
    for seed in range(3):
        phe_model = TabPFNClassifier(device=device, random_state=seed)
        phe_model.fit(X_train.iloc[train_idx].values, y_train.iloc[train_idx])
        member_probas.append(phe_model.predict_proba(X_train.iloc[test_idx].values))

    ensemble_probas = np.mean(member_probas, axis=0)
    phe_scores.append(_cv_fold_auc(y_train.iloc[test_idx], ensemble_probas))

scores['TabPFN (PHE)'] = np.mean(phe_scores)

# ---------------------- Plot Results ----------------------
# NOTE: `df` is rebound here to the score table (it previously held the loaded dataset).
df = pd.DataFrame(list(scores.items()), columns=['Model', 'ROC AUC'])
ax = df.plot(x='Model', y='ROC AUC', kind='bar', figsize=(12, 6), color='teal')
# Zoom the y-axis onto the observed score range so small differences are visible
ax.set_ylim(df['ROC AUC'].min() * 0.995, min(1.0, df['ROC AUC'].max() * 1.005))
ax.set_title('Model Comparison - 5-fold Cross-validation on Train Set')
plt.grid(True)
plt.tight_layout()
plt.show()

## **AUC ROC Curve**

In [None]:
# --- Code with Comments ---
def _probs_to_auc(y_true, probs):
    """Return ROC AUC from class probabilities (binary: positive column; else one-vs-rest)."""
    from sklearn.metrics import roc_auc_score

    probs = np.asarray(probs)
    if probs.shape[1] == 2:
        return roc_auc_score(y_true, probs[:, 1])
    return roc_auc_score(y_true, probs, multi_class='ovr')


def _timed_cross_validation(name, folds, X, y, fit_predict):
    """Score ``fit_predict`` on every fold and return one result row.

    ``fit_predict(X_tr, y_tr, X_val)`` must train a fresh model and return the class
    probabilities for ``X_val``; the wall-clock time of that call (fit + predict)
    is what gets averaged into ``avg_time``.
    """
    import time

    fold_times, fold_scores = [], []
    for train_idx, val_idx in folds:
        X_tr, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]

        start = time.time()
        probs = fit_predict(X_tr, y_tr, X_val)
        fold_times.append(time.time() - start)

        fold_scores.append(_probs_to_auc(y_val, probs))

    return {
        'model': name,
        'avg_time': np.mean(fold_times),
        'roc_auc': np.mean(fold_scores)
    }


def run_complete_analysis_with_tabpfn(X_train, y_train):
    """Benchmark every model with the same 5-fold stratified cross-validation.

    For each model the function records the mean validation ROC AUC and the mean
    fit + predict time per fold. All models, AutoGluon included, are scored on
    folds they were not trained on.

    Args:
        X_train: feature DataFrame (positional ``.iloc`` indexing is used).
        y_train: target Series aligned with ``X_train``.

    Returns:
        DataFrame with one row per model and the columns ``model``, ``avg_time``
        (seconds) and ``roc_auc``, in the order RandomForest, XGBoost, CatBoost,
        LightGBM, AutoGluon, TabPFN, TabPFN (PHE).
    """
    from sklearn.model_selection import StratifiedKFold

    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Materialise the folds once so every model sees exactly the same splits.
    folds = list(kf.split(X_train, y_train))

    results = []

    # --- Classical models ---
    models = [
        ('RandomForest', RandomForestClassifier(random_state=42)),
        ('XGBoost', XGBClassifier(random_state=42, eval_metric='logloss')),
        ('CatBoost', CatBoostClassifier(random_state=42, verbose=0)),
        ('LightGBM', LGBMClassifier(random_state=42, verbose=-1)),
    ]

    for name, model in models:
        print(f"Evaluating {name}...")

        def classical_fit_predict(X_tr, y_tr, X_val, model=model):
            # The estimator object is refitted on every fold.
            model.fit(X_tr, y_tr)
            return model.predict_proba(X_val)

        results.append(_timed_cross_validation(name, folds, X_train, y_train, classical_fit_predict))

    # --- AutoGluon ---
    # Cross-validated like the other models. Fitting once on all of X_train and then
    # scoring on X_train itself measured training-set fit, not generalisation.
    print("Evaluating AutoGluon...")

    def autogluon_fit_predict(X_tr, y_tr, X_val):
        train_df = X_tr.copy()
        train_df['target'] = y_tr.values
        predictor = TabularPredictor(label='target', verbosity=0).fit(train_df)
        return predictor.predict_proba(X_val)

    results.append(_timed_cross_validation('AutoGluon', folds, X_train, y_train, autogluon_fit_predict))

    # --- TabPFN ---
    print("Evaluating TabPFN...")

    def tabpfn_fit_predict(X_tr, y_tr, X_val):
        model = TabPFNClassifier(device=device)
        model.fit(X_tr.values, y_tr)
        return model.predict_proba(X_val.values)

    results.append(_timed_cross_validation('TabPFN', folds, X_train, y_train, tabpfn_fit_predict))

    # --- TabPFN (PHE-style) ---
    # Ensemble of 3 differently seeded TabPFN models: their probabilities are averaged
    # and the averaged prediction is scored. Averaging the AUCs of identically
    # configured models is not an ensemble.
    print("Evaluating TabPFN (PHE)...")

    def tabpfn_ensemble_fit_predict(X_tr, y_tr, X_val):
        member_probs = []
        for seed in range(3):
            model = TabPFNClassifier(device=device, random_state=seed)
            model.fit(X_tr.values, y_tr)
            member_probs.append(model.predict_proba(X_val.values))
        return np.mean(member_probs, axis=0)

    results.append(_timed_cross_validation('TabPFN (PHE)', folds, X_train, y_train, tabpfn_ensemble_fit_predict))

    return pd.DataFrame(results)

In [None]:
# --- Code with Comments ---
def create_performance_time_plot_fixed(results_df):
    """
    Create a plot showing normalized ROC AUC vs average fit + predict time.

    ROC AUC is min-max normalized over the rows of ``results_df``; when every model
    has the same ROC AUC all points are drawn at 0.5. The shaded band around each
    point is decorative noise (not a measured confidence interval), drawn from a
    fixed seed so the figure is identical on every run.

    Args:
        results_df: DataFrame with the columns ``model``, ``avg_time`` (seconds)
            and ``roc_auc``.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    plt.figure(figsize=(12, 8))

    # Define styles per model
    model_styles = {
        'TabPFN': {'color': '#2E8B57', 'marker': '*', 'size': 150, 'alpha': 0.3},
        'TabPFN (PHE)': {'color': '#006400', 'marker': 'o', 'size': 100, 'alpha': 0.3},
        'RandomForest': {'color': '#1E90FF', 'marker': 'o', 'size': 100, 'alpha': 0.3},
        'XGBoost': {'color': '#8A2BE2', 'marker': 's', 'size': 100, 'alpha': 0.3},
        'CatBoost': {'color': '#DC143C', 'marker': 'D', 'size': 100, 'alpha': 0.3},
        'LightGBM': {'color': '#FF8C00', 'marker': '^', 'size': 100, 'alpha': 0.3},
        'AutoGluon': {'color': '#4682B4', 'marker': 'v', 'size': 100, 'alpha': 0.3}
    }
    default_style = {'color': 'gray', 'marker': 'o', 'size': 100, 'alpha': 0.3}

    # Min-Max normalization bounds
    min_auc = results_df['roc_auc'].min()
    max_auc = results_df['roc_auc'].max()
    auc_range = max_auc - min_auc

    # Local seeded generator: reproducible band, global NumPy RNG state left untouched.
    rng = np.random.default_rng(0)

    for _, row in results_df.iterrows():
        model_name = row['model']
        style = model_styles.get(model_name, default_style)

        # Guard against a zero range (single model or identical scores), which
        # would otherwise divide by zero and yield NaN positions.
        norm_auc = (row['roc_auc'] - min_auc) / auc_range if auc_range > 0 else 0.5

        # Simulated band from 0.5x to 2x the measured time. The lower bound is floored
        # at 0.01 s and the upper bound never drops below it, which keeps log10 finite.
        band_start = max(0.01, row['avg_time'] * 0.5)
        band_end = max(band_start, row['avg_time'] * 2)
        time_vals = np.logspace(np.log10(band_start), np.log10(band_end), 50)
        auc_vals = np.full_like(time_vals, norm_auc)
        noise = np.abs(rng.normal(0, 0.02, len(time_vals)))
        auc_upper = np.clip(auc_vals + noise, 0, 1)
        auc_lower = np.clip(auc_vals - noise, 0, 1)

        plt.fill_between(time_vals, auc_lower, auc_upper,
                         color=style['color'], alpha=style['alpha'])

        plt.scatter(row['avg_time'], norm_auc,
                    color=style['color'], marker=style['marker'],
                    s=style['size'], label=model_name,
                    edgecolors='black', linewidth=1, zorder=5)

        # Dotted drop line from the current bottom of the axes up to the point
        plt.plot([row['avg_time'], row['avg_time']],
                 [plt.ylim()[0], norm_auc],
                 color=style['color'], linestyle=':', alpha=0.7)

    # Vertical reference line at 10 seconds
    plt.axvline(x=10, color='black', linestyle='--', alpha=0.7)
    plt.xscale('log')
    plt.xlabel('Average Fit + Predict Time (s)', fontsize=12)
    plt.ylabel('Normalized ROC AUC', fontsize=12)
    plt.title('Normalized ROC AUC vs Inference Time', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks([0.01, 0.1, 1, 10, 100], ['0.01', '0.1', '1', '10', '100'])
    plt.tight_layout()
    plt.show()

In [None]:
# Run the full benchmark on the training split, then plot normalized ROC AUC against time.
results_df = run_complete_analysis_with_tabpfn(X_train, y_train)
create_performance_time_plot_fixed(results_df)

In [None]:
# --- Code with Comments ---
def create_performance_time_plot_fixed(results_df):
    """
    Create a plot showing normalized ROC AUC vs average fit + predict time.

    NOTE(review): a function with this name is also defined in an earlier cell;
    this definition replaces it once the cell runs. Keep only one of them.

    ROC AUC is min-max normalized over the rows of ``results_df``; when every model
    has the same ROC AUC all points are drawn at 0.5. The shaded band around each
    point is decorative noise (not a measured confidence interval), drawn from a
    fixed seed so the figure is identical on every run.

    Args:
        results_df: DataFrame with the columns ``model``, ``avg_time`` (seconds)
            and ``roc_auc``.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    plt.figure(figsize=(12, 8))

    # Define styles per model
    model_styles = {
        'TabPFN': {'color': '#2E8B57', 'marker': '*', 'size': 150, 'alpha': 0.3},
        'TabPFN (PHE)': {'color': '#006400', 'marker': 'o', 'size': 100, 'alpha': 0.3},
        'RandomForest': {'color': '#1E90FF', 'marker': 'o', 'size': 100, 'alpha': 0.3},
        'XGBoost': {'color': '#8A2BE2', 'marker': 's', 'size': 100, 'alpha': 0.3},
        'CatBoost': {'color': '#DC143C', 'marker': 'D', 'size': 100, 'alpha': 0.3},
        'LightGBM': {'color': '#FF8C00', 'marker': '^', 'size': 100, 'alpha': 0.3},
        'AutoGluon': {'color': '#4682B4', 'marker': 'v', 'size': 100, 'alpha': 0.3}
    }
    default_style = {'color': 'gray', 'marker': 'o', 'size': 100, 'alpha': 0.3}

    # Min-Max normalization bounds
    min_auc = results_df['roc_auc'].min()
    max_auc = results_df['roc_auc'].max()
    auc_range = max_auc - min_auc

    # Local seeded generator: reproducible band, global NumPy RNG state left untouched.
    rng = np.random.default_rng(0)

    for _, row in results_df.iterrows():
        model_name = row['model']
        style = model_styles.get(model_name, default_style)

        # Guard against a zero range (single model or identical scores), which
        # would otherwise divide by zero and yield NaN positions.
        norm_auc = (row['roc_auc'] - min_auc) / auc_range if auc_range > 0 else 0.5

        # Simulated band from 0.5x to 2x the measured time. The lower bound is floored
        # at 0.01 s and the upper bound never drops below it, which keeps log10 finite.
        band_start = max(0.01, row['avg_time'] * 0.5)
        band_end = max(band_start, row['avg_time'] * 2)
        time_vals = np.logspace(np.log10(band_start), np.log10(band_end), 50)
        auc_vals = np.full_like(time_vals, norm_auc)
        noise = np.abs(rng.normal(0, 0.02, len(time_vals)))
        auc_upper = np.clip(auc_vals + noise, 0, 1)
        auc_lower = np.clip(auc_vals - noise, 0, 1)

        plt.fill_between(time_vals, auc_lower, auc_upper,
                         color=style['color'], alpha=style['alpha'])

        plt.scatter(row['avg_time'], norm_auc,
                    color=style['color'], marker=style['marker'],
                    s=style['size'], label=model_name,
                    edgecolors='black', linewidth=1, zorder=5)

        # Dotted drop line from the current bottom of the axes up to the point
        plt.plot([row['avg_time'], row['avg_time']],
                 [plt.ylim()[0], norm_auc],
                 color=style['color'], linestyle=':', alpha=0.7)

    # Vertical reference line at 10 seconds
    plt.axvline(x=10, color='black', linestyle='--', alpha=0.7)
    plt.xscale('log')
    plt.xlabel('Average Fit + Predict Time (s)', fontsize=12)
    plt.ylabel('Normalized ROC AUC', fontsize=12)
    plt.title('Normalized ROC AUC vs Inference Time', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks([0.01, 0.1, 1, 10, 100], ['0.01', '0.1', '1', '10', '100'])
    plt.tight_layout()
    plt.show()

# ALTERNATIVE: Use raw ROC AUC values instead of normalization
def create_performance_time_plot_raw(results_df):
    """
    Scatter each model's raw ROC AUC against its average fit + predict time.

    One marker is drawn per row of ``results_df`` (columns ``model``,
    ``avg_time``, ``roc_auc``); the x-axis is logarithmic and the legend sits
    outside the axes on the right. Models without a predefined style are drawn
    as gray circles.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    # model name -> (color, marker, marker size)
    style_table = {
        'TabPFN': ('#2E8B57', '*', 150),
        'TabPFN (PHE)': ('#006400', 'o', 100),
        'RandomForest': ('#1E90FF', 'o', 100),
        'XGBoost': ('#8A2BE2', 's', 100),
        'CatBoost': ('#DC143C', 'D', 100),
        'LightGBM': ('#FF8C00', '^', 100),
        'AutoGluon': ('#4682B4', 'v', 100),
    }
    fallback_style = ('gray', 'o', 100)

    plt.figure(figsize=(12, 8))

    # Column-wise iteration: one scatter call per model, in row order
    columns = zip(results_df['model'], results_df['avg_time'], results_df['roc_auc'])
    for model_name, avg_time, roc_auc in columns:
        color, marker, size = style_table.get(model_name, fallback_style)
        plt.scatter(
            avg_time,
            roc_auc,
            color=color,
            marker=marker,
            s=size,
            label=model_name,
            edgecolors='black',
            linewidth=1,
            alpha=0.8,
        )

    # Axes, labels and legend
    plt.xscale('log')
    plt.xlabel('Average Fit + Predict Time (s)', fontsize=12)
    plt.ylabel('ROC AUC', fontsize=12)
    plt.title('ROC AUC vs Inference Time', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

In [None]:
# Example usage: run the benchmark and draw the normalized plot.
# NOTE(review): an earlier cell already assigns `results_df` from the same call;
# re-running it here repeats every model fit — consider reusing the existing result.
results_df = run_complete_analysis_with_tabpfn(X_train, y_train)
create_performance_time_plot_fixed(results_df)  # Fixed normalization

In [None]:
# Plot the same benchmark results without normalization.
create_performance_time_plot_raw(results_df)    # Raw values (cleaner)

## **Testing Models on Original Data**