In [1]:
from tabpfn import TabPFNClassifier
from sklearn.datasets import fetch_openml

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import os
import warnings

# Baseline experiment: fit a default TabPFNClassifier on a small random
# sample of the OpenML "creditcard" dataset and report hold-out accuracy.

# Silence all warnings so the cell output stays compact.
# NOTE(review): this filter is process-wide and stays active for later cells.
warnings.filterwarnings(action="ignore")

# Fetch the dataset from OpenML as pandas objects (features + target).
dataset = fetch_openml("creditcard", version=1, as_frame=True)
X, y = dataset.data, dataset.target

# Binarise the target: entries equal to the string "1" become 1, everything
# else becomes 0. The result is a NumPy array, so it is indexed positionally
# below (plain `y[indices]`, whereas the feature frame uses `.iloc`).
y = np.where(y == "1", 1, 0)

# Draw 100 distinct row positions with a fixed seed so the subset is
# reproducible, then restrict features and labels to those rows.
sample_size = 100
rng = np.random.RandomState(42)
indices = rng.choice(X.shape[0], size=sample_size, replace=False)
X, y = X.iloc[indices], y[indices]

# Split the sampled rows into training and testing sets (20% held out).
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

# Fit a TabPFNClassifier constructed with no arguments (library defaults).
model = TabPFNClassifier()
model.fit(X_train, y_train)

# Predict on the held-out rows and score them.
# NOTE(review): accuracy is the only metric reported; if the sampled classes
# are imbalanced it may be uninformative — confirm the class balance.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.95

In [2]:
from tools.hyperparameter_tuning import get_model_params

# Tuned experiment: ask the project helper for TabPFN constructor arguments,
# rebuild the classifier with them, and score it on the same hold-out split.

# Options forwarded to the tuning helper as keyword arguments.
# NOTE(review): `max_time` is presumably a time budget — confirm its unit
# against tools.hyperparameter_tuning.
tuning_options = {
    "tune": True,
    "tune_metric": "f1",
    "max_time": 15,
    "use_tensor": True,
}

# `model` here is whatever the name is currently bound to (the classifier
# fitted in the previous cell on a fresh top-to-bottom run).
model_params = get_model_params(model, X_train, y_train, **tuning_options)

# Rebind `model` to a new classifier built from the returned parameters
# and fit it on the training rows.
model = TabPFNClassifier(**model_params)
model.fit(X_train, y_train)

# Predict on the held-out rows and report accuracy as the cell output.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

  1%|          | 1/100 [00:21<35:35, 21.57s/trial, best loss: 0.0]
Best parameters: {'average_before_softmax': True, 'fit_mode': 'low_memory', 'memory_saving_mode': 'auto', 'n_estimators': 32, 'n_jobs': -1, 'random_state': 22, 'softmax_temperature': 0.8990300702670624}


0.95