In [None]:
import openml
from tqdm import tqdm
import math
import torch as th
from torch.utils.data import random_split, TensorDataset
from torch.nn import functional as F
import pandas as pd
from pandas.api.types import is_numeric_dtype

from tabpfn import TabPFNClassifier

from tab_pfn.networks import TabPFN
from tab_pfn.metrics import ConfusionMeter
from tab_pfn.metrics import AccuracyMeter
from tab_pfn.networks import pad_features

In [None]:
# Build the list of OpenML-CC18 datasets to evaluate on.
# A task is kept when it has at most 2000 instances, at most 100 numeric
# features and at most 10 classes.
benchmark = openml.study.get_suite('OpenML-CC18')
tasks = openml.tasks.list_tasks(task_id=benchmark.tasks, output_format="dataframe")

retained_datasets = []

for _, row in tqdm(list(tasks.iterrows())):
    # These two filters only need the task listing, so apply them before
    # fetching anything: rejected tasks are never downloaded.
    if row["NumberOfInstances"] > 2000:
        continue
    if row["NumberOfNumericFeatures"] > 100:
        continue

    try:
        dataset = openml.tasks.get_task(row["tid"]).get_dataset()
    except Exception as e:
        # Best effort: report the failing task id and move on to the next one.
        print(e)
        print(row["tid"])
        continue

    # The class count is read from the dataset qualities, so it can only be
    # checked once the dataset object is available.
    if dataset.qualities["NumberOfClasses"] > 10:
        continue

    # Reuse the object fetched above instead of requesting the task and the
    # dataset a second time (that second request was also outside the try).
    retained_datasets.append(dataset)

In [None]:
# Number of datasets that passed the instance / feature / class filters.
len(retained_datasets)

In [None]:
# Evaluate the locally trained TabPFN checkpoint on every retained dataset.
# For each dataset: encode the target as integer class ids, one-hot encode the
# non-numeric feature columns, split the rows into two halves (context / query),
# run the network once and store the loss and meters in `scores[dataset.name]`.

# Widest feature matrix accepted; inputs are padded up to this width.
# NOTE(review): presumably this must match the first TabPFN(...) argument below - confirm.
MAX_FEATURES = 100
# Fixed seed so the train/test split (and therefore every metric) is reproducible.
SPLIT_SEED = 0

tab_pfn = TabPFN(100, 10, 256, 512, 1024, 4, 12)
# The model is never moved off the CPU, so map the checkpoint to CPU as well;
# this also lets a checkpoint saved from a GPU run be loaded.
tab_pfn.load_state_dict(th.load("../resources/model_48127.pt", map_location="cpu"))
tab_pfn.eval()

# Only used by the commented-out reference path inside the loop below.
tab_pfn_clf = TabPFNClassifier(device="cuda")

scores = {}

for dataset in tqdm(retained_datasets):
    x, y, _, _ = dataset.get_data(target=dataset.default_target_attribute)

    # Map every distinct target value to a contiguous integer id.
    classes = {
        c: i for i, c in enumerate(y.unique())
    }

    conf_meter = ConfusionMeter(len(classes), None)
    acc_meter = AccuracyMeter(None)

    y = y.apply(lambda c: classes[c])

    # One-hot encode non-numeric columns; stop early as soon as the encoded
    # frame is already too wide, since the dataset is skipped in that case.
    for c in x.columns:
        if not is_numeric_dtype(x[c]):
            ohe = pd.get_dummies(x[c], prefix=c, prefix_sep="_", dtype=float)
            x = x.drop(c, axis=1).join(ohe)
            if len(x.columns) > MAX_FEATURES:
                break

    if len(x.columns) > MAX_FEATURES:
        continue

    with th.no_grad():
        x = th.tensor(x.to_numpy())
        y = th.tensor(y.to_numpy())

        # Drop the first row when the row count is odd so both halves have equal size.
        x, y = x[x.size(0) % 2:], y[y.size(0) % 2:]
        th_dataset = TensorDataset(x, y)
        data_train, data_test = random_split(
            th_dataset,
            [0.5, 0.5],
            generator=th.Generator().manual_seed(SPLIT_SEED),
        )

        # Index the tensors with the subset indices directly instead of
        # re-stacking the samples one by one.
        x_train = x[data_train.indices].to(th.float)
        y_train = y[data_train.indices].to(th.long)
        x_test = x[data_test.indices].to(th.float)
        y_test = y[data_test.indices].to(th.long)

        # Impute missing feature values with the train-split column mean
        # (0 when a column has no observed value in the train split).
        # Without this, NaN features reach the network; the recorded run shows
        # loss=nan on several datasets, which is consistent with that.
        col_means = th.nan_to_num(th.nanmean(x_train, dim=0), nan=0.0)
        x_train = th.where(th.isnan(x_train), col_means, x_train)
        x_test = th.where(th.isnan(x_test), col_means, x_test)

        x_train = pad_features(x_train, MAX_FEATURES)
        x_test = pad_features(x_test, MAX_FEATURES)

        # Add a leading batch dimension of 1 for the call and strip it from the output.
        out = tab_pfn(x_train[None], y_train[None], x_test[None])[0]
        # Reference path: swap the line above for the two lines below to score
        # the `tabpfn` package classifier instead of the local checkpoint.
        #tab_pfn_clf.fit(x_train, y_train)
        #out = th.tensor(tab_pfn_clf.predict_proba(x_test))

        loss = F.cross_entropy(out, y_test, reduction='mean').cpu().item()
        conf_meter.add(out, y_test)
        acc_meter.add(out, y_test)

        scores[dataset.name] = {
            "loss": loss,
            "conf_meter": conf_meter,
            "acc_meter": acc_meter,
        }

In [None]:
# Per-dataset report: one line with the loss, the accuracy and the
# class-averaged precision / recall taken from the stored meters.
for name, entry in scores.items():
    confusion = entry["conf_meter"]
    ce_loss = entry["loss"]
    accuracy = entry["acc_meter"].accuracy()
    mean_precision = confusion.precision().mean().cpu().item()
    mean_recall = confusion.recall().mean().cpu().item()
    print(
        f"{name} : loss={ce_loss:.4f}, acc={accuracy}, "
        f"prec={mean_precision:.4f} | rec={mean_recall:.4f}"
    )

3 : loss=0.1690, acc=0.9507 | rec=0.9491
11 : loss=0.2526, acc=0.8263 | rec=0.8025
14 : loss=0.5841, acc=0.7704 | rec=0.7747
15 : loss=nan, acc=0.3295 | rec=0.5000
16 : loss=0.2647, acc=0.9336 | rec=0.9341
18 : loss=0.7649, acc=0.6760 | rec=0.6736
22 : loss=0.4107, acc=0.8285 | rec=0.8313
23 : loss=1.0401, acc=0.1406 | rec=0.3333
29 : loss=nan, acc=0.2377 | rec=0.5000
31 : loss=0.5340, acc=0.6851 | rec=0.5948
37 : loss=0.4828, acc=0.7703 | rec=0.7518
46 : loss=0.9109, acc=0.5742 | rec=0.5267
50 : loss=0.4127, acc=0.7752 | rec=0.7244
54 : loss=0.6417, acc=0.6987 | rec=0.7033
188 : loss=nan, acc=0.0652 | rec=0.2000
38 : loss=nan, acc=0.4706 | rec=0.5000
458 : loss=0.0191, acc=0.9958 | rec=0.9964
469 : loss=1.8085, acc=0.0327 | rec=0.1667
1049 : loss=0.2422, acc=0.9570 | rec=0.6187
1050 : loss=0.2954, acc=0.4501 | rec=0.5000
1063 : loss=0.4099, acc=0.7803 | rec=0.6860
1067 : loss=0.3358, acc=0.5825 | rec=0.5066
1068 : loss=0.2535, acc=0.7101 | rec=0.5101
1510 : loss=0.0973, acc=0.9519 | rec=0.9586
1494 : loss=0.3405, acc=0.8471 | rec=0.8348
1480 : loss=0.5533, acc=0.5745 | rec=0.5225
1487 : loss=0.1464, acc=0.8084 | rec=0.5150
1462 : loss=0.0508, acc=0.9887 | rec=0.9849
1464 : loss=0.5115, acc=0.8881 | rec=0.5174
6332 : loss=nan, acc=0.2185 | rec=0.5000
23381 : loss=0.7787, acc=0.5707 | rec=0.5681
40966 : loss=nan, acc=0.0188 | rec=0.1250
40982 : loss=0.8137, acc=0.7151 | rec=0.6522
40994 : loss=0.2078, acc=0.8597 | rec=0.6902
40975 : loss=0.2396, acc=0.8264 | rec=0.6853
40984 : loss=0.3858, acc=0.8246 | rec=0.8291
40978 : loss=0.2861, acc=0.9171 | rec=0.7535
40670 : loss=1.0561, acc=0.1668 | rec=0.3333

In [None]:
# Aggregate the per-dataset results into benchmark-level averages.
results = list(scores.values())

conf_meters = [entry["conf_meter"] for entry in results]
acc_meters = [entry["acc_meter"] for entry in results]
cross_entropies = [entry["loss"] for entry in results]

# Each dataset contributes its class-averaged precision / recall and its
# accuracy; the benchmark value is the plain mean over datasets.
per_dataset_precision = [meter.precision().mean().item() for meter in conf_meters]
precision = sum(per_dataset_precision) / len(conf_meters)

per_dataset_recall = [meter.recall().mean().item() for meter in conf_meters]
recall = sum(per_dataset_recall) / len(conf_meters)

per_dataset_accuracy = [meter.accuracy() for meter in acc_meters]
acc = sum(per_dataset_accuracy) / len(acc_meters)

# A NaN loss is replaced by log(10), i.e. the cross entropy of a uniform
# prediction over 10 classes, so it does not poison the average.
cross_entropies_without_nan = [
    math.log(10) if math.isnan(ce) else ce
    for ce in cross_entropies
]

print(f"precision = {precision}, recall = {recall}")
print("cross entropy:", sum(cross_entropies_without_nan) / len(cross_entropies_without_nan))
print("acc:", acc)

precision = 0.6425595247235737, recall = 0.6251027662503091
cross entropy: 0.7661994506457874

Reference results obtained with the authors' code:

precision = 0.801273051649332, recall = 0.77424192322152
cross entropy: 0.8216769727213042
acc: 0.837828790733493