In [1]:
from xgboost import XGBClassifier
import warnings
from tabpfn import TabPFNClassifier
import numpy as np
import pandas as pd
import os
from evaluate import scores
import sklearn
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, cross_validate
from data_prep_utils import *
import matplotlib.pyplot as plt
import torch
import openml

## Openml datasets:

### 3:
Chess endgame, discrete board position (true/false) and binary class win/lose

### 13
Breastcancer recurrence from 10 categorical features

### 481
Identify genetic disorder from blood samples and other values

### 966
Some game

### 1444
"pizza cutter"

### 1498
Africa heart disease

### 1489:
Classify vowels into nasal/oral from 5 harmonic amplitudes

### 1494:
Classify chemical into non-/biodegradable, 41 features

### 1504:
Steel Plates faults, 27 features describing position and type of fault

### 4134
Predict binary biological response of molecules, features are chemical properties

In [2]:
data_id = 1489
dataset = openml.datasets.get_dataset(data_id, download_data=True, download_qualities=True, download_features_meta_data=True)
data_df = dataset.get_data()[0]
print(dataset.description)
print((data_df.head))
#print(dataset.row_id_attribute)

**Author**: Dominique Van Cappel, THOMSON-SINTRA  
**Source**: [KEEL](http://sci2s.ugr.es/keel/dataset.php?cod=105#sub2), [ELENA](https://www.elen.ucl.ac.be/neural-nets/Research/Projects/ELENA/databases/REAL/phoneme/) - 1993  
**Please cite**: None  

The aim of this dataset is to distinguish between nasal (class 0) and oral sounds (class 1). Five different attributes were chosen to characterize each vowel: they are the amplitudes of the five first harmonics AHi, normalised by the total energy Ene (integrated on all the frequencies): AHi/Ene. The phonemes are transcribed as follows: sh as in she, dcl as in dark, iy as the vowel in she, aa as the vowel in dark, and ao as the first vowel in water.  

### Source

The current dataset was formatted by the KEEL repository, but originally hosted by the [ELENA Project](https://www.elen.ucl.ac.be/neural-nets/Research/Projects/ELENA/elena.htm#stuff). The dataset originates from the European ESPRIT 5516 project: ROARS. The aim of this project was

In [3]:
import warnings
warnings.filterwarnings('always')

In [6]:
#openml_ids = [3,13,481,844,966,1494,1504, 4134]
openml_medical_id =     [13, 25, 49, 51, 53, 55, 336, 337, 446, 778, 798, 949, 1488, 4329, 41430]
openml_non_medical_id = [40, 43, 56, 59,333,334,335, 450, 451, 756, 872, 925,  1447, 40705, 40706]
metrics = ["accuracy", "precision", "roc_auc"]
n_models = 3
n_scores = 3
seed = 42
results_medical = np.empty((n_models,len(openml_medical_id), n_scores))
models = [XGBClassifier(n_estimators=5, max_depth=5, learning_rate=1, objective='binary:logistic'), 
          LogisticRegression(max_iter=500), 
          TabPFNClassifier(device='cpu', N_ensemble_configurations=3)]
print("Medical data:\n")
for mm, model in enumerate(models):
    for ii, data_id in enumerate(openml_medical_id):
        model_clean = sklearn.base.clone(model)
        dataset = openml.datasets.get_dataset(data_id, download_data=True, download_qualities=True, download_features_meta_data=True)
        X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe", target=dataset.default_target_attribute)
        #print(y.value_counts(normalize=False))
        X, y = df_to_numpy(X), df_to_numpy(y)
        np.random.seed(seed)
        shuffle = np.random.permutation(X.shape[0])
        X, y = X[shuffle], y[shuffle]
        X, y = reduce_n_samples(X,y)
        X = reduce_n_features(X)
        X, _ = normalize(X)
        score = cross_validate(model_clean, X, y, scoring=metrics, cv=3)
        
        
        '''X_train, X_test, y_train, y_test = tabpfn_split(X, y)
        model_clean.fit(X_train,y_train)
        y_pred = model_clean.predict(X_test)'''
        for ss, metric in enumerate(metrics):
            results_medical[mm,ii,ss] = np.mean(score["test_"+metric])
results_avg_medical = np.mean(results_medical,axis=1)
for ss, metric in enumerate(metrics):
    for mm, model in enumerate(models):
        print(model.__class__.__name__, metric, results_avg[mm,ss])

results_non_medical = np.empty((n_models,len(openml_non_medical_id), n_scores))
print("Non-medical data:\n")
for mm, model in enumerate(models):
    for ii, data_id in enumerate(openml_non_medical_id):
        model_clean = sklearn.base.clone(model)
        dataset = openml.datasets.get_dataset(data_id, download_data=True, download_qualities=True, download_features_meta_data=True)
        X, y, categorical_indicator, attribute_names = dataset.get_data(dataset_format="dataframe", target=dataset.default_target_attribute)
        #print(y.value_counts(normalize=False))
        X, y = df_to_numpy(X), df_to_numpy(y)
        np.random.seed(seed)
        shuffle = np.random.permutation(X.shape[0])
        X, y = X[shuffle], y[shuffle]
        X, y = reduce_n_samples(X,y)
        X = reduce_n_features(X)
        X, _ = normalize(X)
        score = cross_validate(model_clean, X, y, scoring=metrics, cv=3)
        
        
        '''X_train, X_test, y_train, y_test = tabpfn_split(X, y)
        model_clean.fit(X_train,y_train)
        y_pred = model_clean.predict(X_test)'''
        for ss, metric in enumerate(metrics):
            results_non_medical[mm,ii,ss] = np.mean(score["test_"+metric])
results_avg_non_medical = np.mean(results_non_medical,axis=1)
for ss, metric in enumerate(metrics):
    for mm, model in enumerate(models):
        print(model.__class__.__name__, metric, results_avg[mm,ss])

Medical data:



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


XGBClassifier accuracy 0.8577530342739553
LogisticRegression accuracy 0.7696242821361939
TabPFNClassifier accuracy 0.8763079388786337
XGBClassifier precision 0.7979222839010743
LogisticRegression precision 0.6729456791201059
TabPFNClassifier precision 0.8042806738333679
XGBClassifier roc_auc 0.8633665712296613
LogisticRegression roc_auc 0.7884636678310548
TabPFNClassifier roc_auc 0.8921079832657959
Non-medical data:



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


XGBClassifier accuracy 0.8577530342739553
LogisticRegression accuracy 0.7696242821361939
TabPFNClassifier accuracy 0.8763079388786337
XGBClassifier precision 0.7979222839010743
LogisticRegression precision 0.6729456791201059
TabPFNClassifier precision 0.8042806738333679
XGBClassifier roc_auc 0.8633665712296613
LogisticRegression roc_auc 0.7884636678310548
TabPFNClassifier roc_auc 0.8921079832657959


In [9]:

print("\nMedical: \n")
for ss, metric in enumerate(metrics):
    for mm, model in enumerate(models):
        print(model.__class__.__name__, metric, results_avg_medical[mm,ss])
print("\nNon-medical: \n")
for ss, metric in enumerate(metrics):
    for mm, model in enumerate(models):
        print(model.__class__.__name__, metric, results_avg_non_medical[mm,ss])


Medical: 

XGBClassifier accuracy 0.7810770911401766
LogisticRegression accuracy 0.7680505927102819
TabPFNClassifier accuracy 0.7937648215192668
XGBClassifier precision 0.6478515012310297
LogisticRegression precision 0.6039409728945764
TabPFNClassifier precision 0.6658859949814047
XGBClassifier roc_auc 0.7785641088422334
LogisticRegression roc_auc 0.7457445978750333
TabPFNClassifier roc_auc 0.7890718374096928

Non-medical: 

XGBClassifier accuracy 0.8577530342739553
LogisticRegression accuracy 0.7696242821361939
TabPFNClassifier accuracy 0.8763079388786337
XGBClassifier precision 0.7979222839010743
LogisticRegression precision 0.6729456791201059
TabPFNClassifier precision 0.8042806738333679
XGBClassifier roc_auc 0.8633665712296613
LogisticRegression roc_auc 0.7884636678310548
TabPFNClassifier roc_auc 0.8921079832657959


In [None]:
print(results_avg)

In [None]:
path = tabpfn.model
print(path[2].__class__.__name__)

In [None]:
from tabpfn.transformer import TransformerModel
model = TransformerModel()
model.load_state_dict(torch.load('./state_dict.ckpt'))