In [17]:
import time
import torch
import numpy as np
import os

from scripts.model_builder import get_default_spec, save_model, load_model_only_inference
from scripts.transformer_prediction_interface import transformer_predict, get_params_from_config, TabPFNClassifier
from scripts.differentiable_pfn_evaluation import eval_model, eval_model_range
from scripts.model_builder import get_model, get_default_spec, save_model, load_model

from datasets import load_openml_list, open_cc_dids, open_cc_valid_dids, test_dids_classification

from scripts import tabular_metrics
import random

In [18]:
base_path = '.'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load Datasets

In [19]:
max_samples = 10000
bptt = 10000

cc_test_datasets_multiclass, cc_test_datasets_multiclass_df = load_openml_list(open_cc_dids, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = max_samples, num_feats=100, return_capped=True)
cc_valid_datasets_multiclass, cc_valid_datasets_multiclass_df = load_openml_list(open_cc_valid_dids, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = max_samples, num_feats=100, return_capped=True)

# Loading longer OpenML Datasets for generalization experiments (optional)
# test_datasets_multiclass, test_datasets_multiclass_df = load_openml_list(test_dids_classification, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = 10000, num_feats=100, return_capped=True)

random.seed(0)
random.shuffle(cc_valid_datasets_multiclass)

Number of datasets: 30
Loading balance-scale 11 ..
Loading mfeat-fourier 14 ..
Loading breast-w 15 ..
Loading mfeat-karhunen 16 ..
Loading mfeat-morphological 18 ..
Loading mfeat-zernike 22 ..
Loading cmc 23 ..
Loading credit-approval 29 ..
Loading credit-g 31 ..
Loading diabetes 37 ..
Loading tic-tac-toe 50 ..
Loading vehicle 54 ..
Loading eucalyptus 188 ..
Loading analcatdata_authorship 458 ..
Loading analcatdata_dmft 469 ..
Loading pc4 1049 ..
Loading pc3 1050 ..
Loading kc2 1063 ..
Loading pc1 1068 ..
Loading banknote-authentication 1462 ..
Loading blood-transfusion-service-center 1464 ..
Loading ilpd 1480 ..
Loading qsar-biodeg 1494 ..
Loading wdbc 1510 ..
Loading cylinder-bands 6332 ..
Loading dresses-sales 23381 ..
Loading MiceProtein 40966 ..
Loading car 40975 ..
Loading steel-plates-fault 40982 ..
Loading climate-model-simulation-crashes 40994 ..
Number of datasets: 150
Loading breast-cancer 13 ..
Loading colic 25 ..
Loading dermatology 35 ..
Loading sonar 40 ..
Loading glass 

In [20]:
def get_datasets(selector, task_type, suite='cc'):
    if task_type == 'binary':
        ds = valid_datasets_binary if selector == 'valid' else test_datasets_binary
    else:
        if suite == 'openml':
            ds = valid_datasets_multiclass if selector == 'valid' else test_datasets_multiclass
        elif suite == 'cc':
            ds = cc_valid_datasets_multiclass if selector == 'valid' else cc_test_datasets_multiclass
        else:
            raise Exception("Unknown suite")
    return ds

In [21]:
model_string, longer, task_type = '', 1, 'multiclass'
eval_positions = [1000]
bptt = 2000
    
test_datasets, valid_datasets = get_datasets('test', task_type, suite='cc'), get_datasets('valid', task_type, suite='cc')

# Pick single Dataset

In [22]:
evaluation_dataset_index = 0 # Index of the dataset to predict
ds = test_datasets[evaluation_dataset_index]
print(f'Evaluation dataset name: {ds[0]} shape {ds[1].shape}')

Evaluation dataset name: balance-scale shape torch.Size([625, 4])


## Split x and y in train and test

In [23]:
xs, ys = ds[1].clone(), ds[2].clone()
eval_position = xs.shape[0] // 2
train_xs, train_ys = xs[0:eval_position], ys[0:eval_position]
test_xs, test_ys = xs[eval_position:], ys[eval_position:]

## weighting for the ensemble (AUC/CE)

In [26]:
class AccuracyEnsemble:
    def __init__(self, eval_positions, model_storage_folder="modelstorage", device="cpu", verbose=False):
        self.model_dict = {}  # Maps model_index to loaded model
        
        # Iterate over all files in the model_path directory
        for model_index, model_string in enumerate(os.listdir(model_storage_folder)):
            if model_string.endswith('.cpkt'):  # Assuming the models have a .cpkt extension
                loaded_model = TabPFNClassifier(base_path=model_storage_folder,
                                                model_string=model_string,
                                                N_ensemble_configurations=1,
                                                batch_size_inference=1)
                # loaded_model = load_model(path= model_storage_folder,filename=filename, device=device, eval_positions=eval_positions, verbose=verbose)
                self.model_dict[model_index] = {"model": loaded_model, "auc":-1, "ce":-1, "weight":-1}
                
    def fit(self, train_xs, train_ys):
        for model_index, model_auc_ce in self.model_dict.items():
            print(model_auc_ce)
            model = model_auc_ce["model"]
            model.fit(train_xs, train_ys)
            
    def get_train_auc_ce_of_model(self, model, split_share = 0.8):
        # use accuracy / AUC  for the model weighing? How to interpret each of them? 
        # we have to cache the full training data as .fit will overwrite this in the model
        full_train_x, full_train_y = model.X_, model.y_
        
        temp_split_pos = random.randint(int(full_train_x.shape[0] * split_share), full_train_x.shape[0]) # is 1 right dimension? sequence_length, num_feat
        
        temp_split_train_x = full_train_x[:temp_split_pos]
        temp_split_test_x = full_train_x[temp_split_pos:]
        
        temp_split_train_y = full_train_y[:temp_split_pos]
        temp_split_test_y = full_train_y[:temp_split_pos]
        
        model.fit(temp_split_train_x, temp_split_train_y)
        
        prediction_ = model.predict_proba(temp_split_test_x)
        auc, ce = tabular_metrics.auc_metric(temp_split_test_y, prediction_), tabular_metrics.cross_entropy(temp_split_test_y, prediction_)
        
        # have to reinsert the full x_train, y_train
        model.fit(full_train_x, full_train_y)
        return auc, ce
        
    def predict_proba(self, test_xs, metric = "auc"):
        
        sum_of_ce = 0
        sum_of_auc = 0
        for model_index, model_auc_ce in self.model_dict.items():
            model = model_auc_ce["model"]
            auc, ce = self.get_train_auc_ce_of_model(model, split_share=0.8) # split_share randomly choosen 
            
            self.model_dict[model_index]["auc"] = auc
            self.model_dict[model_index]["ce"] = ce
            
            sum_of_ce += ce
            sum_of_auc += auc
            
            
        #assign weight of model depending of ce
        for model_index, model_auc_ce in self.model_dict.items():
            if metric == "auc":
                self.model_dict[model_index]["weight"] = self.model_dict[model_index]["auc"] / sum_of_auc
            elif metric == "ce": 
                self.model_dict[model_index]["weight"] = self.model_dict[model_index]["ce"] / sum_of_ce
            else: 
                raise NotImplementedError(f'The metric for the weights is not allowed: {metric}.')


        prediction__weighted = None
        
        for model_index, model_auc_ce in self.model_dict.items():
            model = model_auc_ce["model"]
            prediction_ = model.predict_proba(test_xs)
            if prediction__weighted == None: 
                prediction__weighted = prediction_ * self.model_dict[model_index]["weight"]
            prediction__weighted += prediction_ * self.model_dict[model_index]["weight"]

        return prediction__weighted

In [27]:
model_path = "./modelstorage"
classifier_ensemble = AccuracyEnsemble(eval_positions=eval_position, model_storage_folder=model_path, device=device, verbose=False)
classifier_ensemble.fit(train_xs, train_ys)
prediction_ = classifier_ensemble.predict_proba(test_xs)
auc, ce = tabular_metrics.auc_metric(test_ys, prediction_), tabular_metrics.cross_entropy(test_ys, prediction_)
'AUC', float(auc), 'Cross Entropy', float(ce)

{'model': TabPFNClassifier(N_ensemble_configurations=1, base_path='./modelstorage',
                 batch_size_inference=1,
                 model_string='prior_diff_real_checkpoint_n_0_epoch_100.cpkt'), 'auc': -1, 'ce': -1, 'weight': -1}
{'model': TabPFNClassifier(N_ensemble_configurations=1, base_path='./modelstorage',
                 batch_size_inference=1,
                 model_string='prior_diff_real_checkpoint_n_0_epoch_42.cpkt'), 'auc': -1, 'ce': -1, 'weight': -1}
Found input variables with inconsistent numbers of samples: [263, 49]


ValueError: Expected input batch_size (49) to match target batch_size (263).