In [1]:
import time
import torch
import numpy as np
import os

from scripts.model_builder import get_default_spec, save_model, load_model_only_inference
from scripts.transformer_prediction_interface import transformer_predict, get_params_from_config, TabPFNClassifier
from scripts.differentiable_pfn_evaluation import eval_model, eval_model_range
from scripts.model_builder import get_model, get_default_spec, save_model, load_model

from datasets import load_openml_list, open_cc_dids, open_cc_valid_dids, test_dids_classification

from scripts import tabular_metrics
import random

In [14]:
base_path = '.'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [15]:
# Load Datasets

In [16]:
max_samples = 10000
bptt = 10000

cc_test_datasets_multiclass, cc_test_datasets_multiclass_df = load_openml_list(open_cc_dids, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = max_samples, num_feats=100, return_capped=True)
cc_valid_datasets_multiclass, cc_valid_datasets_multiclass_df = load_openml_list(open_cc_valid_dids, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = max_samples, num_feats=100, return_capped=True)

# Loading longer OpenML Datasets for generalization experiments (optional)
# test_datasets_multiclass, test_datasets_multiclass_df = load_openml_list(test_dids_classification, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = 10000, num_feats=100, return_capped=True)

random.seed(0)
random.shuffle(cc_valid_datasets_multiclass)

Number of datasets: 30
Loading balance-scale 11 ..
Loading mfeat-fourier 14 ..
Loading breast-w 15 ..
Loading mfeat-karhunen 16 ..
Loading mfeat-morphological 18 ..
Loading mfeat-zernike 22 ..
Loading cmc 23 ..
Loading credit-approval 29 ..
Loading credit-g 31 ..
Loading diabetes 37 ..
Loading tic-tac-toe 50 ..
Loading vehicle 54 ..
Loading eucalyptus 188 ..
Loading analcatdata_authorship 458 ..
Loading analcatdata_dmft 469 ..
Loading pc4 1049 ..
Loading pc3 1050 ..
Loading kc2 1063 ..
Loading pc1 1068 ..
Loading banknote-authentication 1462 ..
Loading blood-transfusion-service-center 1464 ..
Loading ilpd 1480 ..
Loading qsar-biodeg 1494 ..
Loading wdbc 1510 ..
Loading cylinder-bands 6332 ..
Loading dresses-sales 23381 ..
Loading MiceProtein 40966 ..
Loading car 40975 ..
Loading steel-plates-fault 40982 ..
Loading climate-model-simulation-crashes 40994 ..
Number of datasets: 150
Loading breast-cancer 13 ..
Loading colic 25 ..
Loading dermatology 35 ..
Loading sonar 40 ..
Loading glass 

In [17]:
def get_datasets(selector, task_type, suite='cc'):
    if task_type == 'binary':
        ds = valid_datasets_binary if selector == 'valid' else test_datasets_binary
    else:
        if suite == 'openml':
            ds = valid_datasets_multiclass if selector == 'valid' else test_datasets_multiclass
        elif suite == 'cc':
            ds = cc_valid_datasets_multiclass if selector == 'valid' else cc_test_datasets_multiclass
        else:
            raise Exception("Unknown suite")
    return ds

In [18]:
model_string, longer, task_type = '', 1, 'multiclass'
eval_positions = [1000]
bptt = 2000
    
test_datasets, valid_datasets = get_datasets('test', task_type, suite='cc'), get_datasets('valid', task_type, suite='cc')

In [19]:
# Pick single Dataset

In [20]:
evaluation_dataset_index = 0 # Index of the dataset to predict
ds = test_datasets[evaluation_dataset_index]
print(f'Evaluation dataset name: {ds[0]} shape {ds[1].shape}')

Evaluation dataset name: balance-scale shape torch.Size([625, 4])


In [21]:
# Split x and y in train and test

In [22]:
xs, ys = ds[1].clone(), ds[2].clone()
eval_position = xs.shape[0] // 2
train_xs, train_ys = xs[0:eval_position], ys[0:eval_position]
test_xs, test_ys = xs[eval_position:], ys[eval_position:]

In [40]:
class AccuracyEnsemble:
    def __init__(self, model_path, device, eval_positions, verbose):
        self.model_dict = {}  # Maps model_index to loaded model
        

        # Iterate over all files in the model_path directory
        for model_index, filename in enumerate(os.listdir(model_path)):
            if filename.endswith('.cpkt'):  # Assuming the models have a .cpkt extension
                loaded_model = load_model(path= model_path,filename=filename, device=device,
                                          eval_positions=eval_positions, verbose=verbose)
                self.model_dict[model_index] = {"model": loaded_model, "roc":-1, "ce":-1, "weight":-1}
                
    def fit(self, train_xs, train_ys):
        for model_index, model_roc_ce in self.model_dict.items():
            print(model_roc_ce)

            model = model_roc_ce["model"][2]
            model.fit(train_xs, train_ys)
            
    def get_train_roc_ce_of_model(self, model, split_share = 0.8):
        # use accuracy / AUC  for the model weighing? How to interpret each of them? 
        # we have to cache the full training data as .fit will overwrite this in the model
        full_train_x, full_train_y = model.X_, model.y_
        
        temp_split_pos = random.randint(int(full_train_x.shape[0] * split_share), full_train_x.shape[0]) # is 1 right dimension? sequence_length, num_feat
        
        temp_split_train_x = full_train_x[:temp_split_pos]
        temp_split_test_x = full_train_x[temp_split_pos:]
        
        temp_split_train_y = full_train_y[:temp_split_pos]
        temp_split_test_y = full_train_y[:temp_split_pos]
        
        model.fit(temp_split_train_x, temp_split_train_y)
        
        prediction_ = model.predict_proba(temp_split_test_x)
        roc, ce = tabular_metrics.auc_metric(temp_split_test_y, prediction_), tabular_metrics.cross_entropy(temp_split_test_y, prediction_)
        
        # have to reinsert the full x_train, y_train
        model.fit(full_train_x, full_train_y)
        return roc, ce
        
    def predict_proba(self, test_xs):
        
        sum_of_ce = 0
        for model_index, model_roc_ce in self.model_dict.items():
            model = model_roc_ce["model"]
            roc, ce = self.get_train_roc_ce_of_model(model, split_share=0.8) # split_share randomly choosen 
            
            self.model_dict[model_index]["roc"] = roc
            self.model_dict[model_index]["ce"] = ce
            
            sum_of_ce += ce
            
        #assign weight of model depending of ce
        for model_index, model_roc_ce in self.model_dict.items():
            self.model_dict[model_index]["weight"] = self.model_dict[model_index]["ce"] / sum_of_ce
        

        prediction__weighted = None
        
        for model_index, model_roc_ce in self.model_dict.items():
            model = model_roc_ce["model"]
            prediction_ = model.predict_proba(test_xs)
            if prediction__weighted == None: 
                prediction__weighted = prediction_ * self.model_dict[model_index]["weight"]
            prediction__weighted += prediction_ * self.model_dict[model_index]["weight"]

        return prediction__weighted

In [41]:
model_path = "./models_diff"
classifier_ensemble = AccuracyEnsemble(model_path=model_path, device=device, eval_positions=eval_position, verbose=False)
classifier_ensemble.fit(train_xs, train_ys)
prediction_ = classifier.predict_proba(test_xs)
roc, ce = tabular_metrics.auc_metric(test_ys, prediction_), tabular_metrics.cross_entropy(test_ys, prediction_)
'AUC', float(roc), 'Cross Entropy', float(ce)

Using style prior: True
Using cpu:0 device
---------------------------------------------
{'batch_size': 1, 'eval_pos_seq_len_sampler': <function train.<locals>.eval_pos_seq_len_sampler at 0x1618ef940>, 'seq_len_maximum': 10, 'device': 'cpu:0', 'num_features': 100, 'hyperparameters': {'lr': 0.0001, 'dropout': 0.0, 'emsize': 512, 'batch_size': 1, 'nlayers': 12, 'num_features': 100, 'nhead': 4, 'nhid_factor': 2, 'bptt': 10, 'eval_positions': [972], 'seq_len_used': 50, 'sampling': 'mixed', 'epochs': 400, 'num_steps': 8192, 'verbose': False, 'mix_activations': True, 'nan_prob_unknown_reason_reason_prior': 1.0, 'categorical_feature_p': 0.2, 'nan_prob_no_reason': 0.0, 'nan_prob_unknown_reason': 0.0, 'nan_prob_a_reason': 0.0, 'max_num_classes': 10, 'num_classes': 2, 'noise_type': 'Gaussian', 'balanced': False, 'normalize_to_ranking': False, 'set_value_to_nan': 0.1, 'normalize_by_used_features': True, 'num_features_used': <function load_model.<locals>.<lambda> at 0x1617f24c0>, 'num_categorical_

IndexError: tuple index out of range