In [1]:
import sys
import json
from gbdt_uncertainty.training import generate_ensemble_regression
import os
import numpy as np
import joblib
from catboost import Pool, CatBoostRegressor

In [2]:
# Run mode selecting which experiment cells execute; hard-coded here in place of
# the command-line argument named in the trailing comment.
mode = "regression" #sys.argv[1]

def create_dir(name):
    """Create the directory `name` (and any missing parents) if it does not exist.

    Args:
        name: Path of the directory to create.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which left a
    # window between the check and the creation.
    os.makedirs(name, exist_ok=True)

In [7]:
def load_regression_dataset(name):
    """Load a regression dataset together with its predefined train/test splits.

    "YearPredictionMSD" is read from the local file ``datasets/YearPredictionMSD.txt``
    and uses its single default split. Every other name is fetched from the
    DropoutUncertaintyExps ``UCI_Datasets`` repository on GitHub.

    Args:
        name: Dataset name (sub-directory name in the UCI_Datasets repository, or
            "YearPredictionMSD").

    Returns:
        Tuple ``(X, y, index_train, index_test, n_splits)``: the feature matrix, the
        target vector, and one list of row indices per split for train and test.
    """

    def _load_indices(source):
        # np.loadtxt returns a 0-d array when the file holds a single value;
        # atleast_1d keeps the result iterable in that case.
        return [int(i) for i in np.atleast_1d(np.loadtxt(source))]

    index_train = []
    index_test = []

    if name == "YearPredictionMSD":
        # https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD
        data = np.loadtxt("datasets/" + name + ".txt", delimiter=",")
        n_splits = 1
        index_features = list(range(1, 91))
        index_target = 0
        # default split for this dataset, see the URL above
        index_train.append(list(range(463715)))
        index_test.append(list(range(463715, 515345)))
    else:
        # repository with all UCI datasets
        url = "https://raw.githubusercontent.com/yaringal/DropoutUncertaintyExps/master/UCI_Datasets/" + name + "/data/"
        data = np.loadtxt(url + "data.txt")
        n_splits = int(np.loadtxt(url + "n_splits.txt"))
        index_features = _load_indices(url + "index_features.txt")
        index_target = int(np.loadtxt(url + "index_target.txt"))
        # prepare row indices for all train/test splits
        for split in range(n_splits):
            index_train.append(_load_indices(url + "index_train_" + str(split) + ".txt"))
            index_test.append(_load_indices(url + "index_test_" + str(split) + ".txt"))

    X = data[:, index_features]  # features
    y = data[:, index_target]  # target

    return X, y, index_train, index_test, n_splits

In [8]:
def make_train_val_test(X, y, index_train, index_test, fold):
    """Slice one fold into full-train, train, validation and test arrays.

    The first 80% of the fold's training rows (in the given order) form the
    tuning train set; the remaining 20% form the validation set.

    Returns:
        ``(X_train_all, y_train_all, X_train, y_train,
        X_validation, y_validation, X_test, y_test)``
    """
    train_rows, test_rows = index_train[fold], index_test[fold]

    # all training instances of this fold, and the held-out test instances
    X_train_all, y_train_all = X[train_rows, :], y[train_rows]
    X_test, y_test = X[test_rows, :], y[test_rows]

    # position separating the tuning-train part from the validation part
    cut = int(0.8 * X_train_all.shape[0])
    X_train, X_validation = X_train_all[0:cut, :], X_train_all[cut:, :]
    y_train, y_validation = y_train_all[0:cut], y_train_all[cut:]

    return (X_train_all, y_train_all,
            X_train, y_train,
            X_validation, y_validation,
            X_test, y_test)

In [28]:
def tune_parameters_regression(X, y, index_train, index_test, n_splits, alg='sgb'):
    """Grid-search tree depth, learning rate and sample rate on every fold.

    For each fold and each grid point a CatBoostRegressor with the
    RMSEWithUncertainty loss is fit on the fold's tuning-train part, and the
    configuration with the smallest final validation loss is kept.

    Args:
        X, y: Full feature matrix and target vector.
        index_train, index_test: Per-fold row indices, as returned by
            load_regression_dataset.
        n_splits: Number of folds to tune.
        alg: "sgb" or "sglb" (sample rate is tuned), "sgb-fixed" (sample rate 0.5)
            or "sglb-fixed" (sample rate 1.0).

    Returns:
        A list with one dict per fold: ``{'depth': ..., 'lr': ..., 'sample': ...}``.

    Raises:
        ValueError: If `alg` is not one of the supported names.
    """
    # list of hyperparameters for grid search
    # we do not tune the number of trees, it is important for virtual ensembles
    sample_grids = {
        "sgb": [0.25, 0.5, 0.75],    # by default, we tune sample rate
        "sglb": [0.25, 0.5, 0.75],
        "sgb-fixed": [0.5],          # sgb without sample rate tuning
        "sglb-fixed": [1.0],         # sglb without sample rate tuning
    }
    if alg not in sample_grids:
        raise ValueError("unknown alg %r; expected one of %s" % (alg, sorted(sample_grids)))

    depths = [3, 4, 5, 6]      # tree depth
    lrs = [0.001, 0.01, 0.1]   # learning rate
    samples = sample_grids[alg]
    shape = (len(depths), len(lrs), len(samples))

    # the sglb variants add posterior sampling on top of the common options
    extra_options = {}
    if alg in ("sglb", "sglb-fixed"):
        extra_options = {"posterior_sampling": True, "allow_writing_files": False}

    params = []
    seed = 1000 # starting random seed for hyperparameter tuning

    for fold in range(n_splits):

        # only the tuning-train and validation parts are needed here
        _, _, X_train, y_train, X_validation, y_validation, _, _ = make_train_val_test(
            X, y, index_train, index_test, fold)
        train_pool = Pool(X_train, y_train)
        validation_pool = Pool(X_validation, y_validation)

        # perform grid search
        results = np.zeros(shape)
        for d, depth in enumerate(depths):
            for l, lr in enumerate(lrs):
                for s, sample in enumerate(samples):
                    model = CatBoostRegressor(loss_function='RMSEWithUncertainty',
                                              learning_rate=lr, depth=depth,
                                              subsample=sample, bootstrap_type='Bernoulli',
                                              verbose=False, random_seed=seed,
                                              **extra_options)

                    model.fit(train_pool, eval_set=validation_pool, use_best_model=False)

                    # validation loss after the last iteration
                    results[d, l, s] = model.evals_result_['validation']['RMSEWithUncertainty'][-1]

                    seed += 1 # a fresh seed for every fitted model

        # get best parameters
        argmin = np.unravel_index(np.argmin(results), shape)
        params.append({'depth': depths[argmin[0]],
                       'lr': lrs[argmin[1]],
                       'sample': samples[argmin[2]]})

    return params

In [7]:
def tune_parameters_regression_parkinsons(dataset_name, alg='sgb', n_splits=1):
    """Grid-search depth, learning rate and sample rate on file-based pools.

    The train and validation pools are read from ``datasets/<dataset_name>/train``
    and ``.../validation`` using the column description ``.../pool.cd``. For every
    grid point a CatBoostRegressor with the RMSEWithUncertainty loss is fit and
    the configuration with the smallest final validation loss is kept.

    Args:
        dataset_name: Sub-directory of ``datasets`` holding the pool files.
        alg: "sgb" or "sglb" (sample rate is tuned), "sgb-fixed" (sample rate 0.5)
            or "sglb-fixed" (sample rate 1.0).
        n_splits: Number of times the search is repeated (same data, new seeds).

    Returns:
        A list with one dict per repetition:
        ``{'depth': ..., 'lr': ..., 'sample': ...}``.

    Raises:
        ValueError: If `alg` is not one of the supported names.
    """
    # list of hyperparameters for grid search
    # we do not tune the number of trees, it is important for virtual ensembles
    sample_grids = {
        "sgb": [0.25, 0.5, 0.75],    # by default, we tune sample rate
        "sglb": [0.25, 0.5, 0.75],
        "sgb-fixed": [0.5],          # sgb without sample rate tuning
        "sglb-fixed": [1.0],         # sglb without sample rate tuning
    }
    if alg not in sample_grids:
        raise ValueError("unknown alg %r; expected one of %s" % (alg, sorted(sample_grids)))

    depths = [3, 4, 5, 6]      # tree depth
    lrs = [0.001, 0.01, 0.1]   # learning rate
    samples = sample_grids[alg]
    shape = (len(depths), len(lrs), len(samples))

    # the sglb variants add posterior sampling on top of the common options
    extra_options = {}
    if alg in ("sglb", "sglb-fixed"):
        extra_options = {"posterior_sampling": True, "allow_writing_files": False}

    # load and prepare data
    data_dir = os.path.join('datasets', dataset_name)
    cd_file = os.path.join(data_dir, 'pool.cd')
    train_pool = Pool(data=os.path.join(data_dir, 'train'), column_description=cd_file)
    validation_pool = Pool(data=os.path.join(data_dir, 'validation'), column_description=cd_file)

    params = []
    seed = 1000 # starting random seed for hyperparameter tuning

    for fold in range(n_splits):

        # perform grid search
        results = np.zeros(shape)
        for d, depth in enumerate(depths):
            for l, lr in enumerate(lrs):
                for s, sample in enumerate(samples):
                    model = CatBoostRegressor(loss_function='RMSEWithUncertainty',
                                              learning_rate=lr, depth=depth,
                                              subsample=sample, bootstrap_type='Bernoulli',
                                              verbose=False, random_seed=seed,
                                              **extra_options)

                    model.fit(train_pool, eval_set=validation_pool, use_best_model=False)

                    # Record the loss and advance the seed for EVERY grid point.
                    # These two statements must stay inside the innermost loop,
                    # otherwise only the last sample rate is ever scored.
                    results[d, l, s] = model.evals_result_['validation']['RMSEWithUncertainty'][-1]
                    seed += 1

        # get best parameters
        argmin = np.unravel_index(np.argmin(results), shape)
        params.append({'depth': depths[argmin[0]],
                       'lr': lrs[argmin[1]],
                       'sample': samples[argmin[2]]})

    return params

In [29]:
def generate_ensemble_regression(dataset_name, X, y, index_train, index_test, n_splits, params, alg="sgb", num_models=10):
    """Train and save an ensemble of CatBoost models for every fold.

    Each model is fit on the fold's full training set with the tuned parameters
    of that fold and written to
    ``results/models/<dataset_name>_<alg>_f<fold>_<i>`` in "cbm" format.

    Args:
        dataset_name: Name used in the saved model file names.
        X, y: Full feature matrix and target vector.
        index_train, index_test: Per-fold row indices.
        n_splits: Number of folds.
        params: Per-fold dicts with keys 'depth', 'lr' and 'sample'.
        alg: "sgb"/"sgb-fixed" or "sglb"/"sglb-fixed" (adds posterior sampling).
        num_models: Ensemble size per fold.

    Raises:
        ValueError: If `alg` is not one of the supported names.
    """
    if alg in ('sgb', 'sgb-fixed'):
        extra_options = {}
    elif alg in ('sglb', 'sglb-fixed'):
        extra_options = {'posterior_sampling': True, 'allow_writing_files': False}
    else:
        # previously an unknown name left `model` unbound (or reused a stale one)
        raise ValueError("unknown alg %r" % (alg,))

    for fold in range(n_splits):

        # make catboost pools; the tuning train/validation parts are not needed here
        X_train_all, y_train_all, _, _, _, _, X_test, y_test = make_train_val_test(
            X, y, index_train, index_test, fold)
        full_train_pool = Pool(X_train_all, y_train_all)
        test_pool = Pool(X_test, y_test)

        # params contains optimal parameters for each fold
        depth = params[fold]['depth']
        lr = params[fold]['lr']
        sample = params[fold]['sample']

        for i in range(num_models):
            # Seeds start at 10 * fold and grow by one per ensemble element.
            # NOTE(review): with num_models > 10 the seed ranges of consecutive folds overlap.
            seed = 10 * fold + i
            model = CatBoostRegressor(loss_function='RMSEWithUncertainty', verbose=False,
                                      learning_rate=lr, depth=depth, subsample=sample,
                                      bootstrap_type='Bernoulli', custom_metric='RMSE',
                                      random_seed=seed, **extra_options)

            # the test pool is only monitored; use_best_model=False keeps it from choosing the best iteration
            model.fit(full_train_pool, eval_set=test_pool, use_best_model=False)
            model.save_model("results/models/" + dataset_name + "_" + alg + "_f" + str(fold) + "_" + str(i), format="cbm")

In [25]:
def generate_ensemble_regression_parkinsions(dataset_name, params, alg="sgb", num_models=10, n_splits=1):
    """Train and save an ensemble of CatBoost models from file-based pools.

    The training pool is read from ``datasets/<dataset_name>/full_train`` and the
    monitored pool from ``.../test``, both with ``.../pool.cd``. Models are written
    to ``results/models/<dataset_name>_<alg>_f<fold>_<i>`` in "cbm" format.

    Args:
        dataset_name: Sub-directory of ``datasets`` holding the pool files; also
            used in the saved model file names.
        params: Per-fold dicts with keys 'depth', 'lr' and 'sample'.
        alg: "sgb"/"sgb-fixed" or "sglb"/"sglb-fixed" (adds posterior sampling).
        num_models: Ensemble size per fold.
        n_splits: Number of folds (every fold uses the same files).

    Raises:
        ValueError: If `alg` is not one of the supported names.
    """
    if alg in ('sgb', 'sgb-fixed'):
        extra_options = {}
    elif alg in ('sglb', 'sglb-fixed'):
        extra_options = {'posterior_sampling': True, 'allow_writing_files': False}
    else:
        # previously an unknown name left `model` unbound (or reused a stale one)
        raise ValueError("unknown alg %r" % (alg,))

    # load and prepare data; the files do not depend on the fold, so read them once
    data_dir = os.path.join('datasets', dataset_name)
    cd_file = os.path.join(data_dir, 'pool.cd')
    full_train_pool = Pool(data=os.path.join(data_dir, 'full_train'), column_description=cd_file)
    test_pool = Pool(data=os.path.join(data_dir, 'test'), column_description=cd_file)

    for fold in range(n_splits):

        # params contains optimal parameters for each fold
        depth = params[fold]['depth']
        lr = params[fold]['lr']
        sample = params[fold]['sample']

        for i in range(num_models):
            # Seeds start at 10 * fold and grow by one per ensemble element.
            # NOTE(review): with num_models > 10 the seed ranges of consecutive folds overlap.
            seed = 10 * fold + i
            model = CatBoostRegressor(loss_function='RMSEWithUncertainty', verbose=False,
                                      learning_rate=lr, depth=depth, subsample=sample,
                                      bootstrap_type='Bernoulli', custom_metric='RMSE',
                                      random_seed=seed, **extra_options)

            # the test pool is only monitored; use_best_model=False keeps it from choosing the best iteration
            model.fit(full_train_pool, eval_set=test_pool, use_best_model=False)
            model.save_model("results/models/" + dataset_name + "_" + alg + "_f" + str(fold) + "_" + str(i), format="cbm")

In [12]:
if mode == "regression":

    # 1 -> run the hyperparameter grid search first;
    # 0 -> reuse the JSON files already present in results/params.
    # (Per the original inline comment this value stood in for int(sys.argv[2]);
    # the try/except around the constant assignment could never trigger and was removed.)
    tuning = 0

    datasets = ["bostonHousing"]
    # full benchmark list:
    #     ["bostonHousing", "concrete", "energy", "kin8nm",
    #      "naval-propulsion-plant", "power-plant", "protein-tertiary-structure",
    #      "wine-quality-red", "yacht", "YearPredictionMSD"]

    algorithms = ['sgb-fixed', 'sglb-fixed']
    # for -fixed we do not tune sample rate and use 0.5 for sgb and 1. for sglb

    for name in datasets:
        print("dataset =", name)

        # load once per dataset: tuning and training use the same data and splits,
        # and every call re-reads the dataset files
        X, y, index_train, index_test, n_splits = load_regression_dataset(name)

        if tuning == 1:
            create_dir("results/params")

            # Tune hyperparameters
            print("tuning hyperparameters...")

            for alg in algorithms:
                print(alg)
                params = tune_parameters_regression(X, y, index_train,
                                                    index_test, n_splits, alg=alg)
                with open("results/params/" + name + "_" + alg + '.json', 'w') as fp:
                    json.dump(params, fp)

        # Training models
        print("training models...")
        create_dir("results/models")

        for alg in algorithms:
            print(alg)
            with open("results/params/" + name + "_" + alg + '.json', 'r') as fp:
                params = json.load(fp)
            generate_ensemble_regression(name, X, y, index_train, index_test,
                                         n_splits, params, alg=alg)
        print()

dataset = bostonHousing
training models...
sgb-fixed
sglb-fixed



In [30]:
if mode == "regression":

    # 1 -> run the hyperparameter grid search first;
    # 0 -> reuse the JSON files already present in results/params.
    # (Per the original inline comment this value stood in for int(sys.argv[2]);
    # the try/except around the constant assignment could never trigger and was removed.)
    tuning = 1

    datasets = ["yacht"]
    # full benchmark list:
    #     ["bostonHousing", "concrete", "energy", "kin8nm",
    #      "naval-propulsion-plant", "power-plant", "protein-tertiary-structure",
    #      "wine-quality-red", "yacht", "YearPredictionMSD"]

    algorithms = ['sgb-fixed', 'sglb-fixed']
    # for -fixed we do not tune sample rate and use 0.5 for sgb and 1. for sglb

    for name in datasets:
        print("dataset =", name)

        # load once per dataset: tuning and training use the same data and splits,
        # and every call re-reads the dataset files
        X, y, index_train, index_test, n_splits = load_regression_dataset(name)

        if tuning == 1:
            create_dir("results/params")

            # Tune hyperparameters
            print("tuning hyperparameters...")

            for alg in algorithms:
                print(alg)
                params = tune_parameters_regression(X, y, index_train,
                                                    index_test, n_splits, alg=alg)
                with open("results/params/" + name + "_" + alg + '.json', 'w') as fp:
                    json.dump(params, fp)

        # Training models
        print("training models...")
        create_dir("results/models")

        for alg in algorithms:
            print(alg)
            with open("results/params/" + name + "_" + alg + '.json', 'r') as fp:
                params = json.load(fp)
            generate_ensemble_regression(name, X, y, index_train, index_test,
                                         n_splits, params, alg=alg)
        print()

dataset = yacht
tuning hyperparameters...
sgb-fixed
sglb-fixed
training models...
sgb-fixed
sglb-fixed



In [26]:
if mode == "regression":

    # 1 -> run the hyperparameter grid search first;
    # 0 -> reuse the JSON files already present in results/params.
    # (Per the original inline comment this value stood in for int(sys.argv[2]);
    # the try/except around the constant assignment could never trigger and was removed.)
    tuning = 0

    # this dataset is read from local pool files by the *_parkinsons helpers,
    # so no in-memory arrays are loaded in this cell
    datasets = ["parkinsons"]

    algorithms = ['sgb-fixed', 'sglb-fixed']
    # for -fixed we do not tune sample rate and use 0.5 for sgb and 1. for sglb

    for name in datasets:
        print("dataset =", name)

        if tuning == 1:
            create_dir("results/params")

            # Tune hyperparameters
            print("tuning hyperparameters...")

            for alg in algorithms:
                print(alg)
                params = tune_parameters_regression_parkinsons(name, alg=alg)
                with open("results/params/" + name + "_" + alg + '.json', 'w') as fp:
                    json.dump(params, fp)

        # Training models
        print("training models...")
        create_dir("results/models")

        for alg in algorithms:
            print(alg)
            with open("results/params/" + name + "_" + alg + '.json', 'r') as fp:
                params = json.load(fp)
            generate_ensemble_regression_parkinsions(name, params, alg=alg)
        print()

dataset = parkinsons
training models...
sgb-fixed
sglb-fixed



# Results

In [1]:
import numpy as np
from catboost import Pool, CatBoostRegressor
from scipy.stats import ttest_rel
from gbdt_uncertainty.assessment import prr_regression, nll_regression, ens_nll_regression, ood_detect
from gbdt_uncertainty.uncertainty import ensemble_uncertainties_regression
import math
import joblib
import sys
from collections import defaultdict

In [8]:
# Datasets and algorithms covered by the result tables.
# Full benchmark list: "bostonHousing", "concrete", "energy", "kin8nm",
# "naval-propulsion-plant", "power-plant", "protein-tertiary-structure",
# "wine-quality-red", "yacht", "YearPredictionMSD"
datasets = ["bostonHousing"]
algorithms = ["sgb-fixed", "sglb-fixed"]

# Short dataset labels for proper tables.
# Labels for the rest of the full benchmark: concrete -> Concrete, energy -> Energy,
# kin8nm -> Kin8nm, naval-propulsion-plant -> Naval-p, power-plant -> Power-p,
# protein-tertiary-structure -> Protein, wine-quality-red -> Wine-qu,
# YearPredictionMSD -> Year
convert_name = dict(
    bostonHousing="BostonH",
    yacht="Yacht",
    parkinsons="Parkinsons",
)

In [3]:
def calc_rmse(preds, target, raw=False):
    """Root mean squared error between `preds` and `target`.

    With ``raw=True`` the element-wise squared errors are returned instead
    (no mean, no square root), for use on individual predictions.
    """
    squared_errors = (preds - target) ** 2
    return squared_errors if raw else np.sqrt(np.mean(squared_errors))

def ens_rmse(target, preds, epsilon=1e-8, raw=False):
    """RMSE of the ensemble-averaged prediction.

    `preds` is indexed as [model, sample, component]; component 0 is averaged
    over the model axis and compared with `target` via calc_rmse. With
    ``raw=True`` the per-sample squared errors are returned. `epsilon` is
    accepted but not used.
    """
    ensemble_mean = np.mean(preds[:, :, 0], axis=0)
    # raw=True yields values for individual predictions
    return calc_rmse(ensemble_mean, target, raw=True) if raw else calc_rmse(ensemble_mean, target)

In [6]:
def load_and_predict(X, name, alg, fold, i):
    """Load the saved model ``<name>_<alg>_f<fold>_<i>`` and predict on `X`.

    "rf" models are loaded with joblib and their predictions are paired with a
    constant 1 in a second column; every other `alg` is loaded as a
    CatBoostRegressor and its predictions are returned unchanged.

    Returns:
        ``(preds, model)``.
    """
    model_path = "results/models/" + "_".join([name, alg, "f" + str(fold), str(i)])

    if alg != "rf":
        model = CatBoostRegressor()
        model.load_model(model_path)
        return model.predict(X), model

    model = joblib.load(model_path)
    point_preds = model.predict(X)
    # 1 for unknown variance
    return np.array([(p, 1) for p in point_preds]), model
    
def predict(X, model, alg):
    """Predict on `X` with an already loaded model.

    For ``alg == "rf"`` every prediction is paired with a constant 1 in a second
    column; for any other `alg` the model output is returned unchanged.
    """
    outputs = model.predict(X)
    if alg != "rf":
        return outputs
    return np.array([(value, 1) for value in outputs])
    
def rf_virtual_ensembles_predict(model, X, count=10):
    """Split the trees of `model` into `count` consecutive groups and predict with each.

    Each group's prediction is the mean over its trees, paired with a constant 1
    in a second column.

    Returns:
        Array indexed as [sample, group, component].
    """
    trees = model.estimators_
    num_trees = len(trees)

    members = []
    for part in range(count):
        # consecutive slice of trees forming this virtual ensemble member
        start = int(part * num_trees / count)
        stop = int((part + 1) * num_trees / count)
        tree_preds = np.array([trees[pos].predict(X) for pos in range(start, stop)])
        averaged = np.mean(tree_preds, axis=0)
        # 1 for unknown variance
        members.append(np.array([(p, 1) for p in averaged]))

    return np.swapaxes(np.array(members), 0, 1)
    
def virtual_ensembles_load_and_predict(X, name, alg, fold, i, num_models=10):
    """Load the saved model ``<name>_<alg>_f<fold>_<i>`` and build a virtual ensemble on `X`.

    "rf" models are loaded with joblib and split with rf_virtual_ensembles_predict;
    every other `alg` is loaded as a CatBoostRegressor and queried with
    ``prediction_type='VirtEnsembles'``.

    Args:
        X: Samples to predict on.
        name, alg, fold, i: Components of the saved model file name.
        num_models: Number of virtual ensemble members (now honoured for "rf"
            too; it used to be ignored there in favour of the helper's default).

    Returns:
        ``(all_preds, model)`` where `all_preds` is the helper/model output with
        its first two axes swapped.
    """
    model_path = "results/models/" + name + "_" + alg + "_f" + str(fold) + "_" + str(i)
    if alg == "rf":
        model = joblib.load(model_path)
        all_preds = rf_virtual_ensembles_predict(model, X, count=num_models)
    else:
        model = CatBoostRegressor()
        model.load_model(model_path)
        all_preds = model.virtual_ensembles_predict(X, prediction_type='VirtEnsembles', virtual_ensembles_count=num_models)
    return np.swapaxes(all_preds, 0, 1), model
  
def virtual_ensembles_predict(X, model, alg, num_models=10):
    """Build a virtual ensemble on `X` from an already loaded model.

    Args:
        X: Samples to predict on.
        model: Loaded model; for "rf" it is passed to rf_virtual_ensembles_predict,
            otherwise its ``virtual_ensembles_predict`` method is called.
        alg: Algorithm name; only "rf" is treated specially.
        num_models: Number of virtual ensemble members (now honoured for "rf"
            too; it used to be ignored there in favour of the helper's default).

    Returns:
        The helper/model output with its first two axes swapped.
    """
    if alg == "rf":
        all_preds = rf_virtual_ensembles_predict(model, X, count=num_models)
    else:
        all_preds = model.virtual_ensembles_predict(X, prediction_type='VirtEnsembles', virtual_ensembles_count=num_models)
    return np.swapaxes(all_preds, 0, 1)
    
def compute_significance(values_all, metric, minimize=True, raw=False):
    """Pick the best row of `values_all` and the rows not significantly different from it.

    Args:
        values_all: One row of metric values per algorithm (columns are folds or
            individual elements). With ``raw=True`` it is indexed as
            ``values_all[:, 0, :]`` first.
        metric: Metric name; with ``raw=True`` and ``metric == "rmse"`` the square
            root of the row means is taken.
        minimize: Whether smaller means are better.
        raw: See `values_all` and `metric`.

    Returns:
        ``(values_mean, textbf)``: the per-row means and the set of row indices to
        print in bold (the best row plus every row whose paired t-test against it
        has a p-value above 0.05).
    """
    if raw:
        values_all = values_all[:, 0, :]

    values_mean = np.mean(values_all, axis=1) # mean wrt folds or elements

    if raw and metric == "rmse":
        values_mean = np.sqrt(values_mean)

    # choose best algorithm (NaN means are ignored)
    best_idx = np.nanargmin(values_mean) if minimize else np.nanargmax(values_mean)

    textbf = {best_idx} # for all algorithms insignificantly different from the best one

    # A paired t-test on fewer than two observations only yields NaN (plus
    # division-by-zero warnings), so no other row can qualify in that case.
    if len(values_all[best_idx]) < 2:
        return values_mean, textbf

    # compute statistical significance on test or wrt folds
    for idx in range(len(values_mean)):
        if idx == best_idx:
            # comparing the best row with itself gives a NaN p-value; it is already bold
            continue
        test = ttest_rel(values_all[best_idx], values_all[idx]) # paired t-test
        if test[1] > 0.05:
            textbf.add(idx)

    return values_mean, textbf

def compute_best(values, minimize=True):
    """Return the set of indices whose value equals the best (NaN-ignoring) value.

    The best value is the minimum when `minimize` is true, otherwise the maximum.
    """
    pick_best = np.nanargmin if minimize else np.nanargmax
    best_idx = pick_best(values)
    best_value = values[best_idx]

    # ties with the best value are included as well
    ties = {idx for idx in range(len(values)) if values[idx] == best_value}
    return {best_idx} | ties
    
def make_table_entry(values_all, metric, minimize=True, round=2, raw=True):
    """Format the mean metric value of every row as a run of LaTeX table cells.

    Means and the set of bold indices come from compute_significance; each mean
    is rounded to `round` decimals, wrapped in ``\\textbf{}`` when its index is in
    that set, and followed by a cell separator.
    """
    num_values = len(values_all)
    values_mean, textbf = compute_significance(values_all, metric, minimize=minimize, raw=raw)

    # prepare all results in latex format
    cells = []
    for idx in range(num_values):
        text = str(np.round(values_mean[idx], round))
        if idx in textbf:
            text = "\\textbf{" + text + "}"
        cells.append(text + " & ")

    return "".join(cells)
            
def aggregate_results(name, modes = ["single", "ens", "virt"], 
                      algorithms = ['sgb-fixed', 'sglb-fixed'], num_models = 10, 
                      raw=False):
    """Collect per-fold evaluation metrics for every (mode, algorithm) pair.

    For each mode ("single" model, explicit "ens"emble, "virt"ual ensemble) and
    each algorithm, the saved models are loaded and evaluated on the fold's test
    data and on the out-of-domain file ``datasets/ood/<name>``.

    Args:
        name: Dataset name. For 'parkinsons' the test data is read from a
            CatBoost Pool and a single split is used; every other name goes
            through load_regression_dataset / make_train_val_test.
        modes: Evaluation modes to run, in output order (outer loop).
        algorithms: Algorithm names to evaluate, in output order (inner loop).
        num_models: Number of saved models combined in "ens" mode.
        raw: Forwarded to the rmse / nll computations.

    Returns:
        np.array built from a list with one defaultdict per evaluated
        (mode, algorithm) pair; each maps the keys "rmse", "nll", "TU_prr",
        "KU_prr", "TU_auc" and "KU_auc" to a list with one entry per fold.
        The ("virt", sgb / sgb-fixed) combinations are skipped entirely.
    """
    # NOTE(review): the list defaults above are shared between calls; they are
    # only iterated in this function, never modified.
    
    n_splits=1
    if name != 'parkinsons':
        X, y, index_train, index_test, n_splits = load_regression_dataset(name)
    
    results = [] # metric values for all algorithms and all folds
    
    # for ood evaluation
    ood_X_test = np.loadtxt("datasets/ood/" + name)
    if name == "naval-propulsion-plant":
        # drop the last column of the OOD file for this dataset
        ood_X_test = ood_X_test[:, :-1]
    ood_size = len(ood_X_test)
        
    for mode in modes:
        for alg in algorithms:
        
            values = defaultdict(lambda: []) # metric values for all folds for given algorithm

            for fold in range(n_splits):
                if name != 'parkinsons':
                    X_train_all, y_train_all, X_train, y_train, X_validation, y_validation, X_test, y_test = make_train_val_test(
                                                                                        X, y, index_train, index_test, fold)
                else:
                    # NOTE(review): the test features/labels are read from the same
                    # "datasets/ood/<name>" path as ood_X_test above -- confirm that
                    # this is intended and not meant to be the dataset's own test file.
                    ood_test_pool = Pool(data="datasets/ood/" + name, column_description="datasets/"+name+"/pool.cd")
                    X_test, y_test = ood_test_pool.get_features(), ood_test_pool.get_label()
                    y_test = np.array(y_test).astype(np.float64)
                test_size = len(X_test)
                # 0 marks test samples, 1 marks OOD samples
                domain_labels = np.concatenate([np.zeros(test_size), np.ones(ood_size)])

                if mode == "single":
                    # use 0th model from ensemble as a single model
                    preds, model = load_and_predict(X_test, name, alg, fold, 0)

                    # column 0 and column 1 of the predictions are passed as the
                    # second and third argument of the nll / prr helpers
                    values["rmse"].append(calc_rmse(preds[:, 0], y_test, raw=raw))
                    values["nll"].append(nll_regression(y_test, preds[:, 0], preds[:, 1], raw=raw))
                    values["TU_prr"].append(prr_regression(y_test, preds[:, 0], preds[:, 1]))
                    # the KU metrics are recorded as NaN for a single model
                    values["KU_prr"].append(float("nan"))
                    values["KU_auc"].append(float("nan"))
                    
                    ood_preds = predict(ood_X_test, model, alg)
                    in_measure = preds[:, 1]
                    out_measure = ood_preds[:, 1]
                    values["TU_auc"].append(ood_detect(domain_labels, in_measure, out_measure, mode="ROC"))

                if mode == "ens":
                    all_preds = [] # predictions of all models in ensemble
                    all_preds_ood = []
                    
                    for i in range(num_models):
                        preds, model = load_and_predict(X_test, name, alg, fold, i)
                        all_preds.append(preds)
                        preds = predict(ood_X_test, model, alg)
                        all_preds_ood.append(preds)   
                    all_preds = np.array(all_preds)
                    
                    values["rmse"].append(ens_rmse(y_test, all_preds, raw=raw))
                    values["nll"].append(ens_nll_regression(y_test, all_preds, raw=raw)) 
                    
                    # the uncertainty helper receives the predictions with axes 0 and 1
                    # swapped; its "tvar" entry is used as TU and "varm" as KU
                    TU = ensemble_uncertainties_regression(np.swapaxes(all_preds, 0, 1))["tvar"]
                    KU = ensemble_uncertainties_regression(np.swapaxes(all_preds, 0, 1))["varm"]

                    mean_preds = np.mean(all_preds[:, :, 0], axis=0)

                    values["TU_prr"].append(prr_regression(y_test, mean_preds, TU))
                    values["KU_prr"].append(prr_regression(y_test, mean_preds, KU))
                    
                    all_preds_ood = np.array(all_preds_ood)
                    TU_ood = ensemble_uncertainties_regression(np.swapaxes(all_preds_ood, 0, 1))["tvar"]
                    KU_ood = ensemble_uncertainties_regression(np.swapaxes(all_preds_ood, 0, 1))["varm"]
                    values["TU_auc"].append(ood_detect(domain_labels, TU, TU_ood, mode="ROC"))
                    values["KU_auc"].append(ood_detect(domain_labels, KU, KU_ood, mode="ROC"))
                        
                if mode == "virt":
                    if alg in ["sgb", "sgb-fixed"]: # we do not evaluate virtual sgb model
                        continue
                    # generate virtual ensemble from 0th model
                    all_preds, model = virtual_ensembles_load_and_predict(X_test, name, alg, fold, 0)

                    values["rmse"].append(ens_rmse(y_test, all_preds, raw=raw))
                    values["nll"].append(ens_nll_regression(y_test, all_preds, raw=raw)) 
                    
                    TU = ensemble_uncertainties_regression(np.swapaxes(all_preds, 0, 1))["tvar"]
                    KU = ensemble_uncertainties_regression(np.swapaxes(all_preds, 0, 1))["varm"]
                    
                    mean_preds = np.mean(all_preds[:, :, 0], axis=0)

                    values["TU_prr"].append(prr_regression(y_test, mean_preds, TU))
                    values["KU_prr"].append(prr_regression(y_test, mean_preds, KU))
                    
                    all_preds_ood = virtual_ensembles_predict(ood_X_test, model, alg)
                    all_preds_ood = np.array(all_preds_ood)
                    
                    TU_ood = ensemble_uncertainties_regression(np.swapaxes(all_preds_ood, 0, 1))["tvar"]
                    KU_ood = ensemble_uncertainties_regression(np.swapaxes(all_preds_ood, 0, 1))["varm"]
                    
                    values["TU_auc"].append(ood_detect(domain_labels, TU, TU_ood, mode="ROC"))
                    values["KU_auc"].append(ood_detect(domain_labels, KU, KU_ood, mode="ROC"))

            # skipped combinations contribute no entry to the results at all
            if mode == "virt" and alg in ["sgb", "sgb-fixed"]: # we do not evaluate virtual sgb model
                continue
            
            results.append(values)

    return np.array(results)
    
def make_table_element(mean, textbf, idx):
    """Format ``mean[idx]`` as one LaTeX table cell followed by a separator.

    NaN becomes ``---``; otherwise the value is rounded to the nearest integer
    and wrapped in ``\\textbf{}`` when `idx` is in `textbf`.
    """
    value = mean[idx]
    if np.isnan(value):
        return "--- & "

    cell = str(int(np.rint(value)))
    if idx in textbf:
        cell = "\\textbf{" + cell + "}"
    return cell + " & "

In [16]:
table_type = "prr_auc" #sys.argv[1]

if table_type == "prr_auc":
    print("===PRR and AUC-ROC Table===")

    datasets = ["bostonHousing"]

    for name in datasets:

        values = aggregate_results(name, raw=False)

        # rows 0..num-1 hold the TU values, rows num..2*num-1 the KU values
        prr_TU = np.array([values[i]["TU_prr"] for i in range(len(values))])
        prr_KU = np.array([values[i]["KU_prr"] for i in range(len(values))])
        prr = 100*np.concatenate((prr_TU, prr_KU), axis=0)
        mean_prr, textbf_prr = compute_significance(prr, "prr", minimize=False)

        auc_TU = np.array([values[i]["TU_auc"] for i in range(len(values))])
        auc_KU = np.array([values[i]["KU_auc"] for i in range(len(values))])
        auc = 100*np.concatenate((auc_TU, auc_KU), axis=0)
        mean_auc, textbf_auc = compute_significance(auc, "auc", minimize=False)

        num = len(auc_TU)

        # backslashes are escaped explicitly: "\m" is an invalid escape sequence
        table = "\\multirow{2}{*} {" + convert_name[name] + "} & TU &"
        for idx in range(num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num):
            table += make_table_element(mean_auc, textbf_auc, idx)

        print(table.rstrip("& ") + " \\\\")

        table = " & KU & "
        for idx in range(num, 2*num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num, 2*num):
            table += make_table_element(mean_auc, textbf_auc, idx)
        print(table.rstrip("& ") + " \\\\")

        print("\\midrule")

===PRR and AUC-ROC Table===
\multirow{2}{*} {BostonH} & TU &\textbf{47} & \textbf{42} & \textbf{47} & \textbf{44} & \textbf{44} & 96 & 95 & 96 & 95 & 96 \\
 & KU & --- & --- & \textbf{39} & \textbf{41} & \textbf{38} & --- & --- & \textbf{99} & \textbf{98} & \textbf{94} \\
\midrule


In [17]:
table_type = "prr_auc" #sys.argv[1]

if table_type == "prr_auc":
    print("===PRR and AUC-ROC Table===")

    datasets = ["yacht"]

    for name in datasets:

        values = aggregate_results(name, raw=False)

        # rows 0..num-1 hold the TU values, rows num..2*num-1 the KU values
        prr_TU = np.array([values[i]["TU_prr"] for i in range(len(values))])
        prr_KU = np.array([values[i]["KU_prr"] for i in range(len(values))])
        prr = 100*np.concatenate((prr_TU, prr_KU), axis=0)
        mean_prr, textbf_prr = compute_significance(prr, "prr", minimize=False)

        auc_TU = np.array([values[i]["TU_auc"] for i in range(len(values))])
        auc_KU = np.array([values[i]["KU_auc"] for i in range(len(values))])
        auc = 100*np.concatenate((auc_TU, auc_KU), axis=0)
        mean_auc, textbf_auc = compute_significance(auc, "auc", minimize=False)

        num = len(auc_TU)

        # backslashes are escaped explicitly: "\m" is an invalid escape sequence
        table = "\\multirow{2}{*} {" + convert_name[name] + "} & TU &"
        for idx in range(num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num):
            table += make_table_element(mean_auc, textbf_auc, idx)

        print(table.rstrip("& ") + " \\\\")

        table = " & KU & "
        for idx in range(num, 2*num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num, 2*num):
            table += make_table_element(mean_auc, textbf_auc, idx)
        print(table.rstrip("& ") + " \\\\")

        print("\\midrule")

===PRR and AUC-ROC Table===
\multirow{2}{*} {Yacht} & TU &\textbf{88} & \textbf{87} & \textbf{88} & \textbf{88} & \textbf{87} & 77 & 70 & 79 & 74 & 81 \\
 & KU & --- & --- & 74 & 81 & 68 & --- & --- & \textbf{87} & \textbf{83} & \textbf{84} \\
\midrule


In [9]:
table_type = "prr_auc" #sys.argv[1]

if table_type == "prr_auc":
    print("===PRR and AUC-ROC Table===")

    datasets = ["parkinsons"]

    for name in datasets:

        values = aggregate_results(name, raw=False)

        # rows 0..num-1 hold the TU values, rows num..2*num-1 the KU values
        prr_TU = np.array([values[i]["TU_prr"] for i in range(len(values))])
        prr_KU = np.array([values[i]["KU_prr"] for i in range(len(values))])
        prr = 100*np.concatenate((prr_TU, prr_KU), axis=0)
        mean_prr, textbf_prr = compute_significance(prr, "prr", minimize=False)

        auc_TU = np.array([values[i]["TU_auc"] for i in range(len(values))])
        auc_KU = np.array([values[i]["KU_auc"] for i in range(len(values))])
        auc = 100*np.concatenate((auc_TU, auc_KU), axis=0)
        mean_auc, textbf_auc = compute_significance(auc, "auc", minimize=False)

        num = len(auc_TU)

        # backslashes are escaped explicitly: "\m" is an invalid escape sequence
        table = "\\multirow{2}{*} {" + convert_name[name] + "} & TU &"
        for idx in range(num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num):
            table += make_table_element(mean_auc, textbf_auc, idx)

        print(table.rstrip("& ") + " \\\\")

        table = " & KU & "
        for idx in range(num, 2*num):
            table += make_table_element(mean_prr, textbf_prr, idx)

        for idx in range(num, 2*num):
            table += make_table_element(mean_auc, textbf_auc, idx)
        print(table.rstrip("& ") + " \\\\")

        print("\\midrule")

===PRR and AUC-ROC Table===
\multirow{2}{*} {Parkinsons} & TU &-15 & -50 & 6 & -15 & -36 & \textbf{100} & 97 & 88 & 64 & 93 \\
 & KU & --- & --- & \textbf{11} & -13 & -46 & --- & --- & 85 & 59 & 80 \\
\midrule


  test = ttest_rel(values_all[best_idx], values_all[idx]) # paired t-test
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
