In [1]:
import numpy as np
import torch

from adaptive_dg.evaluation.model_loading import load_model, load_losses, compute_best_metrics
from adaptive_dg.evaluation.competence_scores import compute_model_scores, create_features, score_function_create

In [2]:
def compute_ood_coverage(model_class, domain, directory, data_set='BikeSharingSeason', threshold=0.95, seed=42,set_feature=True):
    model = load_model(model_class, domain, directory, data_set=data_set, seed=seed)
    scores_val, scores_id_test, scores_ood = compute_model_scores(model, set_feature=set_feature)

    threshold = np.quantile(scores_val, threshold)
    
    mask_id_test = scores_id_test < threshold
    cov_id = sum(mask_id_test) / len(mask_id_test)
    mask_ood = scores_ood < threshold
    cov_ood = sum(mask_ood) / len(mask_ood)
    
    return cov_id, cov_ood

domain = "H"
data_set = "ColoredMNIST"
seeds = [42, 43, 44, 45, 46]
invariant_model_metrics = [[], []]
standard_model_metrics = [[], []]
set_model_metrics = [[], []]

set_model_metrics_selection = [[], []]
standard_model_metrics_selection = [[], []]
set_model_metrics_selection_alt = [[], []]

threshold = 0.95
for seed in seeds:
    directory = "mnist_exps/invariants/"
    model_class = 'PredYfromX'
    
    metric_invariant = compute_best_metrics(directory, data_set, domain, model_class, seed=seed, version=0)
    invariant_model_metrics[0].append(metric_invariant[1])
    invariant_model_metrics[1].append(metric_invariant[2])


    directory = "mnist_exps/log_exps/"

    model_class = 'PredYfromX'
    cov_id_y, cov_ood_y= compute_ood_coverage(model_class, domain, directory, data_set=data_set, threshold=threshold, set_feature=False)
    metric_std = compute_best_metrics(directory, data_set, domain, model_class, seed=seed, version=0)
    standard_model_metrics[0].append(metric_std[1])
    standard_model_metrics[1].append(metric_std[2])
    standard_model_metrics_selection[0].append(cov_id_y*metric_std[1] + (1-cov_id_y)*metric_invariant[1])
    standard_model_metrics_selection[1].append(cov_ood_y*metric_std[2] + (1-cov_ood_y)*metric_invariant[2])

    model_class = 'PredYfromXSet'
    cov_id_y_set, cov_ood_y_set = compute_ood_coverage(model_class, domain, directory, data_set=data_set, threshold=threshold, set_feature=True)
    metric = compute_best_metrics(directory, data_set, domain, model_class, seed=seed, version=0)
    set_model_metrics[0].append(metric[1])
    set_model_metrics[1].append(metric[2])
    set_model_metrics_selection[0].append(cov_id_y_set*metric[1] + (1-cov_id_y_set)*metric_invariant[1])   
    set_model_metrics_selection[1].append(cov_ood_y_set*metric[2] + (1-cov_ood_y_set)*metric_invariant[2])
    
    set_model_metrics_selection_alt[0].append(cov_id_y_set*metric_std[1] + (1-cov_id_y_set)*metric_invariant[1])
    set_model_metrics_selection_alt[1].append(cov_ood_y_set*metric_std[2] + (1-cov_ood_y_set)*metric_invariant[2])
    
    
print('Invariant', [np.array(a).mean() for a in invariant_model_metrics], [np.array(a).std() for a in invariant_model_metrics])
print("Standar", [np.array(a).mean() for a in standard_model_metrics], [np.array(a).std() for a in standard_model_metrics])
#print("Set", [np.array(a).mean() for a in set_model_metrics], [np.array(a).std() for a in set_model_metrics])
print("Set selection", [np.array(a).mean() for a in set_model_metrics_selection], [np.array(a).std() for a in set_model_metrics_selection])
print("Standard selection",[np.array(a).mean() for a in standard_model_metrics_selection], [np.array(a).std() for a in standard_model_metrics_selection])
#print("set alt", [np.array(a).mean() for a in set_model_metrics_selection_alt], [np.array(a).std() for a in set_model_metrics_selection_alt])

Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Found existing ColoredMNIST dataset in data, skipping download.
Invariant [0.7278182744979859, 0.7312590956687928] [0.009374229215528518, 0.001784034562035086]
Standar [0.8457351088523865, 0.10191137492656707] [0.0033897241806037735, 0.0029509026179080876]
Set selection [0.8431700324148668, 0.7312590956687928] [0.00045089154821336194, 0.001784034562035086]
Standard selection [0.8401614700705515, 0.13726693977711746] [0.0

In [5]:
# Number might deviate slightly from paper results due to different run
print('Invariant', [np.array(a).mean() for a in invariant_model_metrics], [np.array(a).std() for a in invariant_model_metrics])
print("Standar", [np.array(a).mean() for a in standard_model_metrics], [np.array(a).std() for a in standard_model_metrics])
#print("Set", [np.array(a).mean() for a in set_model_metrics], [np.array(a).std() for a in set_model_metrics])
print("Set selection", [np.array(a).mean() for a in set_model_metrics_selection], [np.array(a).std() for a in set_model_metrics_selection])
print("Standard selection",[np.array(a).mean() for a in standard_model_metrics_selection], [np.array(a).std() for a in standard_model_metrics_selection])

Invariant [0.7278182744979859, 0.7312590956687928] [0.009374229215528518, 0.001784034562035086]
Standar [0.8457351088523865, 0.10191137492656707] [0.0033897241806037735, 0.0029509026179080876]
Set selection [0.8431700324148668, 0.7312590956687928] [0.00045089154821336194, 0.001784034562035086]
Standard selection [0.8401614700705515, 0.13726693977711746] [0.003059429037085313, 0.0037124123039713134]
