In [1]:
import numpy as np
import pandas as pd
import os
import json
import argparse

class Args:
    """Attribute bag that stores its constructor arguments unchanged.

    Attributes:
        dataset_name: Value passed as ``dataset_name``.
        train_domain: Value passed as ``train_domain``.
        baseline_path: Value passed as ``baseline_path`` (default ``None``).
        pim_path: Value passed as ``pim_path`` (default ``None``).
        agg: Value passed as ``agg`` (default ``'mean'``).
    """

    def __init__(self, dataset_name, train_domain, baseline_path=None, pim_path=None, agg='mean'):
        # Required settings.
        self.dataset_name, self.train_domain = dataset_name, train_domain
        # Optional settings; each keeps its default when not supplied.
        self.baseline_path, self.pim_path = baseline_path, pim_path
        self.agg = agg




In [2]:
def read_json(file_path):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed as one JSON document with
    ``json.loads``. Blank or whitespace-only lines (for example an extra
    trailing newline) are skipped instead of raising a decode error.

    Args:
        file_path: Path of the file to read.

    Returns:
        list: The parsed objects, in file order. Empty if the file has no
        non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data_all = []
    with open(file_path, 'r') as f:
        # Iterate the handle directly so the whole file is never held in memory.
        for line in f:
            if not line.strip():
                # json.loads('') raises, so blank lines must be filtered out here.
                continue
            data_all.append(json.loads(line))
    return data_all

In [14]:
def get_results_baselines(dataset_name, domain=None, baseline_path=None):
    """Build a table of baseline-score metrics for a single domain.

    For each of the scores ``'msp'``, ``'energy'`` and ``'pe'`` this reads
    ``<baseline_path>/<score>_results.json`` via ``read_json`` and uses the
    first record whose ``'domain_name'`` equals ``domain``.

    Args:
        dataset_name: Accepted for signature compatibility; not used here.
        domain: Value compared against each record's ``'domain_name'``.
        baseline_path: Directory containing the ``*_results.json`` files.

    Returns:
        pandas.DataFrame: One row per score, with columns ``method``,
        ``true_test_acc``, ``estimated_test_acc``, ``gen gap`` (absolute
        difference between estimated and true accuracy),
        ``test_failure_recall``, ``test_success_recall`` and
        ``test_mathews_corr``. Nothing is written to disk.

    Raises:
        IndexError: If a results file has no record for ``domain``.
    """
    column_names = ['method', 'true_test_acc', 'estimated_test_acc', 'gen gap', 'test_failure_recall', 'test_success_recall', 'test_mathews_corr']
    rows = []
    for score_name in ('msp', 'energy', 'pe'):
        records = read_json(os.path.join(baseline_path, score_name+'_results.json'))
        # Only the first record for the requested domain is used.
        match = [rec for rec in records if rec['domain_name']==domain][0]

        true_acc = match['true_test_acc']
        est_acc = match['estimated_test_acc']
        rows.append([
            score_name,
            true_acc,
            est_acc,
            np.abs(est_acc - true_acc),
            match['test_failure_recall'],
            match['test_success_recall'],
            match['test_mathews_corr'],
        ])

    return pd.DataFrame(rows, columns=column_names)


def get_results_pim(dataset_name, domain=None, pim_paths_dict=None):
    """Build a table of PIM metrics for a single domain.

    For every ``(label, directory)`` pair in ``pim_paths_dict`` this reads
    ``<directory>/cross_entropy_results.json`` via ``read_json`` and uses the
    first record whose ``'domain_name'`` equals ``domain``.

    Args:
        dataset_name: Accepted for signature compatibility; not used here.
        domain: Value compared against each record's ``'domain_name'``.
        pim_paths_dict: Mapping from a label (stored in the ``method``
            column) to the directory holding that run's result files.

    Returns:
        pandas.DataFrame: One row per mapping entry, with columns ``method``,
        ``true_test_acc``, ``estimated_test_acc``, ``gen gap`` (absolute
        difference between estimated and true accuracy),
        ``test_failure_recall``, ``test_success_recall`` and
        ``test_mathews_corr``. Nothing is written to disk.

    Raises:
        IndexError: If a results file has no record for ``domain``.
    """
    score_names = ('cross_entropy',)
    column_names = ['method', 'true_test_acc', 'estimated_test_acc', 'gen gap', 'test_failure_recall', 'test_success_recall', 'test_mathews_corr']

    rows = []
    for agg_label, results_dir in pim_paths_dict.items():
        for score_name in score_names:
            records = read_json(os.path.join(results_dir, f'{score_name}_results.json'))
            # Only the first record for the requested domain is used.
            match = [rec for rec in records if rec['domain_name']==domain][0]

            true_acc = match['true_test_acc']
            est_acc = match['estimated_test_acc']
            rows.append([
                agg_label,
                true_acc,
                est_acc,
                np.abs(est_acc - true_acc),
                match['test_failure_recall'],
                match['test_success_recall'],
                match['test_mathews_corr'],
            ])

    return pd.DataFrame(rows, columns=column_names)












# Experiment configuration: dataset, the domain used in the log paths, and the
# directories that hold the per-score ``*_results.json`` files.
dataset_name='NICOpp'
train_domain='outdoor'
baseline_path=f'logs/{dataset_name}/failure_estimation/{train_domain}/resnet18/classifier/failure_results'
pim_paths_dict={'mean':f'logs/{dataset_name}/resnet18/{train_domain}/mapper/_agg_mean_bs_512_lr_0.0001_augmix_prob_0.2_cutmix_prob_0.2_scheduler_warmup_epoch_0_layer_model.layer1/failure_results',
                'max': f'logs/{dataset_name}/resnet18/{train_domain}/mapper/_agg_max_bs_512_lr_0.0001_augmix_prob_0.2_cutmix_prob_0.2_scheduler_warmup_epoch_0_layer_model.layer1/failure_results',
                }

# Collect one frame per evaluated domain and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
per_domain_frames = []
for d in ["autumn", "dim", "grass", "outdoor", "rock" ,"water"]:
    df_baseline = get_results_baselines(dataset_name, domain=d, baseline_path=baseline_path)
    df_pim = get_results_pim(dataset_name, domain=d, pim_paths_dict=pim_paths_dict)
    # Tag every row with its domain (appended as the last column); without it
    # the combined table repeats the same method names with no way to tell
    # which domain a row belongs to.
    df = pd.concat([df_baseline, df_pim], axis=0).assign(domain=d)
    per_domain_frames.append(df)

df_all = pd.concat(per_domain_frames, axis=0)

# Written next to the baseline result files; the index is not saved.
df_all.to_csv(f'{baseline_path}/{train_domain}_all_results.csv', index=False)


Unnamed: 0,method,true_test_acc,estimated_test_acc,gen gap,test_failure_recall,test_success_recall,test_mathews_corr
0,msp,0.6871,0.690109,0.003009,0.677885,0.85769,0.536994
1,energy,0.6871,0.681835,0.005265,0.65024,0.83306,0.481131
2,pe,0.6871,0.701768,0.014667,0.65024,0.862069,0.519243


In [None]:
def get_results(dataset_name, domain=None,baseline_path=None, pim_paths_dict=None):
    """Collect baseline and PIM metrics for one domain and save them as CSV.

    The rows come from ``get_results_baselines`` followed by
    ``get_results_pim``, so the per-score parsing lives in one place instead
    of being duplicated here.

    Args:
        dataset_name: Used as the prefix of the output file name.
        domain: Value compared against each record's ``'domain_name'``; also
            part of the output file name.
        baseline_path: Directory containing the baseline ``*_results.json``
            files.
        pim_paths_dict: Mapping from a label to the directory holding that
            PIM run's result files.

    Returns:
        pandas.DataFrame: The combined table (baseline rows first), with a
        fresh 0..n-1 index. The same table is written, without the index, to
        ``<dataset_name>_<domain>_results.csv`` in the current working
        directory.

    Raises:
        IndexError: If any results file has no record for ``domain``.
    """
    df_baseline = get_results_baselines(dataset_name, domain=domain, baseline_path=baseline_path)
    df_pim = get_results_pim(dataset_name, domain=domain, pim_paths_dict=pim_paths_dict)

    # ignore_index keeps a single running row index across both groups.
    df = pd.concat([df_baseline, df_pim], axis=0, ignore_index=True)
    df.to_csv(f'{dataset_name}_{domain}_results.csv', index=False)
    return df

In [None]:
# Call with the notebook's configuration variables: no `args` object is ever
# created in this notebook, and `Args` has no `domain` or `pim_paths_dict`
# attribute, so referencing `args` here fails on a fresh kernel.
# NOTE(review): this reports the training domain; pass another domain name if
# a different one was intended.
get_results(dataset_name, train_domain, baseline_path, pim_paths_dict)