In [1]:
import sys
sys.path.insert(0, "../")

import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from mdu.eval.table_analysis_utils import (
    transform_by_tasks,
    select_composite_and_components,
    check_composite_dominance,
    compute_average_ranks,
    analyze_composite_pareto_performance,
)
from configs.interesting_compositions import INTERESTING_COMPOSITIONS

# Set pandas display options to show all columns
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib

In [2]:
def load_config_file(eps: float, grid_size: int, n_targets_multiplier: int, 
                     target: str, scaler_type: str = "global_scaler") -> pd.DataFrame:
    """
    Load a specific config file based on hyperparameters. If multiple files with different
    prefixes exist for the same config, merge them together.
    
    Args:
        eps: Epsilon value
        grid_size: Grid size
        n_targets_multiplier: N targets multiplier  
        target: Target type (exp or beta)
        scaler_type: Scaler type (global_scaler, mahalanobis, or none)
    
    Returns:
        DataFrame with the loaded config data (merged if multiple files exist)
    """
    # Build filename suffix (common part)
    scaler_suffix = f"_{scaler_type}" if scaler_type != "none" else ""
    filename_suffix = (f"_entropic_target_{target}_eps_{eps}_iters_150_"
                      f"tol_1e-06_rs_42_grid_size_{grid_size}_n_targets_multiplier_{n_targets_multiplier}{scaler_suffix}.csv")
    
    # List of possible filename prefixes to try
    possible_prefixes = [
        "extended_benchmark",
        "extra_extended_benchmark",
    ]
    
    # Find all existing files with this config
    found_files = []
    base_dir = Path("../resources/extended_benchmark")
    
    for prefix in possible_prefixes:
        candidate_filename = f"{prefix}{filename_suffix}"
        candidate_filepath = base_dir / candidate_filename
        
        if candidate_filepath.exists():
            found_files.append((candidate_filename, candidate_filepath))
    
    if not found_files:
        # Try to construct a more informative error message
        attempted_files = [f"{prefix}{filename_suffix}" for prefix in possible_prefixes]
        raise FileNotFoundError(
            f"No config files found for the specified parameters.\n"
            f"Attempted files: {attempted_files}\n"
            f"Parameters: eps={eps}, grid_size={grid_size}, n_targets_multiplier={n_targets_multiplier}, "
            f"target={target}, scaler_type={scaler_type}"
        )
    
    # Load and merge all found files
    dataframes = []
    print(f"Found {len(found_files)} files with matching config:")
    
    for filename, filepath in found_files:
        print(f"  Loading: {filename}")
        df = pd.read_csv(filepath)
        print(f"    Rows: {len(df)}, Columns: {len(df.columns)}")
        dataframes.append(df)
    
    # Merge all dataframes
    if len(dataframes) == 1:
        merged_df = dataframes[0]
        print(f"Single file loaded: {len(merged_df)} rows, {len(merged_df.columns)} columns")
    else:
        # Concatenate all dataframes
        merged_df = pd.concat(dataframes, ignore_index=True)
        print(f"Merged {len(dataframes)} files: {len(merged_df)} rows, {len(merged_df.columns)} columns")
        
        # Check for potential issues in merging
        original_total_rows = sum(len(df) for df in dataframes)
        if len(merged_df) != original_total_rows:
            print(f"  Note: Expected {original_total_rows} rows, got {len(merged_df)} after merging")
        
        # Show unique values in key columns to verify merge quality
        if 'measure' in merged_df.columns:
            unique_measures = merged_df['measure'].nunique()
            print(f"  Unique measures in merged data: {unique_measures}")
        
        if 'ind_dataset' in merged_df.columns:
            unique_datasets = merged_df['ind_dataset'].nunique()
            print(f"  Unique ind_datasets in merged data: {unique_datasets}")
    
    return merged_df


In [3]:
for el in INTERESTING_COMPOSITIONS.keys():
    print(el)

COMPOSITE BAYES ALL OUTER
COMPOSITE BAYES ALL INNER
COMPOSITE BAYES ALL CENTRAL
COMPOSITE BAYES (LBS) OUTER
COMPOSITE BAYES (LBS) INNER
COMPOSITE BAYES (LBS) CENTRAL
COMPOSITE BAYES ALL OUTER + M
COMPOSITE BAYES ALL INNER + M
COMPOSITE BAYES ALL CENTRAL + M
COMPOSITE BAYES (LBS) OUTER + M
COMPOSITE BAYES (LBS) INNER + M
COMPOSITE BAYES (LBS) CENTRAL + M
COMPOSITE BAYES ALL OUTER + GMM
COMPOSITE BAYES ALL INNER + GMM
COMPOSITE BAYES ALL CENTRAL + GMM
COMPOSITE BAYES (LBS) OUTER + GMM
COMPOSITE BAYES (LBS) INNER + GMM
COMPOSITE BAYES (LBS) CENTRAL + GMM
COMPOSITE EXCESS ALL OUTER OUTER
COMPOSITE EXCESS ALL OUTER INNER
COMPOSITE EXCESS ALL OUTER CENTRAL
COMPOSITE EXCESS ALL INNER OUTER
COMPOSITE EXCESS ALL INNER INNER
COMPOSITE EXCESS ALL INNER CENTRAL
COMPOSITE EXCESS ALL CENTRAL OUTER
COMPOSITE EXCESS ALL CENTRAL INNER
COMPOSITE EXCESS ALL CENTRAL CENTRAL
COMPOSITE EXCESS LBS OUTER OUTER
COMPOSITE EXCESS LBS OUTER INNER
COMPOSITE EXCESS LBS OUTER CENTRAL
COMPOSITE EXCESS LBS INNER OUTER

In [4]:
def shorten_column_names(column_name: str) -> str:
    """
    Shorten column names for better presentation.
    
    Args:
        column_name: Original column name
        
    Returns:
        Shortened column name
    """
    # Handle composite columns
    if column_name.lower().startswith('composite'):
        return 'C'
    
    # Replace score names with shortcuts
    shortened = column_name
    shortened = shortened.replace('Logscore', 'L')
    shortened = shortened.replace('Brier', 'B')
    shortened = shortened.replace('Spherical', 'S')
    shortened = shortened.replace('Zero-one', 'Z')
    shortened = shortened.replace('mahalanobis', 'M')
    
    return shortened


def create_problem_specific_tables(transformed_df: pd.DataFrame, composite_name: str) -> Dict[str, pd.DataFrame]:
    """
    Create separate tables for each problem type: OOD detection, misclassification detection, and selective prediction.
    
    Args:
        transformed_df: Transformed DataFrame from transform_by_tasks
        composite_name: Name of the composite measure to analyze
    
    Returns:
        Dictionary with tables for each problem type
    """
    # Get composite and components data
    composite_df = select_composite_and_components(transformed_df, composite_name)
    
    # Reset index to access ind_dataset and eval columns
    df_reset = composite_df.reset_index()
    
    # Initialize result dictionary
    tables = {}
    
    # Define problem type patterns
    problem_patterns = {
        'ood_detection': '[ood]',
        'misclassification_detection': '[miscls]', 
        'selective_prediction': '[selective]'
    }
    
    for problem_type, pattern in problem_patterns.items():
        # Filter rows for this problem type
        mask = df_reset['eval'].str.contains(pattern, regex=False, na=False)
        problem_df = df_reset[mask].copy()
        
        print(f"Debug - {problem_type}: Found {len(problem_df)} rows with pattern '{pattern}'")
        if len(problem_df) > 0:
            print(f"  Sample eval values: {problem_df['eval'].unique()[:5]}")
        
        if problem_df.empty:
            print(f"Warning: No data found for {problem_type}")
            tables[problem_type] = pd.DataFrame()
            continue
        
        # Set index back to (ind_dataset, eval)
        problem_df = problem_df.set_index(['ind_dataset', 'eval'])
        
        # Identify composite and component columns
        composite_cols = [c for c in problem_df.columns if c.startswith('composite')]
        component_cols = [c for c in problem_df.columns if not c.startswith('composite')]
        
        # Reorder columns: components first, then composite
        ordered_cols = component_cols + composite_cols
        problem_df = problem_df[ordered_cols]
        
        # Apply shortened column names
        shortened_cols = {col: shorten_column_names(col) for col in problem_df.columns}
        problem_df = problem_df.rename(columns=shortened_cols)
        
        # Remove any duplicate rows based on the index
        problem_df = problem_df[~problem_df.index.duplicated(keep='first')]
        
        tables[problem_type] = problem_df
        
        print(f"{problem_type.replace('_', ' ').title()}: {len(problem_df)} rows, {len(problem_df.columns)} measures")
    
    return tables



def create_final_problem_tables(config_results_list: List[Dict]) -> Dict[str, pd.DataFrame]:
    """
    Create final tables for each problem type with rows as (ind_dataset, ood_dataset) 
    and columns stacked from all configurations.
    
    Args:
        config_results_list: List of results from analyze_specific_config
    
    Returns:
        Dictionary with final tables for each problem type
    """
    final_tables = {
        'ood_detection': [],
        'misclassification_detection': [],
        'selective_prediction': []
    }
    
    # Process each configuration result
    for result in config_results_list:
        config = result['config']
        tables = result['tables']
        
        for problem_type in final_tables.keys():
            if problem_type in tables and not tables[problem_type].empty:
                table = tables[problem_type].copy()
                
                # Reset index to get ind_dataset and eval as columns
                table_reset = table.reset_index()
                
                # Keep all metric columns as they are (with shortened names)
                metric_cols = [c for c in table.columns]
                
                if problem_type == 'ood_detection':
                    # For OOD detection: ind_dataset and ood_dataset are different
                    table_reset['ind_dataset'] = table_reset['ind_dataset']
                    table_reset['ood_dataset'] = table_reset['eval'].str.extract(r'^([^[]+)')[0].str.strip()
                    final_cols = ['ind_dataset', 'ood_dataset'] + metric_cols
                else:
                    # For selective prediction and misclassification detection: 
                    # ind_dataset and eval_dataset are the same (just use ind_dataset)
                    table_reset['ind_dataset'] = table_reset['ind_dataset']
                    table_reset['eval_dataset'] = table_reset['ind_dataset']  # Same as ind_dataset
                    final_cols = ['ind_dataset', 'eval_dataset'] + metric_cols
                
                table_final = table_reset[final_cols].copy()
                
                # Remove duplicates - keep only unique rows
                if problem_type == 'ood_detection':
                    table_final = table_final.drop_duplicates(subset=['ind_dataset', 'ood_dataset'])
                else:
                    table_final = table_final.drop_duplicates(subset=['ind_dataset', 'eval_dataset'])
                
                print(f"Debug - Adding table for {problem_type}: shape {table_final.shape} (after deduplication)")
                final_tables[problem_type].append(table_final)
    
    # Combine all configurations for each problem type by concatenating columns
    combined_tables = {}
    for problem_type, table_list in final_tables.items():
        if table_list:
            # Determine the key columns based on problem type
            if problem_type == 'ood_detection':
                key_cols = ['ind_dataset', 'ood_dataset']
            else:
                key_cols = ['ind_dataset', 'eval_dataset']
            
            # Start with the first table
            combined = table_list[0]
            
            # Add columns from subsequent tables
            for i, table in enumerate(table_list[1:], 1):
                # Get metric columns (exclude key columns)
                metric_cols = [c for c in table.columns if c not in key_cols]
                
                # Rename metric columns to avoid conflicts by adding config index
                table_to_merge = table[key_cols + metric_cols].copy()
                rename_dict = {col: f"{col}_v{i+1}" for col in metric_cols}
                table_to_merge = table_to_merge.rename(columns=rename_dict)
                
                # Also rename columns in the first table if this is the first merge
                if i == 1:
                    first_metric_cols = [c for c in combined.columns if c not in key_cols]
                    first_rename_dict = {col: f"{col}_v1" for col in first_metric_cols}
                    combined = combined.rename(columns=first_rename_dict)
                
                # Merge on key columns, adding new metric columns
                combined = pd.merge(combined, table_to_merge, 
                                  on=key_cols, how='outer')
            
            # Set index
            combined = combined.set_index(key_cols)
            combined_tables[problem_type] = combined
            print(f"Debug - Final {problem_type} table shape: {combined.shape}")
        else:
            combined_tables[problem_type] = pd.DataFrame()
            print(f"Debug - No data for {problem_type}")
    
    return combined_tables


def analyze_multiple_configs(config_list: List[Dict], selective_metric: str = "acc_cov_auc") -> Dict:
    """
    Analyze multiple configurations and return results as DataFrames.
    
    Args:
        config_list: List of dictionaries with config parameters and composite_name
        selective_metric: Metric for selective prediction
    
    Returns:
        Dictionary with results and final tables
    """
    results = []
    
    print(f"Analyzing {len(config_list)} configurations...")
    print("=" * 80)
    
    for i, config in enumerate(config_list):
        print(f"\nConfiguration {i+1}/{len(config_list)}:")
        print(f"  eps={config['eps']}, grid_size={config['grid_size']}, n_targets_multiplier={config['n_targets_multiplier']}")
        print(f"  target={config['target']}, scaler_type={config['scaler_type']}")
        print(f"  composite_name={config['composite_name']}")
        
        try:
            # Load and transform data
            df = load_config_file(
                config['eps'], config['grid_size'], config['n_targets_multiplier'], 
                config['target'], config.get('scaler_type', 'global_scaler')
            )
            
            transformed_df = transform_by_tasks(df, selective_metric=selective_metric)
            tables = create_problem_specific_tables(transformed_df, config['composite_name'])
            
            # Store result
            result = {
                'config': config,
                'raw_df': df,
                'transformed_df': transformed_df, 
                'tables': tables
            }
            results.append(result)
            
            print(f"  ✓ Successfully processed")
            
        except Exception as e:
            print(f"  ✗ Error processing configuration: {e}")
            continue
    
    print(f"\nSuccessfully processed {len(results)}/{len(config_list)} configurations")
    
    # Create final combined tables
    print("\nCreating final combined tables...")
    final_tables = create_final_problem_tables(results)
    
    return {
        'individual_results': results,
        'final_tables': final_tables,
        'config_list': config_list
    }


In [5]:
def display_tables_with_formatting(tables: Dict[str, pd.DataFrame], composite_name: str):
    """
    Display the tables with nice formatting and separators between components and composite.
    
    Args:
        tables: Dictionary of tables from create_problem_specific_tables
        composite_name: Name of the composite measure being analyzed
    """
    print("=" * 100)
    print(f"ANALYSIS FOR COMPOSITE MEASURE: {composite_name}")
    print("=" * 100)
    
    for problem_type, table in tables.items():
        if table.empty:
            continue
            
        print(f"\n{'-' * 60}")
        print(f"{problem_type.replace('_', ' ').upper()} TABLE")
        print(f"{'-' * 60}")
        
        # Identify composite and component columns (using shortened names)
        composite_cols = [c for c in table.columns if c.lower().startswith('comp')]
        component_cols = [c for c in table.columns if not c.lower().startswith('comp')]
        
        print(f"Component measures ({len(component_cols)}): {', '.join(component_cols)}")
        if composite_cols:
            print(f"Composite measure ({len(composite_cols)}): {', '.join(composite_cols)}")
        
        print("\nTable:")
        print(table.round(4))
        
        # Show summary statistics
        print(f"\nSummary Statistics for {problem_type.replace('_', ' ').title()}:")
        print("Mean values:")
        mean_values = table.mean()
        for col in component_cols + composite_cols:
            if col in mean_values:
                print(f"  {col}: {mean_values[col]:.4f}")
        
        print(f"\nStandard deviations:")
        std_values = table.std()
        for col in component_cols + composite_cols:
            if col in std_values:
                print(f"  {col}: {std_values[col]:.4f}")
    
    print("\n" + "=" * 100)


## Example Analysis

Now let's analyze a specific configuration. You can modify the parameters below to analyze different configurations:


In [None]:
config_list = [
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE BAYES ALL OUTER'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE BAYES ALL OUTER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EXCESS ALL OUTER INNER'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE EXCESS ALL OUTER INNER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE EAT LOGSCORE OUTER OUTER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE EAT LOGSCORE OUTER INNER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EAT LOGSCORE OUTER CENTRAL + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EAT SPHERICAL OUTER OUTER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EAT SPHERICAL OUTER INNER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EAT SPHERICAL OUTER CENTRAL + M'
    },
]

In [7]:
# Run the analysis for all configurations
results = analyze_multiple_configs(config_list)


Analyzing 10 configurations...

Configuration 1/10:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=exp, scaler_type=global_scaler
  composite_name=COMPOSITE BAYES ALL OUTER
Found 2 files with matching config:
  Loading: extended_benchmark_entropic_target_exp_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1_global_scaler.csv
    Rows: 7680, Columns: 23
  Loading: extra_extended_benchmark_entropic_target_exp_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1_global_scaler.csv
    Rows: 5940, Columns: 23
Merged 2 files: 13620 rows, 23 columns
  Unique measures in merged data: 141
  Unique ind_datasets in merged data: 3
Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 5 measures
Debug - misclassification_detection: Found 3 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar100 

## Access Individual Tables

You can access individual tables from the results:


In [8]:
# Display final combined tables as DataFrames
print("FINAL COMBINED TABLES")
print("=" * 80)

# OOD Detection Table
print("\n1. OOD DETECTION TABLE")
print("-" * 40)
ood_final = results['final_tables']['ood_detection']
if not ood_final.empty:
    print(f"Shape: {ood_final.shape}")
    print("\nDataFrame:")
    display(ood_final.round(4))
else:
    print("No data available for OOD detection")

# Misclassification Detection Table
print("\n2. MISCLASSIFICATION DETECTION TABLE")
print("-" * 50)
miscls_final = results['final_tables']['misclassification_detection']
if not miscls_final.empty:
    print(f"Shape: {miscls_final.shape}")
    print("\nDataFrame:")
    display(miscls_final.round(4))
else:
    print("No data available for misclassification detection")

# Selective Prediction Table
print("\n3. SELECTIVE PREDICTION TABLE")
print("-" * 40)
selective_final = results['final_tables']['selective_prediction']
if not selective_final.empty:
    print(f"Shape: {selective_final.shape}")
    print("\nDataFrame:")
    display(selective_final.round(4))
else:
    print("No data available for selective prediction")


FINAL COMBINED TABLES

1. OOD DETECTION TABLE
----------------------------------------
Shape: (9, 52)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5,R_e 1 2 (L)_v6,R_t 1 2 (L)_v6,R_b 1 (L)_v6,M_v6,C_v6,R_e 1 3 (L)_v7,R_t 1 3 (L)_v7,R_b 1 (L)_v7,M_v7,C_v7,R_e 1 1 (S)_v8,R_t 1 1 (S)_v8,R_b 1 (S)_v8,M_v8,C_v8,R_e 1 2 (S)_v9,R_t 1 2 (S)_v9,R_b 1 (S)_v9,M_v9,C_v9,R_e 1 3 (S)_v10,R_t 1 3 (S)_v10,R_b 1 (S)_v10,M_v10,C_v10
ind_dataset,ood_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar100,0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181,0.9072,0.9162,0.9169,0.9122,0.9194,0.9026,0.915,0.9169,0.9122,0.9189,0.9044,0.9113,0.9145,0.9122,0.9116,0.9024,0.913,0.9145,0.9122,0.9126,0.904,0.9126,0.9145,0.9122,0.9124
cifar10,svhn,0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573,0.9458,0.9631,0.963,0.9343,0.9591,0.9397,0.9619,0.963,0.9343,0.9584,0.942,0.9551,0.9585,0.9343,0.9558,0.9421,0.9577,0.9585,0.9343,0.957,0.9418,0.9571,0.9585,0.9343,0.9567
cifar10,tiny_imagenet,0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123,0.8987,0.9095,0.9114,0.9103,0.9141,0.8935,0.9079,0.9114,0.9103,0.9134,0.8956,0.9036,0.9087,0.9103,0.904,0.8931,0.9058,0.9087,0.9103,0.9052,0.8944,0.9052,0.9087,0.9103,0.9049
cifar100,cifar10,0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649,0.7367,0.7745,0.7733,0.5348,0.7667,0.718,0.7742,0.7733,0.5348,0.7652,0.7172,0.7735,0.7734,0.5348,0.7725,0.7216,0.7744,0.7734,0.5348,0.7722,0.7147,0.774,0.7734,0.5348,0.7717
cifar100,svhn,0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717,0.7763,0.8702,0.8701,0.6788,0.8745,0.7499,0.8703,0.8701,0.6788,0.8738,0.7234,0.8526,0.861,0.6788,0.8512,0.7312,0.8567,0.861,0.6788,0.8484,0.7186,0.8546,0.861,0.6788,0.8475
cifar100,tiny_imagenet,0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0,0.9999,0.926,0.8099,0.6229,0.9994,0.9999,0.9797,0.8099,0.6229,1.0,0.9915,0.993,0.8063,0.6229,0.9967,0.9944,0.9371,0.8063,0.6229,0.9989,0.9895,0.9793,0.8063,0.6229,0.9998
tiny_imagenet,imagenet_a,0.8354,0.8272,0.8301,0.8263,0.8331,0.8354,0.8272,0.8301,0.8263,0.441,0.8324,0.8014,0.6511,0.7319,0.7129,0.755,0.8014,0.6511,0.7319,0.7129,0.441,0.7293,0.7811,0.8463,0.8354,0.441,0.8469,0.8014,0.841,0.8354,0.441,0.8466,0.772,0.8427,0.8354,0.441,0.8462,0.7212,0.8352,0.8301,0.441,0.8332,0.7319,0.8345,0.8301,0.441,0.8329,0.7154,0.8354,0.8301,0.441,0.8333
tiny_imagenet,imagenet_o,0.7243,0.721,0.7226,0.7212,0.7234,0.7243,0.721,0.7226,0.7212,0.5127,0.7277,0.7552,0.6911,0.7236,0.7005,0.7368,0.7552,0.6911,0.7236,0.7005,0.5127,0.7213,0.7528,0.7535,0.7243,0.5127,0.7596,0.7552,0.7349,0.7243,0.5127,0.7514,0.7464,0.7406,0.7243,0.5127,0.7541,0.7211,0.7415,0.7226,0.5127,0.7426,0.7236,0.7358,0.7226,0.5127,0.7429,0.7189,0.7406,0.7226,0.5127,0.7457
tiny_imagenet,imagenet_r,0.8253,0.8162,0.8192,0.8155,0.8225,0.8253,0.8162,0.8192,0.8155,0.4048,0.8203,0.7933,0.657,0.7308,0.7122,0.7501,0.7933,0.657,0.7308,0.7122,0.4048,0.7238,0.7744,0.8366,0.8253,0.4048,0.8347,0.7933,0.831,0.8253,0.4048,0.8344,0.7644,0.8327,0.8253,0.4048,0.8339,0.7216,0.8253,0.8192,0.4048,0.8236,0.7308,0.8241,0.8192,0.4048,0.8231,0.7163,0.8254,0.8192,0.4048,0.8238



2. MISCLASSIFICATION DETECTION TABLE
--------------------------------------------------
Shape: (3, 52)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5,R_e 1 2 (L)_v6,R_t 1 2 (L)_v6,R_b 1 (L)_v6,M_v6,C_v6,R_e 1 3 (L)_v7,R_t 1 3 (L)_v7,R_b 1 (L)_v7,M_v7,C_v7,R_e 1 1 (S)_v8,R_t 1 1 (S)_v8,R_b 1 (S)_v8,M_v8,C_v8,R_e 1 2 (S)_v9,R_t 1 2 (S)_v9,R_b 1 (S)_v9,M_v9,C_v9,R_e 1 3 (S)_v10,R_t 1 3 (S)_v10,R_b 1 (S)_v10,M_v10,C_v10
ind_dataset,eval_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar10,0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439,0.9416,0.9449,0.9423,0.9276,0.9441,0.9365,0.9422,0.9423,0.9276,0.942,0.9431,0.9452,0.9422,0.9276,0.9452,0.9419,0.9458,0.9422,0.9276,0.9457,0.9424,0.9456,0.9422,0.9276,0.9456
cifar100,cifar100,0.8451,0.8578,0.8562,0.859,0.8527,0.8451,0.8578,0.8562,0.859,0.5739,0.8527,0.8315,0.7826,0.8372,0.8063,0.8289,0.8315,0.7826,0.8372,0.8063,0.5739,0.8034,0.818,0.8531,0.8451,0.5739,0.8489,0.8315,0.8501,0.8451,0.5739,0.8502,0.8042,0.8488,0.8451,0.5739,0.8457,0.8298,0.8679,0.8562,0.5739,0.8677,0.8372,0.8656,0.8562,0.5739,0.8691,0.8274,0.8671,0.8562,0.5739,0.8685
tiny_imagenet,tiny_imagenet,0.8447,0.8547,0.8532,0.8547,0.8506,0.8447,0.8547,0.8532,0.8547,0.4168,0.8511,0.8315,0.7542,0.8188,0.8008,0.8192,0.8315,0.7542,0.8188,0.8008,0.4168,0.7946,0.8131,0.8508,0.8447,0.4168,0.8466,0.8315,0.8497,0.8447,0.4168,0.8493,0.7908,0.8465,0.8447,0.4168,0.8419,0.8086,0.8647,0.8532,0.4168,0.8643,0.8188,0.8634,0.8532,0.4168,0.8657,0.8052,0.8645,0.8532,0.4168,0.8647



3. SELECTIVE PREDICTION TABLE
----------------------------------------
Shape: (3, 52)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5,R_e 1 2 (L)_v6,R_t 1 2 (L)_v6,R_b 1 (L)_v6,M_v6,C_v6,R_e 1 3 (L)_v7,R_t 1 3 (L)_v7,R_b 1 (L)_v7,M_v7,C_v7,R_e 1 1 (S)_v8,R_t 1 1 (S)_v8,R_b 1 (S)_v8,M_v8,C_v8,R_e 1 2 (S)_v9,R_t 1 2 (S)_v9,R_b 1 (S)_v9,M_v9,C_v9,R_e 1 3 (S)_v10,R_t 1 3 (S)_v10,R_b 1 (S)_v10,M_v10,C_v10
ind_dataset,eval_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar10,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9967,0.9828,0.9969,0.9969,0.9969,0.9967,0.9828,0.9963,0.9968,0.9968,0.9969,0.9968,0.9963,0.9969,0.9969,0.9969,0.9968,0.9963,0.9969,0.9967,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9968,0.9963,0.9969,0.9967,0.997,0.9968,0.9963,0.997,0.9968,0.997,0.9968,0.9963,0.997
cifar100,cifar100,0.9159,0.9202,0.9197,0.9206,0.9186,0.9159,0.9202,0.9197,0.9206,0.8106,0.9173,0.9133,0.9,0.9162,0.8789,0.913,0.9133,0.9,0.9162,0.8789,0.8106,0.9013,0.9097,0.9184,0.9159,0.8106,0.9154,0.9133,0.9175,0.9159,0.8106,0.9158,0.9056,0.9171,0.9159,0.8106,0.9144,0.9141,0.9231,0.9197,0.8106,0.9231,0.9162,0.9225,0.9197,0.8106,0.9234,0.9135,0.9229,0.9197,0.8106,0.9233
tiny_imagenet,tiny_imagenet,0.8889,0.8927,0.8921,0.8926,0.8912,0.8889,0.8927,0.8921,0.8926,0.6598,0.889,0.8853,0.86,0.8825,0.8528,0.882,0.8853,0.86,0.8825,0.8528,0.6598,0.8666,0.8792,0.8909,0.8889,0.6598,0.8863,0.8853,0.8907,0.8889,0.6598,0.8874,0.8717,0.8893,0.8889,0.6598,0.8846,0.8793,0.8961,0.8921,0.6598,0.8959,0.8825,0.8957,0.8921,0.6598,0.8964,0.8779,0.896,0.8921,0.6598,0.8961


## Results Structure

The analysis returns:
1. **`final_tables`**: Combined tables for each problem type with the requested format
2. **`individual_results`**: Detailed results for each configuration
3. **`config_list`**: The original configuration list

### Final Table Format
Each final table has:
- **Index**: `(ind_dataset_clean, ood_dataset)` - the in-distribution and out-of-distribution dataset names
- **Columns**: Individual measures and composite measures, labeled by configuration


In [9]:
# Debug: Check individual configuration results and their sizes
print("INDIVIDUAL CONFIGURATION RESULTS - DEBUG")
print("=" * 60)

for i, individual_result in enumerate(results['individual_results']):
    config = individual_result['config']
    tables = individual_result['tables']
    transformed_df = individual_result['transformed_df']
    
    print(f"\nConfiguration {i+1}: {config['composite_name']}")
    print(f"Parameters: eps={config['eps']}, grid_size={config['grid_size']}, target={config['target']}")
    print(f"Original transformed_df shape: {transformed_df.shape}")
    print(f"Transformed_df index levels: {transformed_df.index.names}")
    print(f"Sample index values: {transformed_df.index[:5]}")
    
    for problem_type, table in tables.items():
        if not table.empty:
            print(f"\n{problem_type.replace('_', ' ').title()} - Shape: {table.shape}")
            print(f"Index: {table.index[:3]}")
            # Show first few rows
            display(table.head(3).round(4))
        else:
            print(f"\n{problem_type.replace('_', ' ').title()}: No data")


INDIVIDUAL CONFIGURATION RESULTS - DEBUG

Configuration 1: COMPOSITE BAYES ALL OUTER
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 141)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423
cifar100,cifar100 [miscls],0.8451,0.8578,0.8562,0.859,0.8527
tiny_imagenet,tiny_imagenet [miscls],0.8447,0.8547,0.8532,0.8547,0.8506



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968
cifar100,cifar100 [selective],0.9159,0.9202,0.9197,0.9206,0.9186
tiny_imagenet,tiny_imagenet [selective],0.8889,0.8927,0.8921,0.8926,0.8912



Configuration 2: COMPOSITE BAYES ALL OUTER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 6)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9122,0.9185
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.9343,0.9572
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9103,0.914



Misclassification Detection - Shape: (3, 6)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9276,0.942
cifar100,cifar100 [miscls],0.8451,0.8578,0.8562,0.859,0.5739,0.8527
tiny_imagenet,tiny_imagenet [miscls],0.8447,0.8547,0.8532,0.8547,0.4168,0.8511



Selective Prediction - Shape: (3, 6)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9963,0.9969
cifar100,cifar100 [selective],0.9159,0.9202,0.9197,0.9206,0.8106,0.9173
tiny_imagenet,tiny_imagenet [selective],0.8889,0.8927,0.8921,0.8926,0.6598,0.889



Configuration 3: COMPOSITE EXCESS ALL OUTER INNER
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 141)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9059
cifar10,svhn [ood],0.9458,0.9402,0.9421,0.8246,0.944
cifar10,tiny_imagenet [ood],0.8987,0.8932,0.8931,0.7524,0.8972



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9426
cifar100,cifar100 [miscls],0.8315,0.7826,0.8372,0.8063,0.8289
tiny_imagenet,tiny_imagenet [miscls],0.8315,0.7542,0.8188,0.8008,0.8192



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9969
cifar100,cifar100 [selective],0.9133,0.9,0.9162,0.8789,0.913
tiny_imagenet,tiny_imagenet [selective],0.8853,0.86,0.8825,0.8528,0.882



Configuration 4: COMPOSITE EXCESS ALL OUTER INNER + M
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 6)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9122,0.9103
cifar10,svhn [ood],0.9458,0.9402,0.9421,0.8246,0.9343,0.9423
cifar10,tiny_imagenet [ood],0.8987,0.8932,0.8931,0.7524,0.9103,0.9042



Misclassification Detection - Shape: (3, 6)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9276,0.9418
cifar100,cifar100 [miscls],0.8315,0.7826,0.8372,0.8063,0.5739,0.8034
tiny_imagenet,tiny_imagenet [miscls],0.8315,0.7542,0.8188,0.8008,0.4168,0.7946



Selective Prediction - Shape: (3, 6)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9963,0.9968
cifar100,cifar100 [selective],0.9133,0.9,0.9162,0.8789,0.8106,0.9013
tiny_imagenet,tiny_imagenet [selective],0.8853,0.86,0.8825,0.8528,0.6598,0.8666



Configuration 5: COMPOSITE EAT LOGSCORE OUTER OUTER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn [ood],0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet [ood],0.8957,0.9036,0.9114,0.9103,0.9123



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9404,0.9432,0.9423,0.9276,0.9439
cifar100,cifar100 [miscls],0.818,0.8531,0.8451,0.5739,0.8489
tiny_imagenet,tiny_imagenet [miscls],0.8131,0.8508,0.8447,0.4168,0.8466



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9968,0.9969,0.9968,0.9963,0.9969
cifar100,cifar100 [selective],0.9097,0.9184,0.9159,0.8106,0.9154
tiny_imagenet,tiny_imagenet [selective],0.8792,0.8909,0.8889,0.6598,0.8863



Configuration 6: COMPOSITE EAT LOGSCORE OUTER INNER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_t 1 2 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9072,0.9162,0.9169,0.9122,0.9194
cifar10,svhn [ood],0.9458,0.9631,0.963,0.9343,0.9591
cifar10,tiny_imagenet [ood],0.8987,0.9095,0.9114,0.9103,0.9141



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_t 1 2 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9416,0.9449,0.9423,0.9276,0.9441
cifar100,cifar100 [miscls],0.8315,0.8501,0.8451,0.5739,0.8502
tiny_imagenet,tiny_imagenet [miscls],0.8315,0.8497,0.8447,0.4168,0.8493



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_t 1 2 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9969,0.9969,0.9968,0.9963,0.9969
cifar100,cifar100 [selective],0.9133,0.9175,0.9159,0.8106,0.9158
tiny_imagenet,tiny_imagenet [selective],0.8853,0.8907,0.8889,0.6598,0.8874



Configuration 7: COMPOSITE EAT LOGSCORE OUTER CENTRAL + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (L),R_t 1 3 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9026,0.915,0.9169,0.9122,0.9189
cifar10,svhn [ood],0.9397,0.9619,0.963,0.9343,0.9584
cifar10,tiny_imagenet [ood],0.8935,0.9079,0.9114,0.9103,0.9134



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (L),R_t 1 3 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9365,0.9422,0.9423,0.9276,0.942
cifar100,cifar100 [miscls],0.8042,0.8488,0.8451,0.5739,0.8457
tiny_imagenet,tiny_imagenet [miscls],0.7908,0.8465,0.8447,0.4168,0.8419



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (L),R_t 1 3 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9967,0.9968,0.9968,0.9963,0.9969
cifar100,cifar100 [selective],0.9056,0.9171,0.9159,0.8106,0.9144
tiny_imagenet,tiny_imagenet [selective],0.8717,0.8893,0.8889,0.6598,0.8846



Configuration 8: COMPOSITE EAT SPHERICAL OUTER OUTER + M
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 141)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (S),R_t 1 1 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9044,0.9113,0.9145,0.9122,0.9116
cifar10,svhn [ood],0.942,0.9551,0.9585,0.9343,0.9558
cifar10,tiny_imagenet [ood],0.8956,0.9036,0.9087,0.9103,0.904



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (S),R_t 1 1 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9431,0.9452,0.9422,0.9276,0.9452
cifar100,cifar100 [miscls],0.8298,0.8679,0.8562,0.5739,0.8677
tiny_imagenet,tiny_imagenet [miscls],0.8086,0.8647,0.8532,0.4168,0.8643



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (S),R_t 1 1 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9969,0.9969,0.9968,0.9963,0.9969
cifar100,cifar100 [selective],0.9141,0.9231,0.9197,0.8106,0.9231
tiny_imagenet,tiny_imagenet [selective],0.8793,0.8961,0.8921,0.6598,0.8959



Configuration 9: COMPOSITE EAT SPHERICAL OUTER INNER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (S),R_t 1 2 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9024,0.913,0.9145,0.9122,0.9126
cifar10,svhn [ood],0.9421,0.9577,0.9585,0.9343,0.957
cifar10,tiny_imagenet [ood],0.8931,0.9058,0.9087,0.9103,0.9052



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (S),R_t 1 2 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9419,0.9458,0.9422,0.9276,0.9457
cifar100,cifar100 [miscls],0.8372,0.8656,0.8562,0.5739,0.8691
tiny_imagenet,tiny_imagenet [miscls],0.8188,0.8634,0.8532,0.4168,0.8657



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (S),R_t 1 2 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9967,0.997,0.9968,0.9963,0.997
cifar100,cifar100 [selective],0.9162,0.9225,0.9197,0.8106,0.9234
tiny_imagenet,tiny_imagenet [selective],0.8825,0.8957,0.8921,0.6598,0.8964



Configuration 10: COMPOSITE EAT SPHERICAL OUTER CENTRAL + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 145)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (S),R_t 1 3 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.904,0.9126,0.9145,0.9122,0.9124
cifar10,svhn [ood],0.9418,0.9571,0.9585,0.9343,0.9567
cifar10,tiny_imagenet [ood],0.8944,0.9052,0.9087,0.9103,0.9049



Misclassification Detection - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [miscls]'),
            (     'cifar100',      'cifar100 [miscls]'),
            ('tiny_imagenet', 'tiny_imagenet [miscls]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (S),R_t 1 3 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9424,0.9456,0.9422,0.9276,0.9456
cifar100,cifar100 [miscls],0.8274,0.8671,0.8562,0.5739,0.8685
tiny_imagenet,tiny_imagenet [miscls],0.8052,0.8645,0.8532,0.4168,0.8647



Selective Prediction - Shape: (3, 5)
Index: MultiIndex([(      'cifar10',       'cifar10 [selective]'),
            (     'cifar100',      'cifar100 [selective]'),
            ('tiny_imagenet', 'tiny_imagenet [selective]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 3 (S),R_t 1 3 (S),R_b 1 (S),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [selective],0.9968,0.997,0.9968,0.9963,0.997
cifar100,cifar100 [selective],0.9135,0.9229,0.9197,0.8106,0.9233
tiny_imagenet,tiny_imagenet [selective],0.8779,0.896,0.8921,0.6598,0.8961


## Compare Composite vs Components

You can also create custom comparisons between the composite measure and its individual components:


In [10]:
# Column information for final tables
print("COLUMN INFORMATION FOR FINAL TABLES")
print("=" * 60)

for problem_type, final_table in results['final_tables'].items():
    if not final_table.empty:
        print(f"\n{problem_type.replace('_', ' ').title()}:")
        print(f"Shape: {final_table.shape}")
        print(f"Index: {final_table.index.names}")
        print("Columns:")
        for col in final_table.columns:
            print(f"  - {col}")
    else:
        print(f"\n{problem_type.replace('_', ' ').title()}: No data")


COLUMN INFORMATION FOR FINAL TABLES

Ood Detection:
Shape: (9, 52)
Index: ['ind_dataset', 'ood_dataset']
Columns:
  - R_b 1 (L)_v1
  - R_b 1 (B)_v1
  - R_b 1 (S)_v1
  - R_b 1 (Z)_v1
  - C_v1
  - R_b 1 (L)_v2
  - R_b 1 (B)_v2
  - R_b 1 (S)_v2
  - R_b 1 (Z)_v2
  - M_v2
  - C_v2
  - R_e 1 2 (L)_v3
  - R_e 1 2 (B)_v3
  - R_e 1 2 (S)_v3
  - R_e 1 2 (Z)_v3
  - C_v3
  - R_e 1 2 (L)_v4
  - R_e 1 2 (B)_v4
  - R_e 1 2 (S)_v4
  - R_e 1 2 (Z)_v4
  - M_v4
  - C_v4
  - R_e 1 1 (L)_v5
  - R_t 1 1 (L)_v5
  - R_b 1 (L)_v5
  - M_v5
  - C_v5
  - R_e 1 2 (L)_v6
  - R_t 1 2 (L)_v6
  - R_b 1 (L)_v6
  - M_v6
  - C_v6
  - R_e 1 3 (L)_v7
  - R_t 1 3 (L)_v7
  - R_b 1 (L)_v7
  - M_v7
  - C_v7
  - R_e 1 1 (S)_v8
  - R_t 1 1 (S)_v8
  - R_b 1 (S)_v8
  - M_v8
  - C_v8
  - R_e 1 2 (S)_v9
  - R_t 1 2 (S)_v9
  - R_b 1 (S)_v9
  - M_v9
  - C_v9
  - R_e 1 3 (S)_v10
  - R_t 1 3 (S)_v10
  - R_b 1 (S)_v10
  - M_v10
  - C_v10

Misclassification Detection:
Shape: (3, 52)
Index: ['ind_dataset', 'eval_dataset']
Columns:
  - R_b 

In [11]:
# Export final tables to CSV (optional)
print("EXPORT OPTIONS")
print("=" * 40)

for problem_type, final_table in results['final_tables'].items():
    if not final_table.empty:
        filename = f"final_{problem_type}_table.csv"
        print(f"To export {problem_type.replace('_', ' ').title()} table:")
        print(f"  results['final_tables']['{problem_type}'].to_csv('{filename}')")
        # Uncomment to actually export:
        # final_table.to_csv(filename)
    else:
        print(f"No data to export for {problem_type.replace('_', ' ').title()}")


EXPORT OPTIONS
To export Ood Detection table:
  results['final_tables']['ood_detection'].to_csv('final_ood_detection_table.csv')
To export Misclassification Detection table:
  results['final_tables']['misclassification_detection'].to_csv('final_misclassification_detection_table.csv')
To export Selective Prediction table:
  results['final_tables']['selective_prediction'].to_csv('final_selective_prediction_table.csv')


In [12]:
def compute_pareto_depth(points: List[Tuple[float, float]], point_idx: int) -> int:
    """
    Compute Pareto depth of a point (how many layers of Pareto fronts it takes to reach this point).
    
    Args:
        points: List of (x, y) coordinate tuples
        point_idx: Index of the point to compute depth for
    
    Returns:
        Pareto depth (0 = on Pareto front, 1 = on second front, etc.)
    """
    from mdu.eval.table_analysis_utils import pareto_front
    
    remaining_points = list(enumerate(points))
    depth = 0
    
    while remaining_points:
        # Find current Pareto front
        current_points = [point for _, point in remaining_points]
        pareto_indices = pareto_front(current_points)
        
        # Check if our point is on this front
        for i, (orig_idx, _) in enumerate(remaining_points):
            if orig_idx == point_idx and i in pareto_indices:
                return depth
        
        # Remove Pareto front points and continue
        remaining_points = [item for i, item in enumerate(remaining_points) if i not in pareto_indices]
        depth += 1
    
    return depth


def analyze_composite_pareto_detailed(results: Dict) -> pd.DataFrame:
    """
    Analyze Pareto front performance for all composite measures across all configurations.
    
    Args:
        results: Results dictionary from analyze_multiple_configs
    
    Returns:
        DataFrame with composite measure, pareto count, average depth, and number of experiments
    """
    import itertools
    
    pareto_analysis = []
    
    # Get all unique composite measures
    composite_names = set()
    for individual_result in results['individual_results']:
        composite_names.add(individual_result['config']['composite_name'])
    
    for composite_name in composite_names:
        pareto_counts = []
        pareto_depths = []
        total_experiments = 0
        
        # Analyze each configuration with this composite measure
        for individual_result in results['individual_results']:
            if individual_result['config']['composite_name'] == composite_name:
                transformed_df = individual_result['transformed_df']
                
                try:
                    # Use the existing pareto analysis function
                    pareto_results = analyze_composite_pareto_performance(
                        transformed_df, {composite_name: INTERESTING_COMPOSITIONS[composite_name]}
                    )
                    
                    if composite_name in pareto_results:
                        pareto_data = pareto_results[composite_name]
                        pareto_counts.append(pareto_data['pareto_count'])
                        total_experiments += pareto_data['total_problems']
                        
                        # Calculate average pareto depth for this configuration
                        composite_df = select_composite_and_components(transformed_df, composite_name)
                        composite_cols = [c for c in composite_df.columns if c.startswith('composite')]
                        component_cols = [c for c in composite_df.columns if not c.startswith('composite')]
                        
                        if composite_cols and len(component_cols) >= 2:
                            composite_col = composite_cols[0]
                            problems = list(composite_df.index)
                            depths = []
                            
                            for problem1, problem2 in itertools.combinations(problems, 2):
                                row1 = composite_df.loc[problem1]
                                row2 = composite_df.loc[problem2]
                                
                                c1, c2 = row1[composite_col], row2[composite_col]
                                if pd.isna(c1) or pd.isna(c2):
                                    continue
                                
                                points = []
                                # Add component points
                                for col in component_cols:
                                    v1, v2 = row1[col], row2[col]
                                    if pd.notna(v1) and pd.notna(v2):
                                        points.append((v1, v2))
                                
                                # Add composite point
                                points.append((c1, c2))
                                
                                if len(points) >= 3:  # Need at least 2 components + 1 composite
                                    composite_idx = len(points) - 1
                                    depth = compute_pareto_depth(points, composite_idx)
                                    depths.append(depth)
                            
                            if depths:
                                avg_depth = sum(depths) / len(depths)
                                pareto_depths.append(avg_depth)
                
                except Exception as e:
                    print(f"Error analyzing {composite_name}: {e}")
                    continue
        
        # Aggregate results for this composite measure
        if pareto_counts:
            total_pareto_count = sum(pareto_counts)
            avg_pareto_depth = sum(pareto_depths) / len(pareto_depths) if pareto_depths else float('nan')
            
            pareto_analysis.append({
                'composite_measure': composite_name,
                'pareto_front_count': total_pareto_count,
                'avg_pareto_depth': avg_pareto_depth,
                'total_experiments': total_experiments
            })
    
    # Create DataFrame and sort by pareto front count
    pareto_df = pd.DataFrame(pareto_analysis)
    if not pareto_df.empty:
        pareto_df = pareto_df.sort_values('pareto_front_count', ascending=False)
    
    return pareto_df


## Pareto Front Analysis

Analyze how often each composite measure appears on the Pareto front compared to its individual components:


In [13]:
# Perform Pareto front analysis
print("PARETO FRONT ANALYSIS")
print("=" * 60)

pareto_results_df = analyze_composite_pareto_detailed(results)

if not pareto_results_df.empty:
    print("Composite Measure Pareto Performance:")
    print("-" * 40)
    
    # Display the results DataFrame
    display(pareto_results_df.round(3))
    
    print(f"\nSummary:")
    print(f"- Total composite measures analyzed: {len(pareto_results_df)}")
    print(f"- Best performing composite (most Pareto front appearances): {pareto_results_df.iloc[0]['composite_measure']}")
    print(f"  - Pareto front count: {pareto_results_df.iloc[0]['pareto_front_count']}")
    print(f"  - Average Pareto depth: {pareto_results_df.iloc[0]['avg_pareto_depth']:.3f}")
    print(f"  - Total experiments: {pareto_results_df.iloc[0]['total_experiments']}")
    
    # Calculate percentage for best performer
    best_pct = (pareto_results_df.iloc[0]['pareto_front_count'] / pareto_results_df.iloc[0]['total_experiments']) * 100
    print(f"  - Pareto front percentage: {best_pct:.1f}%")
    
else:
    print("No Pareto analysis results available.")


PARETO FRONT ANALYSIS
Composite Measure Pareto Performance:
----------------------------------------


Unnamed: 0,composite_measure,pareto_front_count,avg_pareto_depth,total_experiments
6,COMPOSITE EAT LOGSCORE OUTER INNER + M,89,0.21,105
4,COMPOSITE EAT LOGSCORE OUTER OUTER + M,87,0.2,105
7,COMPOSITE EAT LOGSCORE OUTER CENTRAL + M,84,0.267,105
1,COMPOSITE EAT SPHERICAL OUTER CENTRAL + M,69,0.4,105
8,COMPOSITE EAT SPHERICAL OUTER INNER + M,69,0.4,105
0,COMPOSITE EAT SPHERICAL OUTER OUTER + M,68,0.352,105
5,COMPOSITE BAYES ALL OUTER + M,62,0.581,105
2,COMPOSITE BAYES ALL OUTER,57,0.571,105
9,COMPOSITE EXCESS ALL OUTER INNER + M,28,0.952,105
3,COMPOSITE EXCESS ALL OUTER INNER,27,0.762,105



Summary:
- Total composite measures analyzed: 10
- Best performing composite (most Pareto front appearances): COMPOSITE EAT LOGSCORE OUTER INNER + M
  - Pareto front count: 89
  - Average Pareto depth: 0.210
  - Total experiments: 105
  - Pareto front percentage: 84.8%


In [14]:
pareto_results_df

Unnamed: 0,composite_measure,pareto_front_count,avg_pareto_depth,total_experiments
6,COMPOSITE EAT LOGSCORE OUTER INNER + M,89,0.209524,105
4,COMPOSITE EAT LOGSCORE OUTER OUTER + M,87,0.2,105
7,COMPOSITE EAT LOGSCORE OUTER CENTRAL + M,84,0.266667,105
1,COMPOSITE EAT SPHERICAL OUTER CENTRAL + M,69,0.4,105
8,COMPOSITE EAT SPHERICAL OUTER INNER + M,69,0.4,105
0,COMPOSITE EAT SPHERICAL OUTER OUTER + M,68,0.352381,105
5,COMPOSITE BAYES ALL OUTER + M,62,0.580952,105
2,COMPOSITE BAYES ALL OUTER,57,0.571429,105
9,COMPOSITE EXCESS ALL OUTER INNER + M,28,0.952381,105
3,COMPOSITE EXCESS ALL OUTER INNER,27,0.761905,105


In [15]:
# Additional analysis of Pareto results
if not pareto_results_df.empty:
    print("\nDETAILED PARETO ANALYSIS")
    print("=" * 50)
    
    # Add percentage column
    pareto_results_df['pareto_percentage'] = (pareto_results_df['pareto_front_count'] / 
                                            pareto_results_df['total_experiments'] * 100)
    
    # Sort by percentage
    pareto_results_df_sorted = pareto_results_df.sort_values('pareto_percentage', ascending=False)
    
    print("Ranking by Pareto Front Percentage:")
    for i, (_, row) in enumerate(pareto_results_df_sorted.iterrows(), 1):
        print(f"{i:2d}. {row['composite_measure']}")
        print(f"    Pareto front: {row['pareto_front_count']:3d}/{row['total_experiments']:3d} ({row['pareto_percentage']:5.1f}%)")
        print(f"    Avg depth: {row['avg_pareto_depth']:6.3f}")
        print()
    
    # Show statistics
    print("STATISTICS:")
    print(f"- Mean Pareto percentage: {pareto_results_df['pareto_percentage'].mean():.1f}%")
    print(f"- Median Pareto percentage: {pareto_results_df['pareto_percentage'].median():.1f}%")
    print(f"- Best Pareto percentage: {pareto_results_df['pareto_percentage'].max():.1f}%")
    print(f"- Worst Pareto percentage: {pareto_results_df['pareto_percentage'].min():.1f}%")
    print()
    print(f"- Mean Pareto depth: {pareto_results_df['avg_pareto_depth'].mean():.3f}")
    print(f"- Best (lowest) Pareto depth: {pareto_results_df['avg_pareto_depth'].min():.3f}")
    print(f"- Worst (highest) Pareto depth: {pareto_results_df['avg_pareto_depth'].max():.3f}")
    
    # Final DataFrame with all columns
    print("\nFINAL RESULTS DATAFRAME:")
    display(pareto_results_df_sorted[['composite_measure', 'pareto_front_count', 
                                    'avg_pareto_depth', 'total_experiments', 'pareto_percentage']].round(3))



DETAILED PARETO ANALYSIS
Ranking by Pareto Front Percentage:
 1. COMPOSITE EAT LOGSCORE OUTER INNER + M
    Pareto front:  89/105 ( 84.8%)
    Avg depth:  0.210

 2. COMPOSITE EAT LOGSCORE OUTER OUTER + M
    Pareto front:  87/105 ( 82.9%)
    Avg depth:  0.200

 3. COMPOSITE EAT LOGSCORE OUTER CENTRAL + M
    Pareto front:  84/105 ( 80.0%)
    Avg depth:  0.267

 4. COMPOSITE EAT SPHERICAL OUTER CENTRAL + M
    Pareto front:  69/105 ( 65.7%)
    Avg depth:  0.400

 5. COMPOSITE EAT SPHERICAL OUTER INNER + M
    Pareto front:  69/105 ( 65.7%)
    Avg depth:  0.400

 6. COMPOSITE EAT SPHERICAL OUTER OUTER + M
    Pareto front:  68/105 ( 64.8%)
    Avg depth:  0.352

 7. COMPOSITE BAYES ALL OUTER + M
    Pareto front:  62/105 ( 59.0%)
    Avg depth:  0.581

 8. COMPOSITE BAYES ALL OUTER
    Pareto front:  57/105 ( 54.3%)
    Avg depth:  0.571

 9. COMPOSITE EXCESS ALL OUTER INNER + M
    Pareto front:  28/105 ( 26.7%)
    Avg depth:  0.952

10. COMPOSITE EXCESS ALL OUTER INNER
    Paret

Unnamed: 0,composite_measure,pareto_front_count,avg_pareto_depth,total_experiments,pareto_percentage
6,COMPOSITE EAT LOGSCORE OUTER INNER + M,89,0.21,105,84.762
4,COMPOSITE EAT LOGSCORE OUTER OUTER + M,87,0.2,105,82.857
7,COMPOSITE EAT LOGSCORE OUTER CENTRAL + M,84,0.267,105,80.0
1,COMPOSITE EAT SPHERICAL OUTER CENTRAL + M,69,0.4,105,65.714
8,COMPOSITE EAT SPHERICAL OUTER INNER + M,69,0.4,105,65.714
0,COMPOSITE EAT SPHERICAL OUTER OUTER + M,68,0.352,105,64.762
5,COMPOSITE BAYES ALL OUTER + M,62,0.581,105,59.048
2,COMPOSITE BAYES ALL OUTER,57,0.571,105,54.286
9,COMPOSITE EXCESS ALL OUTER INNER + M,28,0.952,105,26.667
3,COMPOSITE EXCESS ALL OUTER INNER,27,0.762,105,25.714


In [16]:
# Additional analysis of Pareto results
if not pareto_results_df.empty:
    print("\nDETAILED PARETO ANALYSIS")
    print("=" * 50)
    
    # Add percentage column
    pareto_results_df['pareto_percentage'] = (pareto_results_df['pareto_front_count'] / 
                                            pareto_results_df['total_experiments'] * 100)
    
    # Sort by percentage
    pareto_results_df_sorted = pareto_results_df.sort_values('pareto_percentage', ascending=False)
    
    print("Ranking by Pareto Front Percentage:")
    for i, (_, row) in enumerate(pareto_results_df_sorted.iterrows(), 1):
        print(f"{i:2d}. {row['composite_measure']}")
        print(f"    Pareto front: {row['pareto_front_count']:3d}/{row['total_experiments']:3d} ({row['pareto_percentage']:5.1f}%)")
        print(f"    Avg depth: {row['avg_pareto_depth']:6.3f}")
        print()
    
    # Show statistics
    print("STATISTICS:")
    print(f"- Mean Pareto percentage: {pareto_results_df['pareto_percentage'].mean():.1f}%")
    print(f"- Median Pareto percentage: {pareto_results_df['pareto_percentage'].median():.1f}%")
    print(f"- Best Pareto percentage: {pareto_results_df['pareto_percentage'].max():.1f}%")
    print(f"- Worst Pareto percentage: {pareto_results_df['pareto_percentage'].min():.1f}%")
    print()
    print(f"- Mean Pareto depth: {pareto_results_df['avg_pareto_depth'].mean():.3f}")
    print(f"- Best (lowest) Pareto depth: {pareto_results_df['avg_pareto_depth'].min():.3f}")
    print(f"- Worst (highest) Pareto depth: {pareto_results_df['avg_pareto_depth'].max():.3f}")
    
    # Final DataFrame with all columns
    print("\nFINAL RESULTS DATAFRAME:")
    display(pareto_results_df_sorted[['composite_measure', 'pareto_front_count', 
                                    'avg_pareto_depth', 'total_experiments', 'pareto_percentage']].round(3))



DETAILED PARETO ANALYSIS
Ranking by Pareto Front Percentage:
 1. COMPOSITE EAT LOGSCORE OUTER INNER + M
    Pareto front:  89/105 ( 84.8%)
    Avg depth:  0.210

 2. COMPOSITE EAT LOGSCORE OUTER OUTER + M
    Pareto front:  87/105 ( 82.9%)
    Avg depth:  0.200

 3. COMPOSITE EAT LOGSCORE OUTER CENTRAL + M
    Pareto front:  84/105 ( 80.0%)
    Avg depth:  0.267

 4. COMPOSITE EAT SPHERICAL OUTER CENTRAL + M
    Pareto front:  69/105 ( 65.7%)
    Avg depth:  0.400

 5. COMPOSITE EAT SPHERICAL OUTER INNER + M
    Pareto front:  69/105 ( 65.7%)
    Avg depth:  0.400

 6. COMPOSITE EAT SPHERICAL OUTER OUTER + M
    Pareto front:  68/105 ( 64.8%)
    Avg depth:  0.352

 7. COMPOSITE BAYES ALL OUTER + M
    Pareto front:  62/105 ( 59.0%)
    Avg depth:  0.581

 8. COMPOSITE BAYES ALL OUTER
    Pareto front:  57/105 ( 54.3%)
    Avg depth:  0.571

 9. COMPOSITE EXCESS ALL OUTER INNER + M
    Pareto front:  28/105 ( 26.7%)
    Avg depth:  0.952

10. COMPOSITE EXCESS ALL OUTER INNER
    Paret

Unnamed: 0,composite_measure,pareto_front_count,avg_pareto_depth,total_experiments,pareto_percentage
6,COMPOSITE EAT LOGSCORE OUTER INNER + M,89,0.21,105,84.762
4,COMPOSITE EAT LOGSCORE OUTER OUTER + M,87,0.2,105,82.857
7,COMPOSITE EAT LOGSCORE OUTER CENTRAL + M,84,0.267,105,80.0
1,COMPOSITE EAT SPHERICAL OUTER CENTRAL + M,69,0.4,105,65.714
8,COMPOSITE EAT SPHERICAL OUTER INNER + M,69,0.4,105,65.714
0,COMPOSITE EAT SPHERICAL OUTER OUTER + M,68,0.352,105,64.762
5,COMPOSITE BAYES ALL OUTER + M,62,0.581,105,59.048
2,COMPOSITE BAYES ALL OUTER,57,0.571,105,54.286
9,COMPOSITE EXCESS ALL OUTER INNER + M,28,0.952,105,26.667
3,COMPOSITE EXCESS ALL OUTER INNER,27,0.762,105,25.714


## Export Tables

You can export the tables to CSV files for further analysis:


In [17]:
# Example: Add more configurations to the analysis
additional_configs = [
    {
        'eps': 2.0,
        'grid_size': 2,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE BAYES ALL INNER'
    }
]

print("To analyze additional configurations, add them to the config_list:")
print("config_list.extend(additional_configs)")
print("results_extended = analyze_multiple_configs(config_list)")

# Uncomment to run with additional configs:
# config_list.extend(additional_configs)
# results_extended = analyze_multiple_configs(config_list)


To analyze additional configurations, add them to the config_list:
config_list.extend(additional_configs)
results_extended = analyze_multiple_configs(config_list)


In [18]:
# Test the shortened column names function
test_columns = [
    'R_b 1 (Logscore)',
    'R_e 2 3 (Brier)', 
    'R_t 1 2 (Spherical)',
    'composite bayes all outer',
    'mahalanobis',
    'R_b 3 (Zero-one)'
]

print("Column name shortening examples:")
print("Original -> Shortened")
print("-" * 40)
for col in test_columns:
    shortened = shorten_column_names(col)
    print(f"{col} -> {shortened}")


Column name shortening examples:
Original -> Shortened
----------------------------------------
R_b 1 (Logscore) -> R_b 1 (L)
R_e 2 3 (Brier) -> R_e 2 3 (B)
R_t 1 2 (Spherical) -> R_t 1 2 (S)
composite bayes all outer -> C
mahalanobis -> M
R_b 3 (Zero-one) -> R_b 3 (Z)


In [19]:
# Create concatenated tables per problem type (side-by-side by composite measure)
def create_concatenated_problem_tables(individual_results: List[Dict]) -> Dict[str, pd.DataFrame]:
    """
    Create concatenated tables for each problem type with all composite measures side-by-side.
    
    Args:
        individual_results: List of individual configuration results
        
    Returns:
        Dictionary with concatenated tables for each problem type
    """
    problem_types = ['ood_detection', 'misclassification_detection', 'selective_prediction']
    concatenated_tables = {}
    
    for problem_type in problem_types:
        tables_to_concat = []
        
        for i, result in enumerate(individual_results):
            config = result['config']
            tables = result['tables']
            
            if problem_type in tables and not tables[problem_type].empty:
                table = tables[problem_type].copy()
                
                # Add suffix to column names to identify the composite measure
                composite_suffix = config['composite_name'].replace(' ', '_').replace('+', 'PLUS')
                
                # Rename columns with composite suffix
                new_columns = {}
                for col in table.columns:
                    new_columns[col] = f"{col}_{composite_suffix}"
                
                table = table.rename(columns=new_columns)
                tables_to_concat.append(table)
        
        # Concatenate all tables side-by-side
        if tables_to_concat:
            # Use outer join to include all indices
            concatenated = pd.concat(tables_to_concat, axis=1, join='outer', sort=True)
            concatenated_tables[problem_type] = concatenated
        else:
            concatenated_tables[problem_type] = pd.DataFrame()
    
    return concatenated_tables

# Create the concatenated tables
concatenated_results = create_concatenated_problem_tables(results['individual_results'])

print("CONCATENATED TABLES BY PROBLEM TYPE")
print("=" * 80)
print("Each table combines all composite measures side-by-side")
print()


CONCATENATED TABLES BY PROBLEM TYPE
Each table combines all composite measures side-by-side



In [20]:
# Display the concatenated tables
for problem_type, table in concatenated_results.items():
    print(f"\n{problem_type.replace('_', ' ').upper()} - CONCATENATED TABLE")
    print("-" * 60)
    
    if not table.empty:
        print(f"Shape: {table.shape}")
        print(f"Index: {table.index.names}")
        print(f"Columns ({len(table.columns)}):")
        
        # Group columns by composite measure for better display
        column_groups = {}
        for col in table.columns:
            # Extract the composite measure suffix
            parts = col.split('_')
            if len(parts) >= 2:
                # Find the composite measure part (usually the last few parts)
                measure_parts = []
                for part in reversed(parts):
                    measure_parts.insert(0, part)
                    if any(comp_key in '_'.join(measure_parts) for comp_key in ['COMPOSITE', 'BAYES', 'EXCESS']):
                        break
                measure_key = '_'.join(measure_parts)
            else:
                measure_key = 'OTHER'
                
            if measure_key not in column_groups:
                column_groups[measure_key] = []
            column_groups[measure_key].append(col)
        
        # Display columns grouped by composite measure
        for measure, cols in column_groups.items():
            print(f"\n  {measure} ({len(cols)} columns):")
            for col in cols:
                print(f"    - {col}")
        
        print(f"\nDataFrame:")
        display(table.round(4))
        
    else:
        print("No data available")
    
    print("\n" + "="*80)



OOD DETECTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (9, 52)
Index: ['ind_dataset', 'eval']
Columns (52):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTER_INNER

  EXCES

Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_t 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_e 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_t 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_e 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_t 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_t 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_e 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_t 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181,0.9072,0.9162,0.9169,0.9122,0.9194,0.9026,0.915,0.9169,0.9122,0.9189,0.9044,0.9113,0.9145,0.9122,0.9116,0.9024,0.913,0.9145,0.9122,0.9126,0.904,0.9126,0.9145,0.9122,0.9124
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573,0.9458,0.9631,0.963,0.9343,0.9591,0.9397,0.9619,0.963,0.9343,0.9584,0.942,0.9551,0.9585,0.9343,0.9558,0.9421,0.9577,0.9585,0.9343,0.957,0.9418,0.9571,0.9585,0.9343,0.9567
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123,0.8987,0.9095,0.9114,0.9103,0.9141,0.8935,0.9079,0.9114,0.9103,0.9134,0.8956,0.9036,0.9087,0.9103,0.904,0.8931,0.9058,0.9087,0.9103,0.9052,0.8944,0.9052,0.9087,0.9103,0.9049
cifar100,cifar10 [ood],0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649,0.7367,0.7745,0.7733,0.5348,0.7667,0.718,0.7742,0.7733,0.5348,0.7652,0.7172,0.7735,0.7734,0.5348,0.7725,0.7216,0.7744,0.7734,0.5348,0.7722,0.7147,0.774,0.7734,0.5348,0.7717
cifar100,svhn [ood],0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717,0.7763,0.8702,0.8701,0.6788,0.8745,0.7499,0.8703,0.8701,0.6788,0.8738,0.7234,0.8526,0.861,0.6788,0.8512,0.7312,0.8567,0.861,0.6788,0.8484,0.7186,0.8546,0.861,0.6788,0.8475
cifar100,tiny_imagenet [ood],0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0,0.9999,0.926,0.8099,0.6229,0.9994,0.9999,0.9797,0.8099,0.6229,1.0,0.9915,0.993,0.8063,0.6229,0.9967,0.9944,0.9371,0.8063,0.6229,0.9989,0.9895,0.9793,0.8063,0.6229,0.9998
tiny_imagenet,imagenet_a [ood],0.8354,0.8272,0.8301,0.8263,0.8331,0.8354,0.8272,0.8301,0.8263,0.441,0.8324,0.8014,0.6511,0.7319,0.7129,0.755,0.8014,0.6511,0.7319,0.7129,0.441,0.7293,0.7811,0.8463,0.8354,0.441,0.8469,0.8014,0.841,0.8354,0.441,0.8466,0.772,0.8427,0.8354,0.441,0.8462,0.7212,0.8352,0.8301,0.441,0.8332,0.7319,0.8345,0.8301,0.441,0.8329,0.7154,0.8354,0.8301,0.441,0.8333
tiny_imagenet,imagenet_o [ood],0.7243,0.721,0.7226,0.7212,0.7234,0.7243,0.721,0.7226,0.7212,0.5127,0.7277,0.7552,0.6911,0.7236,0.7005,0.7368,0.7552,0.6911,0.7236,0.7005,0.5127,0.7213,0.7528,0.7535,0.7243,0.5127,0.7596,0.7552,0.7349,0.7243,0.5127,0.7514,0.7464,0.7406,0.7243,0.5127,0.7541,0.7211,0.7415,0.7226,0.5127,0.7426,0.7236,0.7358,0.7226,0.5127,0.7429,0.7189,0.7406,0.7226,0.5127,0.7457
tiny_imagenet,imagenet_r [ood],0.8253,0.8162,0.8192,0.8155,0.8225,0.8253,0.8162,0.8192,0.8155,0.4048,0.8203,0.7933,0.657,0.7308,0.7122,0.7501,0.7933,0.657,0.7308,0.7122,0.4048,0.7238,0.7744,0.8366,0.8253,0.4048,0.8347,0.7933,0.831,0.8253,0.4048,0.8344,0.7644,0.8327,0.8253,0.4048,0.8339,0.7216,0.8253,0.8192,0.4048,0.8236,0.7308,0.8241,0.8192,0.4048,0.8231,0.7163,0.8254,0.8192,0.4048,0.8238




MISCLASSIFICATION DETECTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (3, 52)
Index: ['ind_dataset', 'eval']
Columns (52):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTER

Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_t 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_e 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_t 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_e 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_t 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_t 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_e 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_t 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439,0.9416,0.9449,0.9423,0.9276,0.9441,0.9365,0.9422,0.9423,0.9276,0.942,0.9431,0.9452,0.9422,0.9276,0.9452,0.9419,0.9458,0.9422,0.9276,0.9457,0.9424,0.9456,0.9422,0.9276,0.9456
cifar100,cifar100 [miscls],0.8451,0.8578,0.8562,0.859,0.8527,0.8451,0.8578,0.8562,0.859,0.5739,0.8527,0.8315,0.7826,0.8372,0.8063,0.8289,0.8315,0.7826,0.8372,0.8063,0.5739,0.8034,0.818,0.8531,0.8451,0.5739,0.8489,0.8315,0.8501,0.8451,0.5739,0.8502,0.8042,0.8488,0.8451,0.5739,0.8457,0.8298,0.8679,0.8562,0.5739,0.8677,0.8372,0.8656,0.8562,0.5739,0.8691,0.8274,0.8671,0.8562,0.5739,0.8685
tiny_imagenet,tiny_imagenet [miscls],0.8447,0.8547,0.8532,0.8547,0.8506,0.8447,0.8547,0.8532,0.8547,0.4168,0.8511,0.8315,0.7542,0.8188,0.8008,0.8192,0.8315,0.7542,0.8188,0.8008,0.4168,0.7946,0.8131,0.8508,0.8447,0.4168,0.8466,0.8315,0.8497,0.8447,0.4168,0.8493,0.7908,0.8465,0.8447,0.4168,0.8419,0.8086,0.8647,0.8532,0.4168,0.8643,0.8188,0.8634,0.8532,0.4168,0.8657,0.8052,0.8645,0.8532,0.4168,0.8647




SELECTIVE PREDICTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (3, 52)
Index: ['ind_dataset', 'eval']
Columns (52):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTER_INNER


Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_t 1 2 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_INNER_PLUS_M,R_e 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_t 1 3 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_CENTRAL_PLUS_M,R_e 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_t 1 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_OUTER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_t 1 2 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_INNER_PLUS_M,R_e 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_t 1 3 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,R_b 1 (S)_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,M_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M,C_COMPOSITE_EAT_SPHERICAL_OUTER_CENTRAL_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9967,0.9828,0.9969,0.9969,0.9969,0.9967,0.9828,0.9963,0.9968,0.9968,0.9969,0.9968,0.9963,0.9969,0.9969,0.9969,0.9968,0.9963,0.9969,0.9967,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9968,0.9963,0.9969,0.9967,0.997,0.9968,0.9963,0.997,0.9968,0.997,0.9968,0.9963,0.997
cifar100,cifar100 [selective],0.9159,0.9202,0.9197,0.9206,0.9186,0.9159,0.9202,0.9197,0.9206,0.8106,0.9173,0.9133,0.9,0.9162,0.8789,0.913,0.9133,0.9,0.9162,0.8789,0.8106,0.9013,0.9097,0.9184,0.9159,0.8106,0.9154,0.9133,0.9175,0.9159,0.8106,0.9158,0.9056,0.9171,0.9159,0.8106,0.9144,0.9141,0.9231,0.9197,0.8106,0.9231,0.9162,0.9225,0.9197,0.8106,0.9234,0.9135,0.9229,0.9197,0.8106,0.9233
tiny_imagenet,tiny_imagenet [selective],0.8889,0.8927,0.8921,0.8926,0.8912,0.8889,0.8927,0.8921,0.8926,0.6598,0.889,0.8853,0.86,0.8825,0.8528,0.882,0.8853,0.86,0.8825,0.8528,0.6598,0.8666,0.8792,0.8909,0.8889,0.6598,0.8863,0.8853,0.8907,0.8889,0.6598,0.8874,0.8717,0.8893,0.8889,0.6598,0.8846,0.8793,0.8961,0.8921,0.6598,0.8959,0.8825,0.8957,0.8921,0.6598,0.8964,0.8779,0.896,0.8921,0.6598,0.8961





In [21]:
# Access individual concatenated tables
ood_concatenated = concatenated_results['ood_detection']
miscls_concatenated = concatenated_results['misclassification_detection']
selective_concatenated = concatenated_results['selective_prediction']

print("INDIVIDUAL ACCESS TO CONCATENATED TABLES")
print("=" * 50)
print("You can now access each concatenated table individually:")
print()
print("# OOD Detection (all composite measures side-by-side)")
print("ood_concatenated = concatenated_results['ood_detection']")
print(f"Shape: {ood_concatenated.shape if not ood_concatenated.empty else 'Empty'}")
print()
print("# Misclassification Detection (all composite measures side-by-side)")  
print("miscls_concatenated = concatenated_results['misclassification_detection']")
print(f"Shape: {miscls_concatenated.shape if not miscls_concatenated.empty else 'Empty'}")
print()
print("# Selective Prediction (all composite measures side-by-side)")
print("selective_concatenated = concatenated_results['selective_prediction']")
print(f"Shape: {selective_concatenated.shape if not selective_concatenated.empty else 'Empty'}")
print()
print("Each table has:")
print("- Rows: (ind_dataset, eval) pairs")
print("- Columns: Individual measures + Comp, grouped by composite measure")
print("- All composite measures are side-by-side in the same table")


INDIVIDUAL ACCESS TO CONCATENATED TABLES
You can now access each concatenated table individually:

# OOD Detection (all composite measures side-by-side)
ood_concatenated = concatenated_results['ood_detection']
Shape: (9, 52)

# Misclassification Detection (all composite measures side-by-side)
miscls_concatenated = concatenated_results['misclassification_detection']
Shape: (3, 52)

# Selective Prediction (all composite measures side-by-side)
selective_concatenated = concatenated_results['selective_prediction']
Shape: (3, 52)

Each table has:
- Rows: (ind_dataset, eval) pairs
- Columns: Individual measures + Comp, grouped by composite measure
- All composite measures are side-by-side in the same table


In [22]:
# Create separate pandas DataFrames for each problem type

# Convert each problem type table to a pandas DataFrame
problem_dataframes = {}
for problem_type in results['final_tables'].keys():
    problem_dataframes[problem_type] = pd.DataFrame(results['final_tables'][problem_type])
    print(f"\n{problem_type.upper()} Problem DataFrame:")
    print(f"Shape: {problem_dataframes[problem_type].shape}")
    print(problem_dataframes[problem_type])


OOD_DETECTION Problem DataFrame:
Shape: (9, 52)
measure                      R_b 1 (L)_v1  R_b 1 (B)_v1  R_b 1 (S)_v1  \
ind_dataset   ood_dataset                                               
cifar10       cifar100           0.916906      0.914436      0.914506   
              svhn               0.962992      0.958357      0.958530   
              tiny_imagenet      0.911364      0.908611      0.908679   
cifar100      cifar10            0.773270      0.773000      0.773388   
              svhn               0.870120      0.858293      0.860970   
              tiny_imagenet      0.809888      0.789521      0.806267   
tiny_imagenet imagenet_a         0.835350      0.827188      0.830074   
              imagenet_o         0.724312      0.720970      0.722622   
              imagenet_r         0.825339      0.816241      0.819225   

measure                      R_b 1 (Z)_v1      C_v1  R_b 1 (L)_v2  \
ind_dataset   ood_dataset                                           
cifar10  

In [23]:
problem_dataframes['ood_detection']

Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5,R_e 1 2 (L)_v6,R_t 1 2 (L)_v6,R_b 1 (L)_v6,M_v6,C_v6,R_e 1 3 (L)_v7,R_t 1 3 (L)_v7,R_b 1 (L)_v7,M_v7,C_v7,R_e 1 1 (S)_v8,R_t 1 1 (S)_v8,R_b 1 (S)_v8,M_v8,C_v8,R_e 1 2 (S)_v9,R_t 1 2 (S)_v9,R_b 1 (S)_v9,M_v9,C_v9,R_e 1 3 (S)_v10,R_t 1 3 (S)_v10,R_b 1 (S)_v10,M_v10,C_v10
ind_dataset,ood_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1
cifar10,cifar100,0.916906,0.914436,0.914506,0.913191,0.915758,0.916906,0.914436,0.914506,0.913191,0.912238,0.918528,0.907203,0.902342,0.902404,0.754648,0.905882,0.907203,0.902342,0.902404,0.754648,0.912238,0.910281,0.90473,0.911507,0.916906,0.912238,0.918087,0.907203,0.916237,0.916906,0.912238,0.919435,0.902604,0.915012,0.916906,0.912238,0.918911,0.904375,0.911267,0.914506,0.912238,0.911576,0.902404,0.913049,0.914506,0.912238,0.912573,0.903989,0.91258,0.914506,0.912238,0.912362
cifar10,svhn,0.962992,0.958357,0.95853,0.956294,0.960983,0.962992,0.958357,0.95853,0.956294,0.934311,0.957215,0.945786,0.940173,0.942129,0.824603,0.943957,0.945786,0.940173,0.942129,0.824603,0.934311,0.942297,0.942616,0.956506,0.962992,0.934311,0.957326,0.945786,0.96315,0.962992,0.934311,0.959062,0.939684,0.961888,0.962992,0.934311,0.958395,0.941999,0.955074,0.95853,0.934311,0.955774,0.942129,0.957686,0.95853,0.934311,0.957044,0.941816,0.957069,0.95853,0.934311,0.956739
cifar10,tiny_imagenet,0.911364,0.908611,0.908679,0.90721,0.910094,0.911364,0.908611,0.908679,0.90721,0.910273,0.914034,0.898685,0.893229,0.893123,0.752376,0.897247,0.898685,0.893229,0.893123,0.752376,0.910273,0.904191,0.895731,0.903615,0.911364,0.910273,0.912274,0.898685,0.909549,0.911364,0.910273,0.914107,0.893504,0.907927,0.911364,0.910273,0.913435,0.895592,0.903609,0.908679,0.910273,0.903958,0.893123,0.905816,0.908679,0.910273,0.905217,0.894407,0.905196,0.908679,0.910273,0.904926
cifar100,cifar10,0.77327,0.773,0.773388,0.772177,0.773419,0.77327,0.773,0.773388,0.772177,0.534822,0.768139,0.736746,0.681292,0.72158,0.689059,0.720619,0.736746,0.681292,0.72158,0.689059,0.534822,0.69281,0.724548,0.774023,0.77327,0.534822,0.764887,0.736746,0.774538,0.77327,0.534822,0.766726,0.718033,0.774192,0.77327,0.534822,0.765247,0.717172,0.773545,0.773388,0.534822,0.772483,0.72158,0.774385,0.773388,0.534822,0.772186,0.714663,0.773969,0.773388,0.534822,0.771673
cifar100,svhn,0.87012,0.858293,0.86097,0.855937,0.865872,0.87012,0.858293,0.86097,0.855937,0.678832,0.867749,0.77634,0.662081,0.731242,0.705718,0.739645,0.77634,0.662081,0.731242,0.705718,0.678832,0.724697,0.755848,0.867727,0.87012,0.678832,0.871659,0.77634,0.870178,0.87012,0.678832,0.874491,0.749942,0.870307,0.87012,0.678832,0.873782,0.723351,0.852639,0.86097,0.678832,0.851173,0.731242,0.856738,0.86097,0.678832,0.848436,0.718591,0.854648,0.86097,0.678832,0.847529
cifar100,tiny_imagenet,0.809888,0.789521,0.806267,0.803022,0.805415,0.809888,0.789521,0.806267,0.803022,0.622941,0.804597,0.999942,0.953406,0.994416,0.976322,0.999854,0.999942,0.953406,0.994416,0.976322,0.622941,0.999611,0.999852,0.999997,0.809888,0.622941,0.999998,0.999942,0.926019,0.809888,0.622941,0.999382,0.9999,0.979732,0.809888,0.622941,0.999974,0.991516,0.992987,0.806267,0.622941,0.996683,0.994416,0.937085,0.806267,0.622941,0.998886,0.989533,0.979339,0.806267,0.622941,0.999849
tiny_imagenet,imagenet_a,0.83535,0.827188,0.830074,0.826338,0.833061,0.83535,0.827188,0.830074,0.826338,0.440974,0.832422,0.801424,0.651126,0.731905,0.712905,0.754996,0.801424,0.651126,0.731905,0.712905,0.440974,0.729263,0.78113,0.846305,0.83535,0.440974,0.846915,0.801424,0.840955,0.83535,0.440974,0.84665,0.771967,0.842723,0.83535,0.440974,0.84622,0.721152,0.835164,0.830074,0.440974,0.833246,0.731905,0.834463,0.830074,0.440974,0.832908,0.715396,0.835404,0.830074,0.440974,0.83325
tiny_imagenet,imagenet_o,0.724312,0.72097,0.722622,0.721199,0.723431,0.724312,0.72097,0.722622,0.721199,0.512686,0.727679,0.755245,0.691078,0.723552,0.7005,0.736824,0.755245,0.691078,0.723552,0.7005,0.512686,0.721285,0.752832,0.753524,0.724312,0.512686,0.759562,0.755245,0.734891,0.724312,0.512686,0.751435,0.74641,0.740633,0.724312,0.512686,0.7541,0.721061,0.741544,0.722622,0.512686,0.74256,0.723552,0.735803,0.722622,0.512686,0.742889,0.718873,0.740566,0.722622,0.512686,0.745659
tiny_imagenet,imagenet_r,0.825339,0.816241,0.819225,0.815484,0.822548,0.825339,0.816241,0.819225,0.815484,0.404755,0.820272,0.793349,0.65699,0.730844,0.712239,0.750137,0.793349,0.65699,0.730844,0.712239,0.404755,0.723755,0.774366,0.836572,0.825339,0.404755,0.834687,0.793349,0.83095,0.825339,0.404755,0.834365,0.76438,0.832725,0.825339,0.404755,0.833866,0.721591,0.825256,0.819225,0.404755,0.823563,0.730844,0.824133,0.819225,0.404755,0.82311,0.716317,0.825429,0.819225,0.404755,0.823839
