In [1]:
import sys
sys.path.insert(0, "../")

import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from mdu.eval.table_analysis_utils import (
    transform_by_tasks,
    select_composite_and_components,
    check_composite_dominance,
    compute_average_ranks,
    analyze_composite_pareto_performance,
)
from configs.interesting_compositions import INTERESTING_COMPOSITIONS

# Set pandas display options to show all columns
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
pd.set_option("display.max_colwidth", None)



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.3.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/nikita/Programming/multidimensional_uncertainty/.venv/lib

In [2]:
def load_config_file(eps: float, grid_size: int, n_targets_multiplier: int, 
                     target: str, scaler_type: str = "global_scaler") -> pd.DataFrame:
    """
    Load a specific config file based on hyperparameters.
    
    Args:
        eps: Epsilon value
        grid_size: Grid size
        n_targets_multiplier: N targets multiplier  
        target: Target type (exp or beta)
        scaler_type: Scaler type (global_scaler, mahalanobis, or none)
    
    Returns:
        DataFrame with the loaded config data
    """
    # Build filename
    scaler_suffix = f"_{scaler_type}" if scaler_type != "none" else ""
    filename = (f"extended_benchmark_entropic_target_{target}_eps_{eps}_iters_150_"
                f"tol_1e-06_rs_42_grid_size_{grid_size}_n_targets_multiplier_{n_targets_multiplier}{scaler_suffix}.csv")
    
    filepath = Path("../resources/extended_benchmark") / filename
    
    if not filepath.exists():
        raise FileNotFoundError(f"Config file not found: {filepath}")
    
    print(f"Loading config file: {filename}")
    df = pd.read_csv(filepath)
    print(f"Loaded {len(df)} rows, {len(df.columns)} columns")
    
    return df


In [3]:
def shorten_column_names(column_name: str) -> str:
    """
    Shorten column names for better presentation.
    
    Args:
        column_name: Original column name
        
    Returns:
        Shortened column name
    """
    # Handle composite columns
    if column_name.lower().startswith('composite'):
        return 'C'
    
    # Replace score names with shortcuts
    shortened = column_name
    shortened = shortened.replace('Logscore', 'L')
    shortened = shortened.replace('Brier', 'B')
    shortened = shortened.replace('Spherical', 'S')
    shortened = shortened.replace('Zero-one', 'Z')
    shortened = shortened.replace('mahalanobis', 'M')
    
    return shortened


def create_problem_specific_tables(transformed_df: pd.DataFrame, composite_name: str) -> Dict[str, pd.DataFrame]:
    """
    Create separate tables for each problem type: OOD detection, misclassification detection, and selective prediction.
    
    Args:
        transformed_df: Transformed DataFrame from transform_by_tasks
        composite_name: Name of the composite measure to analyze
    
    Returns:
        Dictionary with tables for each problem type
    """
    # Get composite and components data
    composite_df = select_composite_and_components(transformed_df, composite_name)
    
    # Reset index to access ind_dataset and eval columns
    df_reset = composite_df.reset_index()
    
    # Initialize result dictionary
    tables = {}
    
    # Define problem type patterns
    problem_patterns = {
        'ood_detection': '[ood]',
        'misclassification_detection': '[miscls]', 
        'selective_prediction': '[selective]'
    }
    
    for problem_type, pattern in problem_patterns.items():
        # Filter rows for this problem type
        mask = df_reset['eval'].str.contains(pattern, na=False)
        print(pattern)
        problem_df = df_reset[mask].copy()

        display(problem_df)
        
        print(f"Debug - {problem_type}: Found {len(problem_df)} rows with pattern '{pattern}'")
        if len(problem_df) > 0:
            print(f"  Sample eval values: {problem_df['eval'].unique()[:5]}")
        
        if problem_df.empty:
            print(f"Warning: No data found for {problem_type}")
            tables[problem_type] = pd.DataFrame()
            continue
        
        # Set index back to (ind_dataset, eval)
        problem_df = problem_df.set_index(['ind_dataset', 'eval'])
        
        # Identify composite and component columns
        composite_cols = [c for c in problem_df.columns if c.startswith('composite')]
        component_cols = [c for c in problem_df.columns if not c.startswith('composite')]
        
        # Reorder columns: components first, then composite
        ordered_cols = component_cols + composite_cols
        problem_df = problem_df[ordered_cols]
        
        # Apply shortened column names
        shortened_cols = {col: shorten_column_names(col) for col in problem_df.columns}
        problem_df = problem_df.rename(columns=shortened_cols)
        
        # Remove any duplicate rows based on the index
        problem_df = problem_df[~problem_df.index.duplicated(keep='first')]
        
        tables[problem_type] = problem_df
        
        print(f"{problem_type.replace('_', ' ').title()}: {len(problem_df)} rows, {len(problem_df.columns)} measures")
    
    return tables



def create_final_problem_tables(config_results_list: List[Dict]) -> Dict[str, pd.DataFrame]:
    """
    Create final tables for each problem type with rows as (ind_dataset, ood_dataset) 
    and columns stacked from all configurations.
    
    Args:
        config_results_list: List of results from analyze_specific_config
    
    Returns:
        Dictionary with final tables for each problem type
    """
    final_tables = {
        'ood_detection': [],
        'misclassification_detection': [],
        'selective_prediction': []
    }
    
    # Process each configuration result
    for result in config_results_list:
        config = result['config']
        tables = result['tables']
        
        for problem_type in final_tables.keys():
            if problem_type in tables and not tables[problem_type].empty:
                table = tables[problem_type].copy()
                
                # Reset index to get ind_dataset and eval as columns
                table_reset = table.reset_index()
                
                # Keep all metric columns as they are (with shortened names)
                metric_cols = [c for c in table.columns]
                
                if problem_type == 'ood_detection':
                    # For OOD detection: ind_dataset and ood_dataset are different
                    table_reset['ind_dataset'] = table_reset['ind_dataset']
                    table_reset['ood_dataset'] = table_reset['eval'].str.extract(r'^([^[]+)')[0].str.strip()
                    final_cols = ['ind_dataset', 'ood_dataset'] + metric_cols
                else:
                    # For selective prediction and misclassification detection: 
                    # ind_dataset and eval_dataset are the same (just use ind_dataset)
                    table_reset['ind_dataset'] = table_reset['ind_dataset']
                    table_reset['eval_dataset'] = table_reset['ind_dataset']  # Same as ind_dataset
                    final_cols = ['ind_dataset', 'eval_dataset'] + metric_cols
                
                table_final = table_reset[final_cols].copy()
                
                # Remove duplicates - keep only unique rows
                if problem_type == 'ood_detection':
                    table_final = table_final.drop_duplicates(subset=['ind_dataset', 'ood_dataset'])
                else:
                    table_final = table_final.drop_duplicates(subset=['ind_dataset', 'eval_dataset'])
                
                print(f"Debug - Adding table for {problem_type}: shape {table_final.shape} (after deduplication)")
                final_tables[problem_type].append(table_final)
    
    # Combine all configurations for each problem type by concatenating columns
    combined_tables = {}
    for problem_type, table_list in final_tables.items():
        if table_list:
            # Determine the key columns based on problem type
            if problem_type == 'ood_detection':
                key_cols = ['ind_dataset', 'ood_dataset']
            else:
                key_cols = ['ind_dataset', 'eval_dataset']
            
            # Start with the first table
            combined = table_list[0]
            
            # Add columns from subsequent tables
            for i, table in enumerate(table_list[1:], 1):
                # Get metric columns (exclude key columns)
                metric_cols = [c for c in table.columns if c not in key_cols]
                
                # Rename metric columns to avoid conflicts by adding config index
                table_to_merge = table[key_cols + metric_cols].copy()
                rename_dict = {col: f"{col}_v{i+1}" for col in metric_cols}
                table_to_merge = table_to_merge.rename(columns=rename_dict)
                
                # Also rename columns in the first table if this is the first merge
                if i == 1:
                    first_metric_cols = [c for c in combined.columns if c not in key_cols]
                    first_rename_dict = {col: f"{col}_v1" for col in first_metric_cols}
                    combined = combined.rename(columns=first_rename_dict)
                
                # Merge on key columns, adding new metric columns
                combined = pd.merge(combined, table_to_merge, 
                                  on=key_cols, how='outer')
            
            # Set index
            combined = combined.set_index(key_cols)
            combined_tables[problem_type] = combined
            print(f"Debug - Final {problem_type} table shape: {combined.shape}")
        else:
            combined_tables[problem_type] = pd.DataFrame()
            print(f"Debug - No data for {problem_type}")
    
    return combined_tables


def analyze_multiple_configs(config_list: List[Dict], selective_metric: str = "acc_cov_auc") -> Dict:
    """
    Analyze multiple configurations and return results as DataFrames.
    
    Args:
        config_list: List of dictionaries with config parameters and composite_name
        selective_metric: Metric for selective prediction
    
    Returns:
        Dictionary with results and final tables
    """
    results = []
    
    print(f"Analyzing {len(config_list)} configurations...")
    print("=" * 80)
    
    for i, config in enumerate(config_list):
        print(f"\nConfiguration {i+1}/{len(config_list)}:")
        print(f"  eps={config['eps']}, grid_size={config['grid_size']}, n_targets_multiplier={config['n_targets_multiplier']}")
        print(f"  target={config['target']}, scaler_type={config['scaler_type']}")
        print(f"  composite_name={config['composite_name']}")
        
        try:
            # Load and transform data
            df = load_config_file(
                config['eps'], config['grid_size'], config['n_targets_multiplier'], 
                config['target'], config.get('scaler_type', 'global_scaler')
            )
            
            transformed_df = transform_by_tasks(df, selective_metric=selective_metric)
            tables = create_problem_specific_tables(transformed_df, config['composite_name'])
            
            # Store result
            result = {
                'config': config,
                'raw_df': df,
                'transformed_df': transformed_df, 
                'tables': tables
            }
            results.append(result)
            
            print(f"  ✓ Successfully processed")
            
        except Exception as e:
            print(f"  ✗ Error processing configuration: {e}")
            continue
    
    print(f"\nSuccessfully processed {len(results)}/{len(config_list)} configurations")
    
    # Create final combined tables
    print("\nCreating final combined tables...")
    final_tables = create_final_problem_tables(results)
    
    return {
        'individual_results': results,
        'final_tables': final_tables,
        'config_list': config_list
    }


In [4]:
def display_tables_with_formatting(tables: Dict[str, pd.DataFrame], composite_name: str):
    """
    Display the tables with nice formatting and separators between components and composite.
    
    Args:
        tables: Dictionary of tables from create_problem_specific_tables
        composite_name: Name of the composite measure being analyzed
    """
    print("=" * 100)
    print(f"ANALYSIS FOR COMPOSITE MEASURE: {composite_name}")
    print("=" * 100)
    
    for problem_type, table in tables.items():
        if table.empty:
            continue
            
        print(f"\n{'-' * 60}")
        print(f"{problem_type.replace('_', ' ').upper()} TABLE")
        print(f"{'-' * 60}")
        
        # Identify composite and component columns (using shortened names)
        composite_cols = [c for c in table.columns if c.lower().startswith('comp')]
        component_cols = [c for c in table.columns if not c.lower().startswith('comp')]
        
        print(f"Component measures ({len(component_cols)}): {', '.join(component_cols)}")
        if composite_cols:
            print(f"Composite measure ({len(composite_cols)}): {', '.join(composite_cols)}")
        
        print("\nTable:")
        print(table.round(4))
        
        # Show summary statistics
        print(f"\nSummary Statistics for {problem_type.replace('_', ' ').title()}:")
        print("Mean values:")
        mean_values = table.mean()
        for col in component_cols + composite_cols:
            if col in mean_values:
                print(f"  {col}: {mean_values[col]:.4f}")
        
        print(f"\nStandard deviations:")
        std_values = table.std()
        for col in component_cols + composite_cols:
            if col in std_values:
                print(f"  {col}: {std_values[col]:.4f}")
    
    print("\n" + "=" * 100)


## Example Analysis

Now let's analyze a specific configuration. You can modify the parameters below to analyze different configurations:


In [5]:
config_list = [
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE BAYES ALL OUTER'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE BAYES ALL OUTER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE EXCESS ALL OUTER INNER'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE EXCESS ALL OUTER INNER + M'
    },
    {
        'eps': 0.5,
        'grid_size': 5,
        'n_targets_multiplier': 1,
        'target': 'beta',
        'scaler_type': 'none',
        'composite_name': 'COMPOSITE EAT LOGSCORE OUTER OUTER + M'
    }
]

In [6]:
# Run the analysis for all configurations
print("hey")
results = analyze_multiple_configs(config_list)


hey
Analyzing 5 configurations...

Configuration 1/5:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=exp, scaler_type=global_scaler
  composite_name=COMPOSITE BAYES ALL OUTER
Loading config file: extended_benchmark_entropic_target_exp_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1_global_scaler.csv
Loaded 7680 rows, 23 columns
[ood]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),composite bayes all outer
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.915758
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.960983
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910094
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.773419
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.865872
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.805415
12,tiny_imagenet,imagenet_a [ood],0.83535,0.827188,0.830074,0.826338,0.833061
13,tiny_imagenet,imagenet_r [ood],0.825339,0.816241,0.819225,0.815484,0.822548
14,tiny_imagenet,imagenet_o [ood],0.724312,0.72097,0.722622,0.721199,0.723431


Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 5 measures
[miscls]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),composite bayes all outer
0,cifar10,cifar10 [miscls],0.942267,0.942466,0.942228,0.941776,0.942284
1,cifar10,cifar10 [selective],0.996818,0.996836,0.996827,0.99681,0.996822
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.915758
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.960983
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910094
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.773419
6,cifar100,cifar100 [miscls],0.845094,0.857773,0.856181,0.858986,0.852723
7,cifar100,cifar100 [selective],0.915943,0.920203,0.919719,0.920576,0.91864
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.865872
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.805415


Debug - misclassification_detection: Found 15 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Misclassification Detection: 15 rows, 5 measures
[selective]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),composite bayes all outer
0,cifar10,cifar10 [miscls],0.942267,0.942466,0.942228,0.941776,0.942284
1,cifar10,cifar10 [selective],0.996818,0.996836,0.996827,0.99681,0.996822
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.915758
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.960983
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910094
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.773419
6,cifar100,cifar100 [miscls],0.845094,0.857773,0.856181,0.858986,0.852723
7,cifar100,cifar100 [selective],0.915943,0.920203,0.919719,0.920576,0.91864
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.865872
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.805415


Debug - selective_prediction: Found 15 rows with pattern '[selective]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Selective Prediction: 15 rows, 5 measures
  ✓ Successfully processed

Configuration 2/5:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=beta, scaler_type=none
  composite_name=COMPOSITE BAYES ALL OUTER + M
Loading config file: extended_benchmark_entropic_target_beta_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1.csv
Loaded 7920 rows, 23 columns
[ood]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),mahalanobis,composite bayes all outer + m
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.912238,0.918528
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.934311,0.957215
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910273,0.914034
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.534822,0.768139
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.678832,0.867749
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.622941,0.804597
12,tiny_imagenet,imagenet_a [ood],0.83535,0.827188,0.830074,0.826338,0.440974,0.832422
13,tiny_imagenet,imagenet_r [ood],0.825339,0.816241,0.819225,0.815484,0.404755,0.820272
14,tiny_imagenet,imagenet_o [ood],0.724312,0.72097,0.722622,0.721199,0.512686,0.727679


Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 6 measures
[miscls]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),mahalanobis,composite bayes all outer + m
0,cifar10,cifar10 [miscls],0.942267,0.942466,0.942228,0.941776,0.927621,0.941973
1,cifar10,cifar10 [selective],0.996818,0.996836,0.996827,0.99681,0.996266,0.996852
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.912238,0.918528
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.934311,0.957215
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910273,0.914034
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.534822,0.768139
6,cifar100,cifar100 [miscls],0.845094,0.857773,0.856181,0.858986,0.573908,0.852722
7,cifar100,cifar100 [selective],0.915943,0.920203,0.919719,0.920576,0.81055,0.917283
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.678832,0.867749
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.622941,0.804597


Debug - misclassification_detection: Found 15 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Misclassification Detection: 15 rows, 6 measures
[selective]


measure,ind_dataset,eval,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),mahalanobis,composite bayes all outer + m
0,cifar10,cifar10 [miscls],0.942267,0.942466,0.942228,0.941776,0.927621,0.941973
1,cifar10,cifar10 [selective],0.996818,0.996836,0.996827,0.99681,0.996266,0.996852
2,cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.912238,0.918528
3,cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.934311,0.957215
4,cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910273,0.914034
5,cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.534822,0.768139
6,cifar100,cifar100 [miscls],0.845094,0.857773,0.856181,0.858986,0.573908,0.852722
7,cifar100,cifar100 [selective],0.915943,0.920203,0.919719,0.920576,0.81055,0.917283
8,cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.678832,0.867749
9,cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.622941,0.804597


Debug - selective_prediction: Found 15 rows with pattern '[selective]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Selective Prediction: 15 rows, 6 measures
  ✓ Successfully processed

Configuration 3/5:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=exp, scaler_type=global_scaler
  composite_name=COMPOSITE EXCESS ALL OUTER INNER
Loading config file: extended_benchmark_entropic_target_exp_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1_global_scaler.csv
Loaded 7680 rows, 23 columns
[ood]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),composite excess all outer inner
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.905882
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.943957
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.897247
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.720619
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.739645
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.999854
12,tiny_imagenet,imagenet_a [ood],0.801424,0.651126,0.731905,0.712905,0.754996
13,tiny_imagenet,imagenet_r [ood],0.793349,0.65699,0.730844,0.712239,0.750137
14,tiny_imagenet,imagenet_o [ood],0.755245,0.691078,0.723552,0.7005,0.736824


Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 5 measures
[miscls]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),composite excess all outer inner
0,cifar10,cifar10 [miscls],0.941645,0.942365,0.941898,0.796675,0.942578
1,cifar10,cifar10 [selective],0.996886,0.996913,0.996698,0.982762,0.996921
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.905882
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.943957
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.897247
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.720619
6,cifar100,cifar100 [miscls],0.831543,0.782572,0.83724,0.806319,0.828869
7,cifar100,cifar100 [selective],0.913284,0.899957,0.916155,0.878853,0.913014
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.739645
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.999854


Debug - misclassification_detection: Found 15 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Misclassification Detection: 15 rows, 5 measures
[selective]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),composite excess all outer inner
0,cifar10,cifar10 [miscls],0.941645,0.942365,0.941898,0.796675,0.942578
1,cifar10,cifar10 [selective],0.996886,0.996913,0.996698,0.982762,0.996921
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.905882
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.943957
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.897247
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.720619
6,cifar100,cifar100 [miscls],0.831543,0.782572,0.83724,0.806319,0.828869
7,cifar100,cifar100 [selective],0.913284,0.899957,0.916155,0.878853,0.913014
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.739645
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.999854


Debug - selective_prediction: Found 15 rows with pattern '[selective]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Selective Prediction: 15 rows, 5 measures
  ✓ Successfully processed

Configuration 4/5:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=exp, scaler_type=none
  composite_name=COMPOSITE EXCESS ALL OUTER INNER + M
Loading config file: extended_benchmark_entropic_target_exp_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1.csv
Loaded 7920 rows, 23 columns
[ood]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),mahalanobis,composite excess all outer inner + m
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.912238,0.910281
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.934311,0.942297
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.910273,0.904191
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.534822,0.69281
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.678832,0.724697
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.622941,0.999611
12,tiny_imagenet,imagenet_a [ood],0.801424,0.651126,0.731905,0.712905,0.440974,0.729263
13,tiny_imagenet,imagenet_r [ood],0.793349,0.65699,0.730844,0.712239,0.404755,0.723755
14,tiny_imagenet,imagenet_o [ood],0.755245,0.691078,0.723552,0.7005,0.512686,0.721285


Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 6 measures
[miscls]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),mahalanobis,composite excess all outer inner + m
0,cifar10,cifar10 [miscls],0.941645,0.942365,0.941898,0.796675,0.927621,0.941765
1,cifar10,cifar10 [selective],0.996886,0.996913,0.996698,0.982762,0.996266,0.996818
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.912238,0.910281
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.934311,0.942297
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.910273,0.904191
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.534822,0.69281
6,cifar100,cifar100 [miscls],0.831543,0.782572,0.83724,0.806319,0.573908,0.803405
7,cifar100,cifar100 [selective],0.913284,0.899957,0.916155,0.878853,0.81055,0.901336
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.678832,0.724697
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.622941,0.999611


Debug - misclassification_detection: Found 15 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Misclassification Detection: 15 rows, 6 measures
[selective]


measure,ind_dataset,eval,R_e 1 2 (Logscore),R_e 1 2 (Brier),R_e 1 2 (Spherical),R_e 1 2 (Zero-one),mahalanobis,composite excess all outer inner + m
0,cifar10,cifar10 [miscls],0.941645,0.942365,0.941898,0.796675,0.927621,0.941765
1,cifar10,cifar10 [selective],0.996886,0.996913,0.996698,0.982762,0.996266,0.996818
2,cifar10,cifar100 [ood],0.907203,0.902342,0.902404,0.754648,0.912238,0.910281
3,cifar10,svhn [ood],0.945786,0.940173,0.942129,0.824603,0.934311,0.942297
4,cifar10,tiny_imagenet [ood],0.898685,0.893229,0.893123,0.752376,0.910273,0.904191
5,cifar100,cifar10 [ood],0.736746,0.681292,0.72158,0.689059,0.534822,0.69281
6,cifar100,cifar100 [miscls],0.831543,0.782572,0.83724,0.806319,0.573908,0.803405
7,cifar100,cifar100 [selective],0.913284,0.899957,0.916155,0.878853,0.81055,0.901336
8,cifar100,svhn [ood],0.77634,0.662081,0.731242,0.705718,0.678832,0.724697
9,cifar100,tiny_imagenet [ood],0.999942,0.953406,0.994416,0.976322,0.622941,0.999611


Debug - selective_prediction: Found 15 rows with pattern '[selective]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Selective Prediction: 15 rows, 6 measures
  ✓ Successfully processed

Configuration 5/5:
  eps=0.5, grid_size=5, n_targets_multiplier=1
  target=beta, scaler_type=none
  composite_name=COMPOSITE EAT LOGSCORE OUTER OUTER + M
Loading config file: extended_benchmark_entropic_target_beta_eps_0.5_iters_150_tol_1e-06_rs_42_grid_size_5_n_targets_multiplier_1.csv
Loaded 7920 rows, 23 columns
[ood]


measure,ind_dataset,eval,R_e 1 1 (Logscore),R_t 1 1 (Logscore),R_b 1 (Logscore),mahalanobis,composite eat logscore outer outer + m
2,cifar10,cifar100 [ood],0.90473,0.911507,0.916906,0.912238,0.918087
3,cifar10,svhn [ood],0.942616,0.956506,0.962992,0.934311,0.957326
4,cifar10,tiny_imagenet [ood],0.895731,0.903615,0.911364,0.910273,0.912274
5,cifar100,cifar10 [ood],0.724548,0.774023,0.77327,0.534822,0.764887
8,cifar100,svhn [ood],0.755848,0.867727,0.87012,0.678832,0.871659
9,cifar100,tiny_imagenet [ood],0.999852,0.999997,0.809888,0.622941,0.999998
12,tiny_imagenet,imagenet_a [ood],0.78113,0.846305,0.83535,0.440974,0.846915
13,tiny_imagenet,imagenet_r [ood],0.774366,0.836572,0.825339,0.404755,0.834687
14,tiny_imagenet,imagenet_o [ood],0.752832,0.753524,0.724312,0.512686,0.759562


Debug - ood_detection: Found 9 rows with pattern '[ood]'
  Sample eval values: ['cifar100 [ood]' 'svhn [ood]' 'tiny_imagenet [ood]' 'cifar10 [ood]'
 'imagenet_a [ood]']
Ood Detection: 9 rows, 5 measures
[miscls]


measure,ind_dataset,eval,R_e 1 1 (Logscore),R_t 1 1 (Logscore),R_b 1 (Logscore),mahalanobis,composite eat logscore outer outer + m
0,cifar10,cifar10 [miscls],0.94043,0.943244,0.942267,0.927621,0.943901
1,cifar10,cifar10 [selective],0.996843,0.996864,0.996818,0.996266,0.99692
2,cifar10,cifar100 [ood],0.90473,0.911507,0.916906,0.912238,0.918087
3,cifar10,svhn [ood],0.942616,0.956506,0.962992,0.934311,0.957326
4,cifar10,tiny_imagenet [ood],0.895731,0.903615,0.911364,0.910273,0.912274
5,cifar100,cifar10 [ood],0.724548,0.774023,0.77327,0.534822,0.764887
6,cifar100,cifar100 [miscls],0.818006,0.853064,0.845094,0.573908,0.848899
7,cifar100,cifar100 [selective],0.909667,0.918425,0.915943,0.81055,0.915408
8,cifar100,svhn [ood],0.755848,0.867727,0.87012,0.678832,0.871659
9,cifar100,tiny_imagenet [ood],0.999852,0.999997,0.809888,0.622941,0.999998


Debug - misclassification_detection: Found 15 rows with pattern '[miscls]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Misclassification Detection: 15 rows, 5 measures
[selective]


measure,ind_dataset,eval,R_e 1 1 (Logscore),R_t 1 1 (Logscore),R_b 1 (Logscore),mahalanobis,composite eat logscore outer outer + m
0,cifar10,cifar10 [miscls],0.94043,0.943244,0.942267,0.927621,0.943901
1,cifar10,cifar10 [selective],0.996843,0.996864,0.996818,0.996266,0.99692
2,cifar10,cifar100 [ood],0.90473,0.911507,0.916906,0.912238,0.918087
3,cifar10,svhn [ood],0.942616,0.956506,0.962992,0.934311,0.957326
4,cifar10,tiny_imagenet [ood],0.895731,0.903615,0.911364,0.910273,0.912274
5,cifar100,cifar10 [ood],0.724548,0.774023,0.77327,0.534822,0.764887
6,cifar100,cifar100 [miscls],0.818006,0.853064,0.845094,0.573908,0.848899
7,cifar100,cifar100 [selective],0.909667,0.918425,0.915943,0.81055,0.915408
8,cifar100,svhn [ood],0.755848,0.867727,0.87012,0.678832,0.871659
9,cifar100,tiny_imagenet [ood],0.999852,0.999997,0.809888,0.622941,0.999998


Debug - selective_prediction: Found 15 rows with pattern '[selective]'
  Sample eval values: ['cifar10 [miscls]' 'cifar10 [selective]' 'cifar100 [ood]' 'svhn [ood]'
 'tiny_imagenet [ood]']
Selective Prediction: 15 rows, 5 measures
  ✓ Successfully processed

Successfully processed 5/5 configurations

Creating final combined tables...
Debug - Adding table for ood_detection: shape (9, 7) (after deduplication)
Debug - Adding table for misclassification_detection: shape (3, 7) (after deduplication)
Debug - Adding table for selective_prediction: shape (3, 7) (after deduplication)
Debug - Adding table for ood_detection: shape (9, 8) (after deduplication)
Debug - Adding table for misclassification_detection: shape (3, 8) (after deduplication)
Debug - Adding table for selective_prediction: shape (3, 8) (after deduplication)
Debug - Adding table for ood_detection: shape (9, 7) (after deduplication)
Debug - Adding table for misclassification_detection: shape (3, 7) (after deduplication)
Debug - 

## Access Individual Tables

You can access individual tables from the results:


In [7]:


res = select_composite_and_components(results['individual_results'][0]['transformed_df'], "COMPOSITE BAYES ALL OUTER")
res

Unnamed: 0_level_0,measure,R_b 1 (Logscore),R_b 1 (Brier),R_b 1 (Spherical),R_b 1 (Zero-one),composite bayes all outer
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.942267,0.942466,0.942228,0.941776,0.942284
cifar10,cifar10 [selective],0.996818,0.996836,0.996827,0.99681,0.996822
cifar10,cifar100 [ood],0.916906,0.914436,0.914506,0.913191,0.915758
cifar10,svhn [ood],0.962992,0.958357,0.95853,0.956294,0.960983
cifar10,tiny_imagenet [ood],0.911364,0.908611,0.908679,0.90721,0.910094
cifar100,cifar10 [ood],0.77327,0.773,0.773388,0.772177,0.773419
cifar100,cifar100 [miscls],0.845094,0.857773,0.856181,0.858986,0.852723
cifar100,cifar100 [selective],0.915943,0.920203,0.919719,0.920576,0.91864
cifar100,svhn [ood],0.87012,0.858293,0.86097,0.855937,0.865872
cifar100,tiny_imagenet [ood],0.809888,0.789521,0.806267,0.803022,0.805415


In [8]:
results['final_tables']['selective_prediction']

Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5
ind_dataset,eval_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar10,0.942267,0.942466,0.942228,0.941776,0.942284,0.942267,0.942466,0.942228,0.941776,0.927621,0.941973,0.941645,0.942365,0.941898,0.796675,0.942578,0.941645,0.942365,0.941898,0.796675,0.927621,0.941765,0.94043,0.943244,0.942267,0.927621,0.943901
cifar100,cifar100,0.77327,0.773,0.773388,0.772177,0.773419,0.77327,0.773,0.773388,0.772177,0.534822,0.768139,0.736746,0.681292,0.72158,0.689059,0.720619,0.736746,0.681292,0.72158,0.689059,0.534822,0.69281,0.724548,0.774023,0.77327,0.534822,0.764887
tiny_imagenet,tiny_imagenet,0.844739,0.854657,0.85315,0.854657,0.850581,0.844739,0.854657,0.85315,0.854657,0.416754,0.851068,0.831461,0.754206,0.818794,0.800772,0.819192,0.831461,0.754206,0.818794,0.800772,0.416754,0.794594,0.813053,0.850815,0.844739,0.416754,0.846564


In [9]:
# Display final combined tables as DataFrames
print("FINAL COMBINED TABLES")
print("=" * 80)

# OOD Detection Table
print("\n1. OOD DETECTION TABLE")
print("-" * 40)
ood_final = results['final_tables']['ood_detection']
if not ood_final.empty:
    print(f"Shape: {ood_final.shape}")
    print("\nDataFrame:")
    display(ood_final.round(4))
else:
    print("No data available for OOD detection")

# Misclassification Detection Table
print("\n2. MISCLASSIFICATION DETECTION TABLE")
print("-" * 50)
miscls_final = results['final_tables']['misclassification_detection']
if not miscls_final.empty:
    print(f"Shape: {miscls_final.shape}")
    print("\nDataFrame:")
    display(miscls_final.round(4))
else:
    print("No data available for misclassification detection")

# Selective Prediction Table
print("\n3. SELECTIVE PREDICTION TABLE")
print("-" * 40)
selective_final = results['final_tables']['selective_prediction']
if not selective_final.empty:
    print(f"Shape: {selective_final.shape}")
    print("\nDataFrame:")
    display(selective_final.round(4))
else:
    print("No data available for selective prediction")


FINAL COMBINED TABLES

1. OOD DETECTION TABLE
----------------------------------------
Shape: (9, 27)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5
ind_dataset,ood_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar100,0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn,0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet,0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123
cifar100,cifar10,0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
cifar100,svhn,0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717
cifar100,tiny_imagenet,0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0
tiny_imagenet,imagenet_a,0.8354,0.8272,0.8301,0.8263,0.8331,0.8354,0.8272,0.8301,0.8263,0.441,0.8324,0.8014,0.6511,0.7319,0.7129,0.755,0.8014,0.6511,0.7319,0.7129,0.441,0.7293,0.7811,0.8463,0.8354,0.441,0.8469
tiny_imagenet,imagenet_o,0.7243,0.721,0.7226,0.7212,0.7234,0.7243,0.721,0.7226,0.7212,0.5127,0.7277,0.7552,0.6911,0.7236,0.7005,0.7368,0.7552,0.6911,0.7236,0.7005,0.5127,0.7213,0.7528,0.7535,0.7243,0.5127,0.7596
tiny_imagenet,imagenet_r,0.8253,0.8162,0.8192,0.8155,0.8225,0.8253,0.8162,0.8192,0.8155,0.4048,0.8203,0.7933,0.657,0.7308,0.7122,0.7501,0.7933,0.657,0.7308,0.7122,0.4048,0.7238,0.7744,0.8366,0.8253,0.4048,0.8347



2. MISCLASSIFICATION DETECTION TABLE
--------------------------------------------------
Shape: (3, 27)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5
ind_dataset,eval_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar10,0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439
cifar100,cifar100,0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
tiny_imagenet,tiny_imagenet,0.8447,0.8547,0.8532,0.8547,0.8506,0.8447,0.8547,0.8532,0.8547,0.4168,0.8511,0.8315,0.7542,0.8188,0.8008,0.8192,0.8315,0.7542,0.8188,0.8008,0.4168,0.7946,0.8131,0.8508,0.8447,0.4168,0.8466



3. SELECTIVE PREDICTION TABLE
----------------------------------------
Shape: (3, 27)

DataFrame:


Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5
ind_dataset,eval_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar10,0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439
cifar100,cifar100,0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
tiny_imagenet,tiny_imagenet,0.8447,0.8547,0.8532,0.8547,0.8506,0.8447,0.8547,0.8532,0.8547,0.4168,0.8511,0.8315,0.7542,0.8188,0.8008,0.8192,0.8315,0.7542,0.8188,0.8008,0.4168,0.7946,0.8131,0.8508,0.8447,0.4168,0.8466


## Results Structure

The analysis returns:
1. **`final_tables`**: Combined tables for each problem type with the requested format
2. **`individual_results`**: Detailed results for each configuration
3. **`config_list`**: The original configuration list

### Final Table Format
Each final table has:
- **Index**: `(ind_dataset_clean, ood_dataset)` - the in-distribution and out-of-distribution dataset names
- **Columns**: Individual measures and composite measures, labeled by configuration


In [10]:
# Debug: Check individual configuration results and their sizes
print("INDIVIDUAL CONFIGURATION RESULTS - DEBUG")
print("=" * 60)

for i, individual_result in enumerate(results['individual_results']):
    config = individual_result['config']
    tables = individual_result['tables']
    transformed_df = individual_result['transformed_df']
    
    print(f"\nConfiguration {i+1}: {config['composite_name']}")
    print(f"Parameters: eps={config['eps']}, grid_size={config['grid_size']}, target={config['target']}")
    print(f"Original transformed_df shape: {transformed_df.shape}")
    print(f"Transformed_df index levels: {transformed_df.index.names}")
    print(f"Sample index values: {transformed_df.index[:5]}")
    
    for problem_type, table in tables.items():
        if not table.empty:
            print(f"\n{problem_type.replace('_', ' ').title()} - Shape: {table.shape}")
            print(f"Index: {table.index[:3]}")
            # Show first few rows
            display(table.head(3).round(4))
        else:
            print(f"\n{problem_type.replace('_', ' ').title()}: No data")


INDIVIDUAL CONFIGURATION RESULTS - DEBUG

Configuration 1: COMPOSITE BAYES ALL OUTER
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 128)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101



Misclassification Detection - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158



Selective Prediction - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158



Configuration 2: COMPOSITE BAYES ALL OUTER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 132)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 6)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9122,0.9185
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.9343,0.9572
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9103,0.914



Misclassification Detection - Shape: (15, 6)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9276,0.942
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9122,0.9185



Selective Prediction - Shape: (15, 6)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_b 1 (L),R_b 1 (B),R_b 1 (S),R_b 1 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9276,0.942
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9122,0.9185



Configuration 3: COMPOSITE EXCESS ALL OUTER INNER
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 128)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9059
cifar10,svhn [ood],0.9458,0.9402,0.9421,0.8246,0.944
cifar10,tiny_imagenet [ood],0.8987,0.8932,0.8931,0.7524,0.8972



Misclassification Detection - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9426
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9969
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9059



Selective Prediction - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9426
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9969
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9059



Configuration 4: COMPOSITE EXCESS ALL OUTER INNER + M
Parameters: eps=0.5, grid_size=5, target=exp
Original transformed_df shape: (15, 132)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 6)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9122,0.9103
cifar10,svhn [ood],0.9458,0.9402,0.9421,0.8246,0.9343,0.9423
cifar10,tiny_imagenet [ood],0.8987,0.8932,0.8931,0.7524,0.9103,0.9042



Misclassification Detection - Shape: (15, 6)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9276,0.9418
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9963,0.9968
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9122,0.9103



Selective Prediction - Shape: (15, 6)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 2 (L),R_e 1 2 (B),R_e 1 2 (S),R_e 1 2 (Z),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
cifar10,cifar10 [miscls],0.9416,0.9424,0.9419,0.7967,0.9276,0.9418
cifar10,cifar10 [selective],0.9969,0.9969,0.9967,0.9828,0.9963,0.9968
cifar10,cifar100 [ood],0.9072,0.9023,0.9024,0.7546,0.9122,0.9103



Configuration 5: COMPOSITE EAT LOGSCORE OUTER OUTER + M
Parameters: eps=0.5, grid_size=5, target=beta
Original transformed_df shape: (15, 132)
Transformed_df index levels: ['ind_dataset', 'eval']
Sample index values: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])

Ood Detection - Shape: (9, 5)
Index: MultiIndex([('cifar10',      'cifar100 [ood]'),
            ('cifar10',          'svhn [ood]'),
            ('cifar10', 'tiny_imagenet [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar100 [ood],0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn [ood],0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet [ood],0.8957,0.9036,0.9114,0.9103,0.9123



Misclassification Detection - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9404,0.9432,0.9423,0.9276,0.9439
cifar10,cifar10 [selective],0.9968,0.9969,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9047,0.9115,0.9169,0.9122,0.9181



Selective Prediction - Shape: (15, 5)
Index: MultiIndex([('cifar10',    'cifar10 [miscls]'),
            ('cifar10', 'cifar10 [selective]'),
            ('cifar10',      'cifar100 [ood]')],
           names=['ind_dataset', 'eval'])


Unnamed: 0_level_0,measure,R_e 1 1 (L),R_t 1 1 (L),R_b 1 (L),M,C
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cifar10,cifar10 [miscls],0.9404,0.9432,0.9423,0.9276,0.9439
cifar10,cifar10 [selective],0.9968,0.9969,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9047,0.9115,0.9169,0.9122,0.9181


## Compare Composite vs Components

You can also create custom comparisons between the composite measure and its individual components:


In [11]:
# Column information for final tables
print("COLUMN INFORMATION FOR FINAL TABLES")
print("=" * 60)

for problem_type, final_table in results['final_tables'].items():
    if not final_table.empty:
        print(f"\n{problem_type.replace('_', ' ').title()}:")
        print(f"Shape: {final_table.shape}")
        print(f"Index: {final_table.index.names}")
        print("Columns:")
        for col in final_table.columns:
            print(f"  - {col}")
    else:
        print(f"\n{problem_type.replace('_', ' ').title()}: No data")


COLUMN INFORMATION FOR FINAL TABLES

Ood Detection:
Shape: (9, 27)
Index: ['ind_dataset', 'ood_dataset']
Columns:
  - R_b 1 (L)_v1
  - R_b 1 (B)_v1
  - R_b 1 (S)_v1
  - R_b 1 (Z)_v1
  - C_v1
  - R_b 1 (L)_v2
  - R_b 1 (B)_v2
  - R_b 1 (S)_v2
  - R_b 1 (Z)_v2
  - M_v2
  - C_v2
  - R_e 1 2 (L)_v3
  - R_e 1 2 (B)_v3
  - R_e 1 2 (S)_v3
  - R_e 1 2 (Z)_v3
  - C_v3
  - R_e 1 2 (L)_v4
  - R_e 1 2 (B)_v4
  - R_e 1 2 (S)_v4
  - R_e 1 2 (Z)_v4
  - M_v4
  - C_v4
  - R_e 1 1 (L)_v5
  - R_t 1 1 (L)_v5
  - R_b 1 (L)_v5
  - M_v5
  - C_v5

Misclassification Detection:
Shape: (3, 27)
Index: ['ind_dataset', 'eval_dataset']
Columns:
  - R_b 1 (L)_v1
  - R_b 1 (B)_v1
  - R_b 1 (S)_v1
  - R_b 1 (Z)_v1
  - C_v1
  - R_b 1 (L)_v2
  - R_b 1 (B)_v2
  - R_b 1 (S)_v2
  - R_b 1 (Z)_v2
  - M_v2
  - C_v2
  - R_e 1 2 (L)_v3
  - R_e 1 2 (B)_v3
  - R_e 1 2 (S)_v3
  - R_e 1 2 (Z)_v3
  - C_v3
  - R_e 1 2 (L)_v4
  - R_e 1 2 (B)_v4
  - R_e 1 2 (S)_v4
  - R_e 1 2 (Z)_v4
  - M_v4
  - C_v4
  - R_e 1 1 (L)_v5
  - R_t 1 1 (L)_v

In [12]:
# Export final tables to CSV (optional)
print("EXPORT OPTIONS")
print("=" * 40)

for problem_type, final_table in results['final_tables'].items():
    if not final_table.empty:
        filename = f"final_{problem_type}_table.csv"
        print(f"To export {problem_type.replace('_', ' ').title()} table:")
        print(f"  results['final_tables']['{problem_type}'].to_csv('{filename}')")
        # Uncomment to actually export:
        # final_table.to_csv(filename)
    else:
        print(f"No data to export for {problem_type.replace('_', ' ').title()}")


EXPORT OPTIONS
To export Ood Detection table:
  results['final_tables']['ood_detection'].to_csv('final_ood_detection_table.csv')
To export Misclassification Detection table:
  results['final_tables']['misclassification_detection'].to_csv('final_misclassification_detection_table.csv')
To export Selective Prediction table:
  results['final_tables']['selective_prediction'].to_csv('final_selective_prediction_table.csv')


## Export Tables

You can export the tables to CSV files for further analysis:


In [13]:
# Example: Add more configurations to the analysis
additional_configs = [
    {
        'eps': 2.0,
        'grid_size': 2,
        'n_targets_multiplier': 1,
        'target': 'exp',
        'scaler_type': 'global_scaler',
        'composite_name': 'COMPOSITE BAYES ALL INNER'
    }
]

print("To analyze additional configurations, add them to the config_list:")
print("config_list.extend(additional_configs)")
print("results_extended = analyze_multiple_configs(config_list)")

# Uncomment to run with additional configs:
# config_list.extend(additional_configs)
# results_extended = analyze_multiple_configs(config_list)


To analyze additional configurations, add them to the config_list:
config_list.extend(additional_configs)
results_extended = analyze_multiple_configs(config_list)


In [14]:
# Test the shortened column names function
test_columns = [
    'R_b 1 (Logscore)',
    'R_e 2 3 (Brier)', 
    'R_t 1 2 (Spherical)',
    'composite bayes all outer',
    'mahalanobis',
    'R_b 3 (Zero-one)'
]

print("Column name shortening examples:")
print("Original -> Shortened")
print("-" * 40)
for col in test_columns:
    shortened = shorten_column_names(col)
    print(f"{col} -> {shortened}")


Column name shortening examples:
Original -> Shortened
----------------------------------------
R_b 1 (Logscore) -> R_b 1 (L)
R_e 2 3 (Brier) -> R_e 2 3 (B)
R_t 1 2 (Spherical) -> R_t 1 2 (S)
composite bayes all outer -> C
mahalanobis -> M
R_b 3 (Zero-one) -> R_b 3 (Z)


In [15]:
# Create concatenated tables per problem type (side-by-side by composite measure)
def create_concatenated_problem_tables(individual_results: List[Dict]) -> Dict[str, pd.DataFrame]:
    """
    Create concatenated tables for each problem type with all composite measures side-by-side.
    
    Args:
        individual_results: List of individual configuration results
        
    Returns:
        Dictionary with concatenated tables for each problem type
    """
    problem_types = ['ood_detection', 'misclassification_detection', 'selective_prediction']
    concatenated_tables = {}
    
    for problem_type in problem_types:
        tables_to_concat = []
        
        for i, result in enumerate(individual_results):
            config = result['config']
            tables = result['tables']
            
            if problem_type in tables and not tables[problem_type].empty:
                table = tables[problem_type].copy()
                
                # Add suffix to column names to identify the composite measure
                composite_suffix = config['composite_name'].replace(' ', '_').replace('+', 'PLUS')
                
                # Rename columns with composite suffix
                new_columns = {}
                for col in table.columns:
                    new_columns[col] = f"{col}_{composite_suffix}"
                
                table = table.rename(columns=new_columns)
                tables_to_concat.append(table)
        
        # Concatenate all tables side-by-side
        if tables_to_concat:
            # Use outer join to include all indices
            concatenated = pd.concat(tables_to_concat, axis=1, join='outer', sort=True)
            concatenated_tables[problem_type] = concatenated
        else:
            concatenated_tables[problem_type] = pd.DataFrame()
    
    return concatenated_tables

# Create the concatenated tables
concatenated_results = create_concatenated_problem_tables(results['individual_results'])

print("CONCATENATED TABLES BY PROBLEM TYPE")
print("=" * 80)
print("Each table combines all composite measures side-by-side")
print()


CONCATENATED TABLES BY PROBLEM TYPE
Each table combines all composite measures side-by-side



In [16]:
# Display the concatenated tables
for problem_type, table in concatenated_results.items():
    print(f"\n{problem_type.replace('_', ' ').upper()} - CONCATENATED TABLE")
    print("-" * 60)
    
    if not table.empty:
        print(f"Shape: {table.shape}")
        print(f"Index: {table.index.names}")
        print(f"Columns ({len(table.columns)}):")
        
        # Group columns by composite measure for better display
        column_groups = {}
        for col in table.columns:
            # Extract the composite measure suffix
            parts = col.split('_')
            if len(parts) >= 2:
                # Find the composite measure part (usually the last few parts)
                measure_parts = []
                for part in reversed(parts):
                    measure_parts.insert(0, part)
                    if any(comp_key in '_'.join(measure_parts) for comp_key in ['COMPOSITE', 'BAYES', 'EXCESS']):
                        break
                measure_key = '_'.join(measure_parts)
            else:
                measure_key = 'OTHER'
                
            if measure_key not in column_groups:
                column_groups[measure_key] = []
            column_groups[measure_key].append(col)
        
        # Display columns grouped by composite measure
        for measure, cols in column_groups.items():
            print(f"\n  {measure} ({len(cols)} columns):")
            for col in cols:
                print(f"    - {col}")
        
        print(f"\nDataFrame:")
        display(table.round(4))
        
    else:
        print("No data available")
    
    print("\n" + "="*80)



OOD DETECTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (9, 27)
Index: ['ind_dataset', 'eval']
Columns (27):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTER_INNER

  EXCES

Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123
cifar100,cifar10 [ood],0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
cifar100,svhn [ood],0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717
cifar100,tiny_imagenet [ood],0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0
tiny_imagenet,imagenet_a [ood],0.8354,0.8272,0.8301,0.8263,0.8331,0.8354,0.8272,0.8301,0.8263,0.441,0.8324,0.8014,0.6511,0.7319,0.7129,0.755,0.8014,0.6511,0.7319,0.7129,0.441,0.7293,0.7811,0.8463,0.8354,0.441,0.8469
tiny_imagenet,imagenet_o [ood],0.7243,0.721,0.7226,0.7212,0.7234,0.7243,0.721,0.7226,0.7212,0.5127,0.7277,0.7552,0.6911,0.7236,0.7005,0.7368,0.7552,0.6911,0.7236,0.7005,0.5127,0.7213,0.7528,0.7535,0.7243,0.5127,0.7596
tiny_imagenet,imagenet_r [ood],0.8253,0.8162,0.8192,0.8155,0.8225,0.8253,0.8162,0.8192,0.8155,0.4048,0.8203,0.7933,0.657,0.7308,0.7122,0.7501,0.7933,0.657,0.7308,0.7122,0.4048,0.7238,0.7744,0.8366,0.8253,0.4048,0.8347




MISCLASSIFICATION DETECTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (15, 27)
Index: ['ind_dataset', 'eval']
Columns (27):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTE

Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9967,0.9828,0.9969,0.9969,0.9969,0.9967,0.9828,0.9963,0.9968,0.9968,0.9969,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123
cifar100,cifar10 [ood],0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
cifar100,cifar100 [miscls],0.8451,0.8578,0.8562,0.859,0.8527,0.8451,0.8578,0.8562,0.859,0.5739,0.8527,0.8315,0.7826,0.8372,0.8063,0.8289,0.8315,0.7826,0.8372,0.8063,0.5739,0.8034,0.818,0.8531,0.8451,0.5739,0.8489
cifar100,cifar100 [selective],0.9159,0.9202,0.9197,0.9206,0.9186,0.9159,0.9202,0.9197,0.9206,0.8106,0.9173,0.9133,0.9,0.9162,0.8789,0.913,0.9133,0.9,0.9162,0.8789,0.8106,0.9013,0.9097,0.9184,0.9159,0.8106,0.9154
cifar100,svhn [ood],0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717
cifar100,tiny_imagenet [ood],0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0




SELECTIVE PREDICTION - CONCATENATED TABLE
------------------------------------------------------------
Shape: (15, 27)
Index: ['ind_dataset', 'eval']
Columns (27):

  BAYES_ALL_OUTER (5 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER
    - C_COMPOSITE_BAYES_ALL_OUTER

  BAYES_ALL_OUTER_PLUS_M (6 columns):
    - R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M
    - C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M

  EXCESS_ALL_OUTER_INNER (5 columns):
    - R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER
    - C_COMPOSITE_EXCESS_ALL_OUTER_INNER

Unnamed: 0_level_0,measure,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER,C_COMPOSITE_BAYES_ALL_OUTER,R_b 1 (L)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (B)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (S)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_b 1 (Z)_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,M_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,C_COMPOSITE_BAYES_ALL_OUTER_PLUS_M,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER,C_COMPOSITE_EXCESS_ALL_OUTER_INNER,R_e 1 2 (L)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (B)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (S)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 2 (Z)_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,M_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,C_COMPOSITE_EXCESS_ALL_OUTER_INNER_PLUS_M,R_e 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_t 1 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,R_b 1 (L)_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,M_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M,C_COMPOSITE_EAT_LOGSCORE_OUTER_OUTER_PLUS_M
ind_dataset,eval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar10 [miscls],0.9423,0.9425,0.9422,0.9418,0.9423,0.9423,0.9425,0.9422,0.9418,0.9276,0.942,0.9416,0.9424,0.9419,0.7967,0.9426,0.9416,0.9424,0.9419,0.7967,0.9276,0.9418,0.9404,0.9432,0.9423,0.9276,0.9439
cifar10,cifar10 [selective],0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9968,0.9963,0.9969,0.9969,0.9969,0.9967,0.9828,0.9969,0.9969,0.9969,0.9967,0.9828,0.9963,0.9968,0.9968,0.9969,0.9968,0.9963,0.9969
cifar10,cifar100 [ood],0.9169,0.9144,0.9145,0.9132,0.9158,0.9169,0.9144,0.9145,0.9132,0.9122,0.9185,0.9072,0.9023,0.9024,0.7546,0.9059,0.9072,0.9023,0.9024,0.7546,0.9122,0.9103,0.9047,0.9115,0.9169,0.9122,0.9181
cifar10,svhn [ood],0.963,0.9584,0.9585,0.9563,0.961,0.963,0.9584,0.9585,0.9563,0.9343,0.9572,0.9458,0.9402,0.9421,0.8246,0.944,0.9458,0.9402,0.9421,0.8246,0.9343,0.9423,0.9426,0.9565,0.963,0.9343,0.9573
cifar10,tiny_imagenet [ood],0.9114,0.9086,0.9087,0.9072,0.9101,0.9114,0.9086,0.9087,0.9072,0.9103,0.914,0.8987,0.8932,0.8931,0.7524,0.8972,0.8987,0.8932,0.8931,0.7524,0.9103,0.9042,0.8957,0.9036,0.9114,0.9103,0.9123
cifar100,cifar10 [ood],0.7733,0.773,0.7734,0.7722,0.7734,0.7733,0.773,0.7734,0.7722,0.5348,0.7681,0.7367,0.6813,0.7216,0.6891,0.7206,0.7367,0.6813,0.7216,0.6891,0.5348,0.6928,0.7245,0.774,0.7733,0.5348,0.7649
cifar100,cifar100 [miscls],0.8451,0.8578,0.8562,0.859,0.8527,0.8451,0.8578,0.8562,0.859,0.5739,0.8527,0.8315,0.7826,0.8372,0.8063,0.8289,0.8315,0.7826,0.8372,0.8063,0.5739,0.8034,0.818,0.8531,0.8451,0.5739,0.8489
cifar100,cifar100 [selective],0.9159,0.9202,0.9197,0.9206,0.9186,0.9159,0.9202,0.9197,0.9206,0.8106,0.9173,0.9133,0.9,0.9162,0.8789,0.913,0.9133,0.9,0.9162,0.8789,0.8106,0.9013,0.9097,0.9184,0.9159,0.8106,0.9154
cifar100,svhn [ood],0.8701,0.8583,0.861,0.8559,0.8659,0.8701,0.8583,0.861,0.8559,0.6788,0.8677,0.7763,0.6621,0.7312,0.7057,0.7396,0.7763,0.6621,0.7312,0.7057,0.6788,0.7247,0.7558,0.8677,0.8701,0.6788,0.8717
cifar100,tiny_imagenet [ood],0.8099,0.7895,0.8063,0.803,0.8054,0.8099,0.7895,0.8063,0.803,0.6229,0.8046,0.9999,0.9534,0.9944,0.9763,0.9999,0.9999,0.9534,0.9944,0.9763,0.6229,0.9996,0.9999,1.0,0.8099,0.6229,1.0





In [17]:
# Access individual concatenated tables
ood_concatenated = concatenated_results['ood_detection']
miscls_concatenated = concatenated_results['misclassification_detection']
selective_concatenated = concatenated_results['selective_prediction']

print("INDIVIDUAL ACCESS TO CONCATENATED TABLES")
print("=" * 50)
print("You can now access each concatenated table individually:")
print()
print("# OOD Detection (all composite measures side-by-side)")
print("ood_concatenated = concatenated_results['ood_detection']")
print(f"Shape: {ood_concatenated.shape if not ood_concatenated.empty else 'Empty'}")
print()
print("# Misclassification Detection (all composite measures side-by-side)")  
print("miscls_concatenated = concatenated_results['misclassification_detection']")
print(f"Shape: {miscls_concatenated.shape if not miscls_concatenated.empty else 'Empty'}")
print()
print("# Selective Prediction (all composite measures side-by-side)")
print("selective_concatenated = concatenated_results['selective_prediction']")
print(f"Shape: {selective_concatenated.shape if not selective_concatenated.empty else 'Empty'}")
print()
print("Each table has:")
print("- Rows: (ind_dataset, eval) pairs")
print("- Columns: Individual measures + Comp, grouped by composite measure")
print("- All composite measures are side-by-side in the same table")


INDIVIDUAL ACCESS TO CONCATENATED TABLES
You can now access each concatenated table individually:

# OOD Detection (all composite measures side-by-side)
ood_concatenated = concatenated_results['ood_detection']
Shape: (9, 27)

# Misclassification Detection (all composite measures side-by-side)
miscls_concatenated = concatenated_results['misclassification_detection']
Shape: (15, 27)

# Selective Prediction (all composite measures side-by-side)
selective_concatenated = concatenated_results['selective_prediction']
Shape: (15, 27)

Each table has:
- Rows: (ind_dataset, eval) pairs
- Columns: Individual measures + Comp, grouped by composite measure
- All composite measures are side-by-side in the same table


In [18]:
# Create separate pandas DataFrames for each problem type

# Convert each problem type table to a pandas DataFrame
problem_dataframes = {}
for problem_type in results['final_tables'].keys():
    problem_dataframes[problem_type] = pd.DataFrame(results['final_tables'][problem_type])
    print(f"\n{problem_type.upper()} Problem DataFrame:")
    print(f"Shape: {problem_dataframes[problem_type].shape}")
    print(problem_dataframes[problem_type])


OOD_DETECTION Problem DataFrame:
Shape: (9, 27)
measure                      R_b 1 (L)_v1  R_b 1 (B)_v1  R_b 1 (S)_v1  \
ind_dataset   ood_dataset                                               
cifar10       cifar100           0.916906      0.914436      0.914506   
              svhn               0.962992      0.958357      0.958530   
              tiny_imagenet      0.911364      0.908611      0.908679   
cifar100      cifar10            0.773270      0.773000      0.773388   
              svhn               0.870120      0.858293      0.860970   
              tiny_imagenet      0.809888      0.789521      0.806267   
tiny_imagenet imagenet_a         0.835350      0.827188      0.830074   
              imagenet_o         0.724312      0.720970      0.722622   
              imagenet_r         0.825339      0.816241      0.819225   

measure                      R_b 1 (Z)_v1      C_v1  R_b 1 (L)_v2  \
ind_dataset   ood_dataset                                           
cifar10  

In [19]:
problem_dataframes['ood_detection']

Unnamed: 0_level_0,measure,R_b 1 (L)_v1,R_b 1 (B)_v1,R_b 1 (S)_v1,R_b 1 (Z)_v1,C_v1,R_b 1 (L)_v2,R_b 1 (B)_v2,R_b 1 (S)_v2,R_b 1 (Z)_v2,M_v2,C_v2,R_e 1 2 (L)_v3,R_e 1 2 (B)_v3,R_e 1 2 (S)_v3,R_e 1 2 (Z)_v3,C_v3,R_e 1 2 (L)_v4,R_e 1 2 (B)_v4,R_e 1 2 (S)_v4,R_e 1 2 (Z)_v4,M_v4,C_v4,R_e 1 1 (L)_v5,R_t 1 1 (L)_v5,R_b 1 (L)_v5,M_v5,C_v5
ind_dataset,ood_dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
cifar10,cifar100,0.916906,0.914436,0.914506,0.913191,0.915758,0.916906,0.914436,0.914506,0.913191,0.912238,0.918528,0.907203,0.902342,0.902404,0.754648,0.905882,0.907203,0.902342,0.902404,0.754648,0.912238,0.910281,0.90473,0.911507,0.916906,0.912238,0.918087
cifar10,svhn,0.962992,0.958357,0.95853,0.956294,0.960983,0.962992,0.958357,0.95853,0.956294,0.934311,0.957215,0.945786,0.940173,0.942129,0.824603,0.943957,0.945786,0.940173,0.942129,0.824603,0.934311,0.942297,0.942616,0.956506,0.962992,0.934311,0.957326
cifar10,tiny_imagenet,0.911364,0.908611,0.908679,0.90721,0.910094,0.911364,0.908611,0.908679,0.90721,0.910273,0.914034,0.898685,0.893229,0.893123,0.752376,0.897247,0.898685,0.893229,0.893123,0.752376,0.910273,0.904191,0.895731,0.903615,0.911364,0.910273,0.912274
cifar100,cifar10,0.77327,0.773,0.773388,0.772177,0.773419,0.77327,0.773,0.773388,0.772177,0.534822,0.768139,0.736746,0.681292,0.72158,0.689059,0.720619,0.736746,0.681292,0.72158,0.689059,0.534822,0.69281,0.724548,0.774023,0.77327,0.534822,0.764887
cifar100,svhn,0.87012,0.858293,0.86097,0.855937,0.865872,0.87012,0.858293,0.86097,0.855937,0.678832,0.867749,0.77634,0.662081,0.731242,0.705718,0.739645,0.77634,0.662081,0.731242,0.705718,0.678832,0.724697,0.755848,0.867727,0.87012,0.678832,0.871659
cifar100,tiny_imagenet,0.809888,0.789521,0.806267,0.803022,0.805415,0.809888,0.789521,0.806267,0.803022,0.622941,0.804597,0.999942,0.953406,0.994416,0.976322,0.999854,0.999942,0.953406,0.994416,0.976322,0.622941,0.999611,0.999852,0.999997,0.809888,0.622941,0.999998
tiny_imagenet,imagenet_a,0.83535,0.827188,0.830074,0.826338,0.833061,0.83535,0.827188,0.830074,0.826338,0.440974,0.832422,0.801424,0.651126,0.731905,0.712905,0.754996,0.801424,0.651126,0.731905,0.712905,0.440974,0.729263,0.78113,0.846305,0.83535,0.440974,0.846915
tiny_imagenet,imagenet_o,0.724312,0.72097,0.722622,0.721199,0.723431,0.724312,0.72097,0.722622,0.721199,0.512686,0.727679,0.755245,0.691078,0.723552,0.7005,0.736824,0.755245,0.691078,0.723552,0.7005,0.512686,0.721285,0.752832,0.753524,0.724312,0.512686,0.759562
tiny_imagenet,imagenet_r,0.825339,0.816241,0.819225,0.815484,0.822548,0.825339,0.816241,0.819225,0.815484,0.404755,0.820272,0.793349,0.65699,0.730844,0.712239,0.750137,0.793349,0.65699,0.730844,0.712239,0.404755,0.723755,0.774366,0.836572,0.825339,0.404755,0.834687
