In [4]:
import json
import math
from pathlib import Path

# Method keys in the order they are loaded and shown as table columns.
# Each key is also the file-name prefix that load_and_prepare_data() looks up:
# '<key>_final_aggregated_scores-0.json'.
METHOD_ORDER = [
    'sac_ae',
    'drqv2',
    'madi',
    'sada',
    'segdac_sac_sam_enc_decoder_q_cond',
]
# Column header text for each method key; a key missing here is printed as-is.
METHOD_DISPLAY_NAMES = {
    'sac_ae': 'SAC AE',
    'drqv2': 'DrQ-v2',
    'madi': 'MaDi',
    'sada': 'SADA',
    'segdac_sac_sam_enc_decoder_q_cond': 'SegDAC'
}

# Row label for each task key found in the JSON data; a task missing here is
# printed with its raw key.
TASK_DISPLAY_NAMES = {
    "pushcubetest-v1": "PushCube",
    "pullcubetest-v1": "PullCube", 
    "pickcubetest-v1": "PickCube",
    "pokecubetest-v1": "PokeCube",
    "pullcubetooltest-v1": "PullCubeTool",
    "liftpeguprighttest-v1": "LiftPegUpright",
    "unitreeg1placeappleinbowltest-v1": "PlaceAppleInBowl",
    "unitreeg1transportboxtest-v1": "TransportBox"
}

# Difficulty keys, in the order tables are emitted (outer loop of the generator).
DIFFICULTY_ORDER = ['easy', 'medium', 'hard']

def load_and_prepare_data(json_glob_path):
    """
    Loads the per-method JSON score files and discovers the experiment dimensions.

    Files matching ``json_glob_path`` (a glob relative to the current working
    directory) are indexed by their name with the
    '_final_aggregated_scores-0.json' suffix removed, then loaded in
    METHOD_ORDER. Every method in METHOD_ORDER must have a file.

    Tasks and perturbation names are discovered from the first run of the
    first method only. As read here, a file maps
    run_id -> {'no_perturb': {task: ...},
               'perturb_indiv': {difficulty: {task: {perturb_test: ...}}}}.
    The 'overall' entry is excluded from the tasks; tasks and perturbations
    are returned sorted alphabetically.

    Args:
        json_glob_path: Relative glob pattern selecting the JSON files.

    Returns:
        A dict with keys 'data' (method -> parsed JSON), 'methods' (loaded
        method keys in METHOD_ORDER order), 'tasks' and 'perturbations'
        (sorted lists of names).

    Raises:
        FileNotFoundError: If the pattern matches nothing, or a method listed
            in METHOD_ORDER has no matching file.
        ValueError: If no method was loaded, or the first method's file
            contains no runs.
    """
    print("Loading and preparing data...")
    all_files = list(Path().glob(json_glob_path))
    if not all_files:
        raise FileNotFoundError(
            f"No JSON files found matching pattern: {json_glob_path}")

    suffix = '_final_aggregated_scores-0.json'
    file_map = {f.name.replace(suffix, ''): f for f in all_files}

    all_data = {}
    ordered_methods = []
    for method_name in METHOD_ORDER:
        filepath = file_map.get(method_name)
        if filepath is None:
            # Build the expected name from the method key: no path object
            # exists for a file that was not found.
            raise FileNotFoundError(
                f"Data file for method '{method_name}' not found. "
                f"Expected file: {method_name}{suffix} "
                f"(searched pattern: {json_glob_path})")
        with filepath.open('r', encoding='utf-8') as file:
            all_data[method_name] = json.load(file)
        ordered_methods.append(method_name)

    if not ordered_methods:
        raise ValueError("No data could be loaded for any of the specified methods.")

    # The first run of the first method serves as the template for the
    # task / perturbation dimensions.
    first_method = ordered_methods[0]
    first_run_id = next(iter(all_data[first_method]), None)
    if first_run_id is None:
        raise ValueError(
            f"Data file for method '{first_method}' contains no runs.")
    sample_data = all_data[first_method][first_run_id]
    tasks = sorted(
        t for t in sample_data.get('no_perturb', {}) if t != 'overall')

    perturbation_tests = set()
    perturb_indiv_data = sample_data.get('perturb_indiv') or {}
    for difficulty in DIFFICULTY_ORDER:
        per_task = perturb_indiv_data.get(difficulty)
        if not per_task:
            continue
        # Only the first known task of each difficulty is inspected for
        # perturbation names.
        first_task = next((t for t in per_task if t in tasks), None)
        if first_task is not None:
            perturbation_tests.update(per_task[first_task].keys())

    print(f"Loaded data for {len(ordered_methods)} methods in the specified order.")
    return {
        'data': all_data,
        'methods': ordered_methods,
        'tasks': tasks,
        'perturbations': sorted(perturbation_tests)
    }

def get_cell_performance(all_data, method, task, difficulty, perturb_test):
    """
    Looks up the scores behind one table cell.

    Reads the first run of ``method`` and returns a dict with the perturbed
    'iqm', its 'ci' entry (None when absent) and 'delta', the percentage
    change of the perturbed IQM relative to the unperturbed IQM of the same
    task. A falsy or zero baseline yields a delta of 0. If any lookup fails,
    all three values are None.
    """
    try:
        first_run = next(iter(all_data[method]))
        scores = all_data[method][first_run]
        baseline = scores['no_perturb'][task]['return']['iqm'][0]
        perturbed = scores['perturb_indiv'][difficulty][task][perturb_test]['return']
        iqm = perturbed['iqm'][0]
        ci = perturbed.get('ci')
        if baseline and baseline != 0:
            delta = ((iqm - baseline) / abs(baseline)) * 100
        else:
            # No usable baseline: report no change rather than dividing by zero.
            delta = 0
    except (KeyError, IndexError, TypeError):
        return {'iqm': None, 'ci': None, 'delta': None}
    return {'iqm': iqm, 'ci': ci, 'delta': delta}

def format_latex_cell(data, is_bold, is_underlined):
    """
    Renders one cell as a single-line LaTeX string.

    The cell shows the IQM with two decimals, followed by the half-width of
    the confidence interval when 'ci' holds exactly two entries, and the
    signed delta percentage in \\scriptsize. A missing IQM renders as 'N/A'.
    ``is_underlined`` wraps the cell in \\underline and ``is_bold`` wraps it in
    \\textbf (bold outermost when both are set).
    """
    iqm = data['iqm']
    if iqm is None:
        return 'N/A'

    interval = data['ci']
    if interval and len(interval) == 2:
        upper, lower = interval[1][0], interval[0][0]
        half_width = (upper - lower) / 2
        score = f"{iqm:.2f}$\\pm${half_width:.2f}"
    else:
        score = f"{iqm:.2f}"

    cell = f"{score} \\scriptsize{{({data['delta']:+.1f}\\%)}}"

    # Apply the wrappers inside-out: underline first, then bold around it.
    if is_underlined:
        cell = f"\\underline{{{cell}}}"
    if is_bold:
        cell = f"\\textbf{{{cell}}}"
    return cell

def _build_table_row(all_data, methods, task, difficulty, perturb_test):
    """
    Builds one LaTeX table row for ``task``.

    Returns a ``(row, has_data)`` tuple: the row text (task label followed by
    one formatted cell per method) and whether any method had an IQM.
    """
    row_results = [
        get_cell_performance(all_data, m, task, difficulty, perturb_test)
        for m in methods]
    has_data = any(r['iqm'] is not None for r in row_results)

    # Compare at display precision so values that print identically are all
    # highlighted. With no valid value the reference is -inf, which no cell
    # can match.
    best_iqm = round(
        max((r['iqm'] for r in row_results if r['iqm'] is not None),
            default=-math.inf), 2)
    # Highest delta is best: least negative or most positive change.
    best_delta = round(
        max((r['delta'] for r in row_results if r['delta'] is not None),
            default=-math.inf), 1)

    cells = []
    for r in row_results:
        is_bold = r['iqm'] is not None and round(r['iqm'], 2) == best_iqm
        is_underlined = (
            r['delta'] is not None and round(r['delta'], 1) == best_delta)
        cells.append(format_latex_cell(r, is_bold, is_underlined))

    task_name = TASK_DISPLAY_NAMES.get(task, task)
    return f"{task_name} & {' & '.join(cells)} \\\\", has_data


def _render_table(methods, difficulty, perturb_test, body_rows):
    """Returns the LaTeX source of one table, ending with a blank line."""
    caption_perturb = perturb_test.replace('_', ' ').title()
    label_perturb = perturb_test.replace('_', '')
    bold_header_names = [
        f"\\textbf{{{METHOD_DISPLAY_NAMES.get(m, m)}}}" for m in methods]
    col_format = f"l*{{{len(methods)}}}{{c}}"

    lines = [
        f"% Table for: {caption_perturb} ({difficulty})",
        "\\begin{table}[htbp]",
        "\\centering",
        "\\scriptsize",
        f"\\caption{{{difficulty.title()} {caption_perturb}}}",
        f"\\label{{tab:appendix_{label_perturb}_{difficulty}}}",
        "\\begin{adjustbox}{max width=\\textwidth}",
        f"\\begin{{tabular}}{{{col_format}}}",
        "\\toprule",
        f"\\textbf{{Task}} & {' & '.join(bold_header_names)} \\\\",
        "\\midrule",
        *body_rows,
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{adjustbox}",
        "\\end{table}",
    ]
    return "\n".join(lines) + "\n\n"


def generate_latex_tables(json_glob_path, output_dir):
    """
    Main function to generate the complete LaTeX file with compact formatting.

    Loads the score files matched by ``json_glob_path`` and writes
    'appendix_perturbation_tables.tex' into ``output_dir`` (created if
    needed). One table is written per (difficulty, perturbation) pair with a
    row per task and a column per method; pairs for which no method has data
    are skipped. In each row the highest IQM is bold and the highest delta is
    underlined.

    If the data cannot be loaded, the error is printed and nothing is
    written. Returns None.
    """
    try:
        experimental_data = load_and_prepare_data(json_glob_path)
    except (FileNotFoundError, ValueError, IndexError) as e:
        print(f"Error initializing data: {e}")
        return

    output_path = Path(output_dir) / 'appendix_perturbation_tables.tex'
    output_path.parent.mkdir(parents=True, exist_ok=True)
    all_methods = experimental_data['methods']

    with output_path.open('w', encoding='utf-8') as f:
        f.write("% This file was automatically generated by a Python script.\n")
        # adjustbox is listed because every table below is wrapped in an
        # adjustbox environment.
        f.write("% Required packages: \\usepackage{booktabs}, "
                "\\usepackage{array}, \\usepackage{adjustbox}\n")
        f.write("% Optional for better spacing: \\usepackage{tabularx}\n\n")

        for difficulty in DIFFICULTY_ORDER:
            for perturb_test in experimental_data['perturbations']:
                built_rows = [
                    _build_table_row(experimental_data['data'], all_methods,
                                     task, difficulty, perturb_test)
                    for task in experimental_data['tasks']]

                # Skip combinations for which no method reported any score.
                if not any(has_data for _, has_data in built_rows):
                    continue

                f.write(_render_table(
                    all_methods, difficulty, perturb_test,
                    [row for row, _ in built_rows]))

    print(f"Success! Compact LaTeX tables generated at {output_path}")

# Glob pattern for the per-method score files; it is resolved relative to the
# current working directory.
JSON_GLOB_PATH = 'results/test/*_final_aggregated_scores-0.json'
# Directory that receives 'appendix_perturbation_tables.tex'.
OUTPUT_DIRECTORY = 'results/test/tables'

# Runs the generation as soon as this cell / script is executed.
generate_latex_tables(JSON_GLOB_PATH, OUTPUT_DIRECTORY)

Loading and preparing data...
Loaded data for 5 methods in the specified order.
Success! Compact LaTeX tables generated at results/test/tables/appendix_perturbation_tables.tex
