In [1]:
import numpy as np
import glob
import json
import matplotlib.pyplot as plt
import os
import pandas as pd

In [None]:
def load_runs(results_folder, problem_idx):
    """
    Loads the metrics of every seed run stored for one problem.

    Looks inside ``<results_folder>/problem_<problem_idx>`` for sub-directories
    whose name starts with ``seed_`` and parses the ``metrics.json`` file of
    each one that has it. Seed folders without a ``metrics.json`` are skipped.

    Args:
        results_folder (str): Folder that contains the ``problem_<idx>`` folders.
        problem_idx: Value interpolated into the ``problem_<idx>`` folder name.

    Returns:
        list: One parsed ``metrics.json`` object per seed folder, in a
        deterministic order: folders with a purely numeric suffix come first,
        sorted by integer value (``seed_2`` before ``seed_10``), followed by
        any other ``seed_*`` folders sorted by name.

    Raises:
        ValueError: If the problem folder does not exist, or if none of its
            seed folders contains a ``metrics.json`` file.
    """
    problem_folder = os.path.join(results_folder, f"problem_{problem_idx}")
    if not os.path.isdir(problem_folder):
        raise ValueError(f"Problem folder not found: {problem_folder}")

    def _seed_order(name):
        # Sort key: numeric seeds by value, everything else afterwards by name.
        suffix = name[len("seed_"):]
        if suffix.isdecimal():
            return (0, int(suffix), name)
        return (1, 0, name)

    # os.listdir() yields entries in arbitrary order; sort so that the order of
    # the returned runs is reproducible across machines and filesystems.
    seed_names = sorted(
        (d for d in os.listdir(problem_folder)
         if d.startswith("seed_") and os.path.isdir(os.path.join(problem_folder, d))),
        key=_seed_order,
    )

    metrics_list = []
    for seed_name in seed_names:
        metrics_path = os.path.join(problem_folder, seed_name, "metrics.json")
        if os.path.isfile(metrics_path):
            # JSON text is UTF-8; do not depend on the platform's locale encoding.
            with open(metrics_path, encoding="utf-8") as f:
                metrics_list.append(json.load(f))

    if not metrics_list:
        raise ValueError("No metrics.json files found for this problem.")

    return metrics_list

def average_metrics(metrics_list, metric_name):
    """
    Averages a specific metric across different runs/seeds by iteration.

    If the runs recorded different numbers of iterations, a warning is printed
    and every run is truncated to the length of the shortest one before
    aggregating.

    Args:
        metrics_list (list): A list of metrics dictionaries from different runs.
        metric_name (str): The name of the metric to average (e.g., 'y_regret_pool').

    Returns:
        tuple: A tuple containing three values:
            - np.ndarray: The average of the metric per iteration.
            - np.ndarray: The population standard deviation (NumPy default,
              ``ddof=0``) of the metric per iteration.
            - np.ndarray: The standard error per iteration, i.e. the standard
              deviation divided by ``sqrt(number of runs)``.

    Raises:
        ValueError: If ``metrics_list`` is empty.
        KeyError: If a metrics dictionary has no ``metric_name`` entry.
    """
    if len(metrics_list) == 0:
        # Without this guard NumPy would return NaN (mean of an empty array and
        # a 0/0 standard error) together with runtime warnings.
        raise ValueError("metrics_list is empty; there are no runs to average.")

    metric_values_all_runs = [metrics[metric_name] for metrics in metrics_list]

    # Ensure all runs have the same number of iterations
    it_counts = [len(run) for run in metric_values_all_runs]
    if len(set(it_counts)) > 1:
        print("Warning: Runs have different numbers of iterations. Truncating to the shortest run.")
        min_its = min(it_counts)
        metric_values_all_runs = [run[:min_its] for run in metric_values_all_runs]

    # Rows are runs, columns are iterations.
    metric_array = np.array(metric_values_all_runs)
    num_runs = metric_array.shape[0]

    avg_metric = np.mean(metric_array, axis=0)
    std_metric = np.std(metric_array, axis=0)
    std_err = std_metric / np.sqrt(num_runs)

    return avg_metric, std_metric, std_err

In [30]:
# Read the per-seed metrics saved for problem index 0 of this results folder,
# then aggregate the "y_regret_pool" metric across those runs per iteration.
metrics_bo_ucb = load_runs(
    results_folder="results/GP-Forrester-UCB-BO-q09",
    problem_idx=0,
)
avg_regret, std_regret, std_err_regret = average_metrics(
    metrics_list=metrics_bo_ucb,
    metric_name="y_regret_pool",
)

0.9940339968509615
1.2762338406615836
0.5823877826021331
0.9282537209441484
1.5547865512581656
