# Analysis Notebook for Log Data

In [19]:
%matplotlib notebook

## Load Data from Archive

In [20]:
import os
import pickle

log_path = "./log"

groups = {}   # header_without_seed -> list of values dicts

for run_dir in os.listdir(log_path):
    run_path = os.path.join(log_path, run_dir)

    if not os.path.isdir(run_path):
        continue

    header_path = os.path.join(run_path, "header.pickle")
    values_path = os.path.join(run_path, "42.pickle")

    if not (os.path.exists(header_path) and os.path.exists(values_path)):
        continue

    # Load header
    with open(header_path, "rb") as f:
        header = pickle.load(f)

    # Remove seed
    header_no_seed = {k: v for k, v in header.items() if k != "seed"}

    # Convert dict to immutable & sortable key
    key = tuple(sorted(header_no_seed.items()))

    # Load values
    with open(values_path, "rb") as f:
        values = pickle.load(f)

    # Insert into groups
    groups.setdefault(key, []).append(values)
    print(groups)


{(('env_dim', 35), ('goal', (34, 17)), ('map_name', 'random_map'), ('max_pareto_paths', 20), ('number_of_simulations', 200), ('per_sim_budget', 150), ('root_selection_method', 'epsilon_clustering_for_nodes'), ('simulation_method', 'light_rollout'), ('start', (0, 17)), ('total_budget', 600000), ('tree_selection_method', 'ucb_child_selection')): [[{'values': {'step_count': 82, 'weight_shifted': 30.211108945287958, 'distance_to_goal': 0}, 'path': [(0, 17), (0, 16), (1, 16), (2, 16), (2, 17), (2, 18), (3, 18), (4, 18), (4, 17), (5, 17), (6, 17), (6, 18), (6, 19), (7, 19), (8, 19), (9, 19), (10, 19), (11, 19), (12, 19), (12, 18), (12, 17), (13, 17), (14, 17), (15, 17), (16, 17), (17, 17), (18, 17), (19, 17), (20, 17), (21, 17), (22, 17), (23, 17), (24, 17), (25, 17), (26, 17), (27, 17), (28, 17), (29, 17), (30, 17), (31, 17), (32, 17), (33, 17), (33, 16), (34, 16), (34, 17), (33, 17), (32, 17), (31, 17), (30, 17), (29, 17), (28, 17), (27, 17), (26, 17), (25, 17), (24, 17), (23, 17), (22, 17

## Visualize the Data
- This was only used to see the data once

In [21]:
# import matplotlib.pyplot as plt

# os.mkdir("tmp/")

# for idx, (group_key, run_values) in enumerate(groups.items()):
#     # Extract the metrics
#     steps = [v["step_count"] for v in run_values]
#     weights = [v["weight_shifted"] for v in run_values]

#     # --- Plot ---
#     plt.figure(figsize=(7, 5))
#     plt.scatter(steps, weights)

#     plt.xlabel("Steps")
#     plt.ylabel("Weight Shifted")
#     plt.grid(True, alpha=0.3)

#     # Create a readable title (shortened)
#     title_params = {k: v for k, v in group_key}
#     short_title = (
#         f"{title_params['map_name']}, "
#         f"dim={title_params['env_dim']}, "
#         f"T={title_params['total_budget']}, "
#         f"sim={title_params['simulation_method']}"
#     )

#     plt.title(f"Group {idx+1}: {len(run_values)} runs\n{short_title}")

#     plt.tight_layout()
#     plt.savefig(f"./tmp/fig-{idx}.png")
#     plt.close()

#     # Also print full parameters to console for reference
#     print(f"\nGroup {idx+1} parameters:")
#     for k, v in group_key:
#         print(f"  {k}: {v}")


# Analysis for Best Parameters

In [22]:
import matplotlib.pyplot as plt
from IPython.display import display

def best_run_for_group(run_values):
    """Return the best (steps, weight_shifted) for a group (minimizing both)."""
    best = min(
        run_values,
        key=lambda v: (v["step_count"], v["weight_shifted"])
    )
    return best["step_count"], best["weight_shifted"]


def pareto_best_groups(groups):
    """
    Returns a list of (group_key, steps, weight_shifted)
    that lie on the Pareto front.
    """
    # Step 1: compute representative best points
    reps = []
    for key, values in groups.items():
        s, w = best_run_for_group(values)
        reps.append((key, s, w))

    # Step 2: compute non-dominated points
    pareto_front = []

    for i, (key_i, s_i, w_i) in enumerate(reps):
        dominated = False
        for j, (key_j, s_j, w_j) in enumerate(reps):
            if i == j:
                continue

            # j dominates i
            if (s_j <= s_i and w_j <= w_i) and (s_j < s_i or w_j < w_i):
                dominated = True
                break

        if not dominated:
            pareto_front.append((key_i, s_i, w_i))

    return pareto_front


def visualize_pareto_front(groups):
    # Compute representative best points
    reps = []
    for key, values in groups.items():
        steps, weight = best_run_for_group(values)
        reps.append((key, steps, weight))

    # Compute Pareto front
    pareto_front = pareto_best_groups(groups)
    pareto_keys = {id(k): (s, w) for k, s, w in pareto_front}

    # Build figure
    plt.figure(figsize=(8, 6))

    # Plot all configurations
    for i, (key, s, w) in enumerate(reps):
        label = f"C{i}"

        if id(key) in pareto_keys:
            plt.scatter(s, w, color="red", s=60, label="Pareto-optimal" if i == 0 else "")
            plt.annotate(label, (s, w), color="red", fontsize=8)
        else:
            plt.scatter(s, w, color="blue", s=40, label="Dominated" if i == 0 else "")
            plt.annotate(label, (s, w), color="blue", fontsize=8)

    plt.xlabel("Best Step Count (minimize)")
    plt.ylabel("Best Weight Shifted (minimize)")
    plt.title("Pareto Front Across Parameter Configurations")
    plt.grid(True, alpha=0.3)
    plt.legend()

    plt.tight_layout()
    display(plt.gcf())
    plt.close()

    # Print textual summary
    print("\nPareto-optimal configurations:")
    for key, s, w in pareto_front:
        print(f"\nConfig (best steps={s}, weight={w}):")
        for k, v in key:
            print(f"  {k}: {v}")

print(groups)
front = pareto_best_groups(groups)

visualize_pareto_front(groups)

{(('env_dim', 35), ('goal', (34, 17)), ('map_name', 'random_map'), ('max_pareto_paths', 20), ('number_of_simulations', 200), ('per_sim_budget', 150), ('root_selection_method', 'epsilon_clustering_for_nodes'), ('simulation_method', 'light_rollout'), ('start', (0, 17)), ('total_budget', 600000), ('tree_selection_method', 'ucb_child_selection')): [[{'values': {'step_count': 82, 'weight_shifted': 30.211108945287958, 'distance_to_goal': 0}, 'path': [(0, 17), (0, 16), (1, 16), (2, 16), (2, 17), (2, 18), (3, 18), (4, 18), (4, 17), (5, 17), (6, 17), (6, 18), (6, 19), (7, 19), (8, 19), (9, 19), (10, 19), (11, 19), (12, 19), (12, 18), (12, 17), (13, 17), (14, 17), (15, 17), (16, 17), (17, 17), (18, 17), (19, 17), (20, 17), (21, 17), (22, 17), (23, 17), (24, 17), (25, 17), (26, 17), (27, 17), (28, 17), (29, 17), (30, 17), (31, 17), (32, 17), (33, 17), (33, 16), (34, 16), (34, 17), (33, 17), (32, 17), (31, 17), (30, 17), (29, 17), (28, 17), (27, 17), (26, 17), (25, 17), (24, 17), (23, 17), (22, 17

TypeError: list indices must be integers or slices, not str