In [5]:
import wandb
import numpy as np
from tabulate import tabulate

api = wandb.Api()

entity = "tim-walter-tum"
project = "Safe Differentiable Reinforcement Learning"

runs = api.runs(f"{entity}/{project}")

In [None]:
group_map = {}
env_translation = {
    "Pendulum": "BalancePendulum",
    "Quadrotor": "BalanceQuadrotor",
    "Energy System": "LoadBalanceHousehold"
}
safeguard_translation = {
    "No Safeguard": "NP",
    "Boundary Projection Base": "P",
    "Boundary Projection Regularised": "P",
    "Ray Mask Base": "ZRP-Lin",
    "Ray Mask Regularised": "ZRP-Lin",
    "Ray Mask Passthrough": "ZRP-Lin-PT",
    "Hyperbolic Ray Mask Base": "ZRP-Tanh",
    "Hyperbolic Ray Mask Regularised": "ZRP-Tanh",
}

for env in env_translation.keys():
    for algo in ["SHAC", "SAC", "PPO"]:
        for safeguard in safeguard_translation.keys():
            key = env_translation[env] + "-" + "" + "-" + algo + "" if "Regularised" not in safeguard else "-Reg"
            if env == "Energy System":
                key = key.replace("ZRP", "ZRM")
                key = key.replace("P", "BP")
            group_map[key] = (env, algo, safeguard)

stuck_runs = {
    "Pendulum": {
        "SHAC": {
            "No Safeguard": ["likely-paper-5140"],
            "Boundary Projection Base": ["clean-universe-5155", "trim-shadow-5153"],
            "Boundary Projection Regularised": ["turtle-brulee-5032"],
            "Ray Mask Base": [],
            "Ray Mask Regularised": [],
            "Ray Mask Passthrough": ["happy-shadow-5213", "boysenberry-strudel-5040"],
            "Hyperbolic Ray Mask Base": ["different-terrain-5205"],
            "Hyperbolic Ray Mask Regularised": [""],
        },
        "PPO": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        },
        "SAC": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        }},
    "Quadrotor": {
        "SHAC": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Boundary Projection Regularised": [],
            "Ray Mask Base": [],
            "Ray Mask Regularised": [],
            "Ray Mask Passthrough": ["comfy-gorge-5215"],
            "Hyperbolic Ray Mask Base": [],
            "Hyperbolic Ray Mask Regularised": [],
        },
        "PPO": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        },
        "SAC": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        }},
    "Energy System": {
        "SHAC": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Boundary Projection Regularised": [],
            "Ray Mask Base": [],
            "Ray Mask Regularised": [],
            "Ray Mask Passthrough": [],
            "Hyperbolic Ray Mask Base": [],
            "Hyperbolic Ray Mask Regularised": [],
        },
        "PPO": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        },
        "SAC": {
            "No Safeguard": [],
            "Boundary Projection Base": [],
            "Ray Mask Base": [],
        }},
}

In [7]:
results = {}

for run in api.runs(f"{entity}/{project}"):
    group = run.group
    if group not in group_map:
        continue
    env, algo, safeguard = group_map[group]
    if run.id in stuck_runs[env][algo][safeguard]:
        continue

    # Rewards und Steps extrahieren
    rewards = run.history(keys=["eval/Episodic Reward"])["eval/Episodic Reward"].dropna().values
    steps = run.history(keys=["global_step"])["global_step"].dropna().values
    if len(rewards) == 0 or len(steps) == 0:
        continue

    final_reward = rewards[-1]
    # Schritte bis 5% an final_reward
    threshold = final_reward * 0.95 if final_reward > 0 else final_reward * 1.05
    if final_reward > 0:
        idx = np.where(rewards >= threshold)[0]
    else:
        idx = np.where(rewards <= threshold)[0]
    steps_to_best = steps[idx[0]] if len(idx) > 0 else steps[-1]

    results[env].append({
        "Algorithm": algo,
        "Steps": steps_to_best,
        "Reward": final_reward,
    })

# Tabelle vorbereiten
table = []
for env in ["Pendulum", "Quadrotor"]:
    algos = ["SHAC", "PPO", "SAC"]
    table.append([f"\\multicolumn{{6}}{{c}}{{{env}}}"])
    table.append(["Algorithm", "# Steps", "Reward", "Std", "# Stuck", ""])
    for algo in algos:
        runs = [r for r in results[env] if r["Algorithm"] == algo]
        rewards = [r["Reward"] for r in runs]
        steps = [r["Steps"] for r in runs]
        n_stuck = len(stuck_runs.get(env, {}).get(algo, []))
        if rewards:
            mean_reward = np.mean(rewards)
            std_reward = np.std(rewards)
            mean_steps = int(np.mean(steps))
        else:
            mean_reward = std_reward = mean_steps = "-"
        table.append([algo, mean_steps, f"{mean_reward:.2f}", f"{std_reward:.2f}", n_stuck, ""])

# LaTeX-Tabelle ausgeben
print(tabulate(table, tablefmt="latex_raw"))

SyntaxError: invalid syntax (867653196.py, line 2)