# 06 - Task Completion Analysis

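Computes sparse-reward task-completion metrics from the final-phase training results: each run's mean return measured as its distance above the penalty floor `R_MIN` (raw and normalized by `|R_MIN|`), reported per seed and aggregated across seeds.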

**Output:**
- `/content/drive/MyDrive/task_completion_per_seed.csv`
- `/content/drive/MyDrive/task_completion_agg.csv`

In [None]:
# ============================================
# Script B: Sparse Reward Task Completion
# ============================================
import os
import pandas as pd
import numpy as np

# Folder that holds both the input results CSV and the generated outputs.
# Change this one value if your files live somewhere else.
_DRIVE_DIR = "/content/drive/MyDrive"

# Input: training results CSV (adjust the file name if needed).
RESULTS_CSV = f"{_DRIVE_DIR}/results_combined_new.csv"

# Outputs: per-seed metrics and the across-seed aggregate.
TASK_PER_SEED = f"{_DRIVE_DIR}/task_completion_per_seed.csv"
TASK_AGG = f"{_DRIVE_DIR}/task_completion_agg.csv"

# Penalty floor of the sparse reward regime; completion is measured as the
# distance of a run's mean return above this value.
# For asymmetric_advantages the floor is around -40 in your runs -- tweak it
# if you want a different floor.
R_MIN = -40.0

# Load the raw training results.
print(f"Loading training results from: {RESULTS_CSV}")
df = pd.read_csv(RESULTS_CSV)

# Validate the schema on the raw frame *before* any column is accessed, so a
# malformed CSV (including one without a "phase" column) fails with the
# descriptive ValueError below instead of a bare KeyError from the filter.
expected_cols = {
    "baseline", "env", "seed",
    "phase", "mean_return", "std_dev", "train_minutes"
}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"CSV is missing columns: {missing}")

# Only use final policies. The .copy() detaches the slice from `df` so that
# columns can be added to it later without a SettingWithCopyWarning.
df_final = df[df["phase"] == "final"].copy()

# Tidy the env labels: strip surrounding whitespace so that labels differing
# only by stray spaces end up in the same group below.
df_final["env_clean"] = df_final["env"].str.strip()

# Collapse to exactly one row per (baseline, env, seed).
_seed_keys = ["baseline", "env_clean", "seed"]
_seed_stats = {
    # Average of mean_return over the rows sharing a key.
    "mean_return": pd.NamedAgg(column="mean_return", aggfunc="mean"),
    # Spread of mean_return across duplicate rows (usually there is just 1).
    "std_return": pd.NamedAgg(column="mean_return", aggfunc="std"),
    # Average of the per-run std_dev column.
    "mean_std_dev": pd.NamedAgg(column="std_dev", aggfunc="mean"),
}
grouped = df_final.groupby(_seed_keys, as_index=False).agg(**_seed_stats)

# ----- Completion metrics -----
# completion_score: how far the mean return sits above the penalty floor.
# completion_norm:  that distance divided by |R_MIN|, which lands in roughly
#                   [0, 1] as long as the return lies in [R_MIN, 0].
_floor_gap = grouped["mean_return"].sub(R_MIN)
grouped["completion_score"] = _floor_gap
grouped["completion_norm"] = _floor_gap.div(abs(R_MIN))

# Persist the per-seed table (without the positional index column).
grouped.to_csv(TASK_PER_SEED, index=False)
print(f"Saved per-seed task completion metrics to: {TASK_PER_SEED}")

# ----- Aggregate over seeds per (baseline, env) -----
# Build the named-aggregation spec programmatically: for every metric emit a
# "<metric>_mean" and a "<metric>_std" output column, in that order.
_summary_spec = {
    f"{metric}_{stat}": (metric, stat)
    for metric in ("mean_return", "completion_score", "completion_norm")
    for stat in ("mean", "std")
}
agg = grouped.groupby(["baseline", "env_clean"], as_index=False).agg(**_summary_spec)

# Persist the aggregated table (without the positional index column).
agg.to_csv(TASK_AGG, index=False)
print(f"Saved aggregated task completion metrics to: {TASK_AGG}")

# Show the result ordered by environment first, then baseline.
print("\nPreview (aggregated task-completion stats):")
_preview = agg.sort_values(["env_clean", "baseline"])
print(_preview)