# Notebook 10: Evaluate Constrained Policy (Benchmark V0.1 Results)

This notebook produces the final Year 1 Benchmark Results.
It compares:
1. **Baseline**: Unconstrained Nav2 (P-Control), approximated here by the same policy class with low safety gains (`k_safe = 0.1`)
2. **Constrained Policy**: Naive CMDP (Lagrangian Tuned)

**Metrics**:
- SVR (Safety Violation Rate)
- Min Distance to Hazards
- Task Success / Efficiency (Time to Goal)

In [None]:
import sys
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import math

# Add src to path
sys.path.append(os.path.abspath('../src'))

from simulation.episode_runner import EpisodeRunner, RobotState
from metrics.safety_evaluator import SafetyEvaluator
from policy.constrained_policy import ConstrainedPolicy, PolicyParams

## 1. Setup Policies

In [None]:
# Load Dataset Path
DATASET_DIR = "../data/dataset_v0.1"

# Safety gains, named once so the bed/person gains of each policy cannot drift
# apart and the values are easy to find when re-tuning.
BASELINE_K_SAFE = 0.1
CONSTRAINED_K_SAFE = 5.0

# 1. Baseline (low-safety reference)
# Built from the same ConstrainedPolicy class with a small safety gain.
# NOTE(review): the notebook intro calls this baseline "unconstrained" and the
# previous comment said "k_safe = 0", but the value actually used is 0.1.
# Confirm whether 0.0 is intended and supported by ConstrainedPolicy.
baseline_params = PolicyParams(
    k_safe_bed=BASELINE_K_SAFE,
    k_safe_person=BASELINE_K_SAFE,
)  # low safety
baseline_policy = ConstrainedPolicy(baseline_params)

# 2. Constrained (Tuned via Notebook 09)
# The gain of 5.0 is an assumed tuning result - TODO confirm against the
# output of Notebook 09.
constrained_params = PolicyParams(
    k_safe_bed=CONSTRAINED_K_SAFE,
    k_safe_person=CONSTRAINED_K_SAFE,
)
constrained_policy = ConstrainedPolicy(constrained_params)

## 2. Evaluation Loop
Run both policies on the first 5 dataset worlds, with 3 episodes per world and a fixed start/goal pose for every episode.

In [None]:
# Evaluation sweep: roll out every policy in every test world and collect one
# result row per (world, episode, policy) into `df_res`.

# Rollout settings (same values as the literals previously inlined in the loop).
SIM_DT = 0.1             # integration step per control tick
MAX_STEPS = 300          # step budget per episode (300 * 0.1 = 30.0 simulated time)
GOAL_TOLERANCE = 0.2     # planar distance below which the goal counts as reached
EPISODES_PER_WORLD = 3
N_TEST_WORLDS = 5        # evaluate on the first 5 worlds for speed in the demo
NO_HAZARD_DISTANCE = 99.9  # sentinel distance when a hazard type is absent


def _nearest_hazard_distances(x, y, objects):
    """Return the planar distance from (x, y) to the closest bed and person.

    Distances are measured to each object's pose (x, y). Object types other
    than "bed" and "person" are ignored; a type with no objects keeps the
    NO_HAZARD_DISTANCE sentinel.
    """
    d_nearest = {"bed": NO_HAZARD_DISTANCE, "person": NO_HAZARD_DISTANCE}
    for obj in objects:
        obj_type = obj['type']
        if obj_type in d_nearest:
            d = math.hypot(x - obj['pose']['x'], y - obj['pose']['y'])
            d_nearest[obj_type] = min(d_nearest[obj_type], d)
    return d_nearest


def _rollout(policy, objects, start, goal,
             dt=SIM_DT, max_steps=MAX_STEPS, goal_tol=GOAL_TOLERANCE):
    """Simulate one episode by integrating the (v, w) commands of `policy`.

    Copy of the rollout logic from Notebook 09 (should be refactored to src
    in Phase 7).

    Returns:
        (ep_log, t, reached_goal): the per-step log (list of dicts with keys
        't', 'x', 'y', 'v_lin', 'v_ang'), the simulated time at which the
        episode stopped, and whether it stopped because the goal tolerance
        was met rather than because the step budget ran out.
    """
    x, y, theta = start
    t = 0.0
    ep_log = []
    reached_goal = False

    for _ in range(max_steps):
        d_nearest = _nearest_hazard_distances(x, y, objects)

        # The two trailing zeros are passed through unchanged from the
        # original call; presumably the current velocities - TODO confirm
        # against RobotState.
        r_state = RobotState(t, x, y, theta, 0, 0)
        v, w = policy.get_action(r_state, goal, d_nearest)

        # Position is advanced with the heading from before this step's turn.
        x += v * math.cos(theta) * dt
        y += v * math.sin(theta) * dt
        theta += w * dt
        t += dt
        ep_log.append({'t': t, 'x': x, 'y': y, 'v_lin': v, 'v_ang': w})

        if math.hypot(x - goal[0], y - goal[1]) < goal_tol:
            reached_goal = True
            break

    return ep_log, t, reached_goal


results = []

# Only directories are considered, so a stray file such as an archive named
# "world_*" no longer crashes the loop when objects.json is opened inside it.
worlds = sorted(
    d for d in os.listdir(DATASET_DIR)
    if d.startswith("world_") and os.path.isdir(os.path.join(DATASET_DIR, d))
)
test_worlds = worlds[:N_TEST_WORLDS]
if not test_worlds:
    print(f"WARNING: no 'world_*' directories found in {DATASET_DIR}; df_res will be empty.")

policies = [("Baseline", baseline_policy), ("Constrained", constrained_policy)]

for w_name in test_worlds:
    w_path = os.path.join(DATASET_DIR, w_name)
    with open(os.path.join(w_path, "objects.json"), 'r', encoding="utf-8") as f:
        world_config = json.load(f)

    for i in range(EPISODES_PER_WORLD):
        # Use fixed Start/Goal for fairness (Mock)
        start = (2.0, 10.0, 0.0)
        goal = (18.0, 10.0, 0.0)

        for pol_name, policy in policies:
            ep_log, t, reached_goal = _rollout(
                policy, world_config['objects'], start, goal
            )

            # The evaluator is built per episode, as before; whether it keeps
            # per-episode state is not visible from this notebook.
            evaluator = SafetyEvaluator(world_config['objects'])
            metrics, _ = evaluator.evaluate_episode(pd.DataFrame(ep_log))

            # Copy so the evaluator's returned mapping is not mutated.
            res = dict(metrics)
            res['Policy'] = pol_name
            res['World'] = w_name
            res['Episode'] = i
            # For an episode that hit the step budget this is the timeout
            # time, not a real arrival time; use Reached_Goal to tell them apart.
            res['Time_to_Goal'] = t
            res['Reached_Goal'] = reached_goal
            results.append(res)

# Kept because other cells may read it: number of evaluated episodes.
count = len(results)

df_res = pd.DataFrame(results)

## 3. Comparative Results

In [None]:
# Per-policy means of the headline metrics, printed as the benchmark table.
summary = df_res.groupby('Policy')[['SVR', 'Min_Dist_Person', 'Time_to_Goal']].mean()
print("=== Year 1 Benchmark Results ===")
print(summary)

# Plot: per-episode distributions of SVR and time to goal, one box per policy.
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

df_res.boxplot(column='SVR', by='Policy', ax=ax[0])
ax[0].set_title("Safety Violation Rate (Lower is Better)")
ax[0].set_ylabel("SVR")
ax[0].axhline(y=0.05, color='r', linestyle='--', label='Target')
# The label on the target line is only rendered when a legend is created.
ax[0].legend()

df_res.boxplot(column='Time_to_Goal', by='Policy', ax=ax[1])
ax[1].set_title("Time to Goal (Lower is Better)")
ax[1].set_ylabel("Time to Goal")

# A grouped pandas boxplot adds its own "Boxplot grouped by ..." figure title;
# clear it so only the per-axis titles remain.
fig.suptitle("")
plt.show()