<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/safety_lambda_experiment_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install gymnasium minigrid torch matplotlib

In [None]:
import gymnasium as gym
import minigrid
import numpy as np
import torch
import matplotlib.pyplot as plt

# ===== 1. Safety-augmented environment =====
# Gymnasium ID of the MiniGrid environment used as a simple safety testbed.
# It is handed to train_with_lambda(), which builds the env with gym.make().
env_id = "MiniGrid-LavaGapS7-v0"

# ===== 2. Simple SafeDreamer-like agent =====
class SafeDreamer(torch.nn.Module):
    """Tiny MLP agent that scores every discrete action from a flat observation.

    The network maps a flattened observation to one score per action.  Each
    score is trained to predict the safety-penalised one-step signal
    ``reward - cost_coef * violation`` of its action, and ``act`` greedily
    picks the highest-scoring action.

    Args:
        obs_dim: Number of elements in a flattened observation.
        action_dim: Number of discrete actions.
        cost_coef: Weight (lambda) applied to the violation signal.
        lr: Learning rate of the Adam optimiser.
    """

    def __init__(self, obs_dim, action_dim, cost_coef=1.0, lr=1e-3):
        super().__init__()
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.cost_coef = cost_coef
        self.net = torch.nn.Sequential(
            torch.nn.Linear(obs_dim, 64),
            torch.nn.ReLU(),
            torch.nn.Linear(64, action_dim)
        )
        self.opt = torch.optim.Adam(self.parameters(), lr=lr)

    def act(self, obs):
        """Return the index (a Python int) of the highest-scoring action."""
        obs_vec = self._flatten_obs(obs)
        # Action selection never needs gradients; skip building the graph.
        with torch.no_grad():
            scores = self.net(obs_vec)
        return torch.argmax(scores).item()

    def learn(self, obs, action, reward, violation):
        """Take one gradient step on the score of the action that was taken.

        Args:
            obs: Observation the action was chosen from.
            action: Index of the action that was executed.
            reward: Reward received for that action.
            violation: Safety-violation signal for that step (0 means none).
        """
        obs_vec = self._flatten_obs(obs)
        scores = self.net(obs_vec)
        target = scores.new_tensor(
            float(reward) - self.cost_coef * float(violation)
        )
        # Only the executed action produced a training signal.  Regressing
        # the other outputs toward zero would overwrite their estimates with
        # data that was never observed, so they are left out of the loss.
        loss = torch.nn.functional.mse_loss(scores[action], target)
        self.opt.zero_grad()
        loss.backward()
        self.opt.step()

    def _flatten_obs(self, obs):
        """Convert an observation to a flat float32 tensor.

        Dict observations are reduced to their ``"image"`` entry; anything
        else is converted as-is.
        """
        if isinstance(obs, dict) and "image" in obs:
            obs = obs["image"]
        # as_tensor accepts arrays, nested lists and existing tensors without
        # the copy-construct warning torch.tensor emits for tensor input.
        return torch.as_tensor(obs, dtype=torch.float32).flatten()

# ===== 3. Training loop for λ sweep =====
def _agent_on_lava(env):
    """Return True when the cell under the agent is a lava tile."""
    base = env.unwrapped
    grid = getattr(base, "grid", None)
    pos = getattr(base, "agent_pos", None)
    if grid is None or pos is None:
        # Not a grid-world style environment; nothing to inspect.
        return False
    cell = grid.get(*pos)
    return getattr(cell, "type", None) == "lava"


def train_with_lambda(env_id, lambdas, episodes=50):
    """Train a fresh SafeDreamer per cost coefficient and summarise each run.

    Args:
        env_id: Gymnasium environment ID passed to ``gym.make``.
        lambdas: Iterable of cost coefficients; one agent is trained per value.
        episodes: Number of training episodes per coefficient.

    Returns:
        A list of ``(lam, mean_episode_reward, mean_episode_violations)``
        tuples, one per entry of ``lambdas``, in the same order.
    """
    results = []
    for lam in lambdas:
        env = gym.make(env_id)
        try:
            sample_obs, _ = env.reset()
            # Same dict / non-dict rule SafeDreamer uses when flattening.
            if isinstance(sample_obs, dict) and "image" in sample_obs:
                sample_obs = sample_obs["image"]
            obs_dim = int(np.asarray(sample_obs).size)
            agent = SafeDreamer(obs_dim, env.action_space.n, cost_coef=lam)
            rewards, violations = [], []
            for _ in range(episodes):
                obs, _ = env.reset()
                done = False
                ep_reward, ep_viol = 0, 0
                while not done:
                    action = agent.act(obs)
                    next_obs, reward, terminated, truncated, info = env.step(action)
                    done = terminated or truncated
                    # The step info is not a reliable source for this flag
                    # (a missing "lava_pos" key always reads as "no
                    # violation"), so also check the tile the agent ended up
                    # on.  The info key is still honoured if an env sets it.
                    on_lava = bool(info.get("lava_pos")) or _agent_on_lava(env)
                    violation = 1 if on_lava else 0
                    agent.learn(obs, action, reward, violation)
                    obs = next_obs
                    ep_reward += reward
                    ep_viol += violation
                rewards.append(ep_reward)
                violations.append(ep_viol)
        finally:
            # Release the environment even if training raised.
            env.close()
        mean_reward = np.mean(rewards)
        mean_violations = np.mean(violations)
        results.append((lam, mean_reward, mean_violations))
        print(f"λ={lam:.2f} → reward={mean_reward:.2f}, violations={mean_violations:.2f}")
    return results

# ===== 4. Run sweep =====
# Cost coefficients to compare, from no penalty (0.0) up to 5.0.
lambdas = [0.0, 0.1, 0.5, 1.0, 2.0, 5.0]
# One (λ, mean reward, mean violations) tuple comes back per coefficient.
results = train_with_lambda(
    env_id=env_id,
    lambdas=lambdas,
    episodes=100,
)

# ===== 5. Plot Pareto frontier =====
# Each result tuple is (λ, mean reward, mean violations); pull out the
# reward and violation columns by position.
rewards = [entry[1] for entry in results]
violations = [entry[2] for entry in results]

plt.figure(figsize=(6, 4))
# One scatter call per λ so every coefficient gets its own legend entry.
for lam, r, v in results:
    plt.scatter(v, r, label=f"λ={lam}")
plt.title("Reward–Violation Pareto Frontier")
plt.xlabel("Avg Violations per Episode")
plt.ylabel("Avg Reward per Episode")
plt.grid(True)
plt.legend()
plt.show()

# ===== 6. Compute AUC =====
# Order the (violations, reward) points by violations so the x-axis is
# non-decreasing before integrating.
sorted_points = sorted(zip(violations, rewards), key=lambda point: point[0])
x = [p[0] for p in sorted_points]
y = [p[1] for p in sorted_points]
# np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; prefer
# the new name and fall back to the old one on older NumPy releases.
_trapezoid = getattr(np, "trapezoid", None) or np.trapz
auc = _trapezoid(y, x)
print(f"Area under curve (AUC): {auc:.3f}")