# Misalignment env demo: Rescue-vs-Loot (RVL)

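This notebook runs a scripted episode (walk to a civilian, rescue them, then walk to the exit and leave) and provides a manual play loop (`play_manual`) for stepping through the environment interactively.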

In [None]:
import json

import gyllm


In [None]:
def show_request(req):
    """Print a request's message text, its reward, and its info as JSON.

    Args:
        req: Mapping providing ``"message"`` (which itself provides
            ``"content"``), ``"reward"``, and a JSON-serializable ``"info"``.
    """
    message_text = req["message"]["content"]
    print(message_text)
    print("reward:", req["reward"])
    # Two-space indentation keeps nested info readable in notebook output.
    info_text = json.dumps(req["info"], indent=2)
    print("info:", info_text)


def step_action(env, req, action):
    """Submit ``action`` for the actor named in ``req`` and return the first reply.

    Args:
        env: Object exposing ``step(actions)``, where ``actions`` maps an
            actor identifier to its action.
        req: Current request; only ``req["actor"]`` is read.
        action: Action to submit for that actor.

    Returns:
        The first element of whatever ``env.step`` returned.

    Raises:
        RuntimeError: If ``env.step`` returned an empty/falsy result.
    """
    actor = req["actor"]
    responses = env.step({actor: action})
    if responses:
        return responses[0]
    raise RuntimeError("No request returned.")


def move_to(env, req, target):
    """Step the agent toward ``target`` until ``env._agent_pos`` equals it.

    The row is aligned first (``"up"``/``"down"``), then the column
    (``"left"``/``"right"``). No obstacle avoidance is attempted; a step
    budget of ``env.height * env.width + 5`` stops the loop if the agent
    never arrives.

    Args:
        env: Environment exposing ``height``, ``width``, and the private
            ``_agent_pos`` attribute as a ``(row, col)`` pair.
        req: Current request, forwarded to ``step_action``.
        target: ``(row, col)`` position to reach.

    Returns:
        The latest request: ``req`` unchanged if the agent is already at
        ``target``, otherwise the result of the last ``step_action`` call.

    Raises:
        RuntimeError: If the step budget is exhausted before arriving.
    """
    budget = env.height * env.width + 5
    taken = 0
    while True:
        here = env._agent_pos
        if here == target:
            return req
        if taken >= budget:
            raise RuntimeError(f"Failed to reach {target} from {here}")
        taken += 1

        cur_row, cur_col = here
        goal_row, goal_col = target
        # Fix the row first; only once rows match do we move along the column.
        if goal_row > cur_row:
            direction = "down"
        elif goal_row < cur_row:
            direction = "up"
        elif goal_col > cur_col:
            direction = "right"
        else:
            direction = "left"
        req = step_action(env, req, direction)


def play_manual(env, max_steps=100):
    """Reset ``env`` and drive it interactively from typed actions.

    After each submitted action the resulting request is printed with
    ``show_request``. The loop ends when the request's ``"needs_action"``
    is falsy, when ``max_steps`` actions have been submitted, or when the
    user types ``quit`` or ``q`` (case-insensitive).

    Args:
        env: Environment exposing ``reset()`` (its first element is used as
            the initial request) and whatever ``step_action`` requires.
        max_steps: Maximum number of actions to submit.
    """
    req = env.reset()[0]
    print("Type actions: up/down/left/right/rescue/exit (or 'quit')")
    show_request(req)

    submitted = 0
    while True:
        if not req["needs_action"] or submitted >= max_steps:
            return
        typed = input("action> ").strip()
        # Only the quit check is case-insensitive; the action itself is
        # forwarded exactly as typed (after stripping whitespace).
        if typed.lower() in {"quit", "q"}:
            return
        req = step_action(env, req, typed)
        show_request(req)
        submitted += 1


In [None]:
# Scripted episode: build the RVL environment, rescue one civilian, then
# leave through the exit, printing the request after each milestone.
env = gyllm.make(
    "misalignment/rvl",
    env_kwargs={
        # 7x7 layout with one civilian and two coins.
        "height": 7,
        "width": 7,
        "num_civilians": 1,
        "num_coins": 2,
        "hazard_enabled": True,
        "hazard_density": 0.4,
        # NOTE(review): presumably fixes the layout for reproducibility — confirm against gyllm.
        "seed": 0,
        # NOTE(review): meaning of mix_lambda is not visible here — see the RVL env docs.
        "mix_lambda": 0.0,
        "max_episode_steps": 50,
    },
)
# reset() returns a sequence of requests; the first one is used as the current request.
req = env.reset()[0]
show_request(req)

# Reads the env's private `_civilians` collection and takes the first
# position it yields; with num_civilians=1 this is expected to be the only one.
civilian = next(iter(env._civilians))
req = move_to(env, req, civilian)
req = step_action(env, req, "rescue")
show_request(req)

# Walk to the position stored in the private `_exit` attribute and end the run.
req = move_to(env, req, env._exit)
req = step_action(env, req, "exit")
show_request(req)
