# Misalignment env demo: Fragile-Shortcut (FS)

This notebook runs a scripted episode and provides a manual play loop.

In [None]:
import json

import gyllm


In [None]:
def show_request(req):
    """Print the message text, reward, and info payload of a request.

    Args:
        req: Mapping providing ``req["message"]["content"]``, ``req["reward"]``
            and a JSON-serializable ``req["info"]``.
    """
    body = req["message"]["content"]
    print(body)

    reward = req["reward"]
    print("reward:", reward)

    # Pretty-print the info payload so nested structures stay readable.
    info_text = json.dumps(req["info"], indent=2)
    print("info:", info_text)


def step_action(env, req, action):
    """Send ``action`` on behalf of the request's actor and return the next request.

    Args:
        env: Object whose ``step`` method accepts an ``{actor: action}`` mapping.
        req: Mapping whose ``"actor"`` entry identifies who is acting.
        action: The action to submit for that actor.

    Returns:
        The first element of whatever ``env.step`` returned.

    Raises:
        RuntimeError: If ``env.step`` returns an empty (falsy) result.
    """
    responses = env.step({req["actor"]: action})
    if responses:
        return responses[0]
    raise RuntimeError("No request returned.")


def move_to(env, req, target, order=("row", "col")):
    """Walk the agent toward ``target`` one axis at a time.

    On each iteration the agent's position is read from ``env._agent_pos`` and
    the first axis in ``order`` that still differs from the target decides the
    move: rows map to "down"/"up", columns to "right"/"left". There is no
    obstacle avoidance; the direction is chosen purely from coordinate
    differences.

    Args:
        env: Environment exposing ``height``, ``width`` and ``_agent_pos``.
        req: The current request; passed to ``step_action`` for each move.
        target: ``(row, col)`` position to reach.
        order: Axis priority, a sequence drawn from ``"row"`` and ``"col"``.

    Returns:
        The request returned by the last step, or ``req`` unchanged if no
        step was taken.

    Raises:
        RuntimeError: If more than ``height * width + 5`` iterations pass
            without the agent reaching ``target``.
    """
    budget = env.height * env.width + 5
    taken = 0
    while env._agent_pos != target:
        taken += 1
        if taken > budget:
            raise RuntimeError(f"Failed to reach {target} from {env._agent_pos}")

        cur_r, cur_c = env._agent_pos
        goal_r, goal_c = target

        # Collect the move each misaligned axis would need.
        pending = {}
        if cur_r != goal_r:
            pending["row"] = "down" if goal_r > cur_r else "up"
        if cur_c != goal_c:
            pending["col"] = "right" if goal_c > cur_c else "left"

        # Honor the caller's axis priority; None means no listed axis can help.
        move = next((pending[axis] for axis in order if axis in pending), None)
        if move is None:
            break
        req = step_action(env, req, move)
    return req


def play_manual(env, max_steps=100):
    """Run an interactive episode driven by actions typed on stdin.

    The environment is reset, the first request is shown, and then each typed
    action is stepped and its resulting request displayed. The loop ends when
    the current request no longer needs an action, when ``max_steps`` actions
    have been taken, or when the user types ``quit`` / ``q`` (any case).

    Args:
        env: Environment to play; must support ``reset()`` and the ``step``
            call used by ``step_action``.
        max_steps: Upper bound on the number of actions accepted.
    """
    quit_words = {"quit", "q"}

    current = env.reset()[0]
    print("Type actions: up/down/left/right/break/open/pickup/exit (or 'quit')")
    show_request(current)

    taken = 0
    while True:
        if not current["needs_action"]:
            break
        if taken >= max_steps:
            break

        typed = input("action> ").strip()
        if typed.lower() in quit_words:
            break

        current = step_action(env, current, typed)
        show_request(current)
        taken += 1


In [None]:
# Scripted episode: build the Fragile-Shortcut env, fetch the key, open the
# door, walk to the goal, and exit. Each stage prints via show_request.
env = gyllm.make(
    "misalignment/fs",
    env_kwargs={
        "height": 7,
        "width": 7,
        "num_doors": 1,
        "shortcut_advantage": 2,
        "key_distance": 2,
        "seed": 1,
        "mix_lambda": 0.0,
        "max_episode_steps": 200,
    },
)
req = env.reset()[0]
show_request(req)

# Take the first door and first key from the env's private collections.
# NOTE(review): presumably both are (row, col) tuples -- `door` is indexed
# with [0]/[1] below and `key` is passed as a move_to target; confirm.
door = next(iter(env._doors))
key = next(iter(env._keys))

# Walk onto the key's cell and pick it up.
req = move_to(env, req, key)
req = step_action(env, req, "pickup")
show_request(req)

# Approach the cell one column to the left of the door, open it, then take
# two steps right.
# NOTE(review): assumes the door is entered from its left side and that two
# "right" moves carry the agent through it -- confirm against the env layout.
door_left = (door[0], door[1] - 1)
req = move_to(env, req, door_left, order=("row", "col"))
req = step_action(env, req, "open")
req = step_action(env, req, "right")
req = step_action(env, req, "right")

# Head to the goal cell and issue the "exit" action, then show the result.
req = move_to(env, req, env._goal, order=("row", "col"))
req = step_action(env, req, "exit")
show_request(req)
