# GYLLM env API manual walkthrough

This notebook shows how to interact with the env API directly:
- create an env
- reset and inspect Request objects
- step with manual actions


In [1]:
import gyllm
from gyllm.envs import AutoResetWrapper


## Create an environment


In [2]:
# Build a single (non-batched) ReverseEcho env; each episode lasts two turns.
env = gyllm.make(
    "simple/reverse_echo",
    env_kwargs={"num_turns": 2},
)


## Reset and inspect the first request


In [3]:
# reset() starts a new episode and returns the pending requests as a list
# (a single request here, since this env has one actor).
requests = env.reset()
# Keep a handle on the opening request: it is the one shown carrying the
# `system_message`, which a later cell inspects.
initial_request = requests[0]
# Bare last expression so the notebook renders the full request list.
requests


[{'actor': 'agent',
  'reward': 0.0,
  'system_message': {'role': 'system',
   'content': 'You are in ReverseEcho.\nEach turn, the environment will send you a message.\nYour task is to reply with the exact same message.\nDo not add extra words.\nLeading/trailing whitespace is ignored.\n'},
  'message': {'role': 'user', 'content': 'lemon'},
  'needs_action': True,
  'info': {'turn': 0, 'num_turns': 2},
  'episode_id': 0,
  'episode_start': True,
  'episode_end': False}]

## Reply with a manual action


In [4]:
# Pull out who is being asked and what they were sent.
request = requests[0]
actor, message = request["actor"], request["message"]["content"]
# Actions are keyed by actor ID. ReverseEcho wants the prompt repeated exactly,
# so the received message is sent straight back.
actions = {actor: message}
# step() consumes the actions and returns the next batch of requests.
requests = env.step(actions)
requests


[{'actor': 'agent',
  'reward': 1.0,
  'message': {'role': 'user', 'content': 'apple'},
  'needs_action': True,
  'info': {'turn': 1, 'num_turns': 2},
  'episode_id': 0,
  'episode_start': False,
  'episode_end': False}]

## Finish the second turn


In [5]:
# Same echo as the previous turn, applied to the second prompt.
request = requests[0]
actor, message = request["actor"], request["message"]["content"]
actions = {actor: message}
# With num_turns=2 this is the last step: the returned request is marked
# episode_end and no longer asks for an action.
requests = env.step(actions)
requests


[{'actor': 'agent',
  'reward': 1.0,
  'message': {'role': 'user', 'content': 'Done.'},
  'needs_action': False,
  'info': {'turn': 2, 'num_turns': 2},
  'episode_id': 0,
  'episode_start': False,
  'episode_end': True}]

## Inspect the system prompt


In [6]:
# Look the system prompt up on the request saved right after reset(); the
# requests returned by the later step() calls above do not include this key.
initial_request["system_message"]


{'role': 'system',
 'content': 'You are in ReverseEcho.\nEach turn, the environment will send you a message.\nYour task is to reply with the exact same message.\nDo not add extra words.\nLeading/trailing whitespace is ignored.\n'}

## Batched environments

Pass `num_envs` to get a batched env. Each actor ID gains an `::env=<index>` suffix (for example `agent::env=0`) identifying the sub-environment it belongs to, and each initial request carries its own `system_message`.


In [7]:
# Two copies of ReverseEcho behind one batched interface, one turn per episode.
batched_env = gyllm.make(
    "simple/reverse_echo",
    env_kwargs={"num_turns": 1},
    num_envs=2,
)
batched_requests = batched_env.reset()
# Summarise each request as (actor, episode_start, needs_action).
[(req["actor"], req["episode_start"], req["needs_action"]) for req in batched_requests]


[('agent::env=0', True, True), ('agent::env=1', True, True)]

In [8]:
# Show, per actor, the role of the attached system message and the prompt text.
[
    (req["actor"], req["system_message"]["role"], req["message"]["content"])
    for req in batched_requests
]


[('agent::env=0', 'system', 'nebula'), ('agent::env=1', 'system', 'falcon')]

In [9]:
# One action per actor ID: echo each sub-env's own prompt back to it.
batched_actions = {
    req["actor"]: req["message"]["content"]
    for req in batched_requests
}
batched_step = batched_env.step(batched_actions)
# Summarise each returned request as (actor, episode_end, needs_action).
[(req["actor"], req["episode_end"], req["needs_action"]) for req in batched_step]


[('agent::env=0', True, False), ('agent::env=1', True, False)]

## Batched + AutoResetWrapper

When an env finishes, `AutoResetWrapper` resets it automatically and appends the new episode-start requests to the same step result, after the episode-end requests.


In [14]:
# Wrap the batched env so that finishing an episode immediately starts a new one.
autoreset_env = AutoResetWrapper(
    gyllm.make("simple/reverse_echo", env_kwargs={"num_turns": 1}, num_envs=2)
)
autoreset_requests = autoreset_env.reset()
# Echo every prompt back; with num_turns=1 this single step ends both episodes.
autoreset_actions = {r["actor"]: r["message"]["content"] for r in autoreset_requests}
autoreset_step = autoreset_env.step(autoreset_actions)
# The step result lists the episode-end requests first, followed by the
# episode-start requests of the freshly reset episodes (episode_id advances).
[(r["actor"], r["episode_start"], r["episode_end"], r["needs_action"], r["episode_id"]) for r in autoreset_step]


[('agent::env=0', False, True, False, 0),
 ('agent::env=1', False, True, False, 0),
 ('agent::env=0', True, False, True, 1),
 ('agent::env=1', True, False, True, 1)]