In [1]:
import os
from openai import OpenAI
import gymnasium as gym
from agents.generator import Generator
from agents.reflector import Reflector
from agents.curator import Curator
from environments.FrozenLake import FrozenLake
from prompts.FrozenLakePrompt import FrozenLakePrompt

In [2]:
# Run-wide switches: DEBUG enables the verbose prints in the loop below,
# max_iterations is the number of generate -> reflect -> curate rounds.
DEBUG = True
max_iterations = 10

# Pipeline Setup
# Fail fast when the key is missing. With api_key=None the OpenAI client falls
# back to the OPENAI_API_KEY environment variable, which would either send an
# unrelated credential to OpenRouter or surface later as a confusing auth error.
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
if not openrouter_api_key:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it before running this notebook."
    )

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
)
# Candidate FrozenLake layouts, one string per grid row.
# Tile legend (Gymnasium FrozenLake convention): S = start, F = frozen (walkable),
# H = hole, G = goal.

# 11 rows x 11 columns.
big_map = [
    "SFFFFFHFFFF",
    "FFFFFHFFFFF",
    "FFFFFFHFHFF",
    "FFFFFFFFFFF",
    "FFFFFFFFFFF",
    "FFFFFFFFFFF",
    "FFHFFFFFFFF",
    "FFFFFFFFFHF",
    "FFFFFFFFFFF",
    "FFFFFFFHFFF",
    "FFFFFFFFFFG"
]
# 6 rows x 7 columns.
medium_map = [
    "SFFFFHF",
    "FFFFFHF",
    "FHFFFFH",
    "FFFFFFF",
    "FFFHFFF",
    "FFHFFFG"
    ]
# 5 rows x 4 columns.
small_map = [
    "SFFF",
    "FFFH",
    "HFFH",
    "FHFF",
    "FFFG"
]

# Layout actually handed to the environment; swap in medium_map / big_map to change difficulty.
custom_map = small_map
# Model identifier passed to the Generator, Reflector and Curator agents.
model = "openai/gpt-oss-120b"

# Build the Gymnasium environment first, then hand it to the project wrapper.
# The layout comes from custom_map (map_name=None so no built-in map is used),
# and render_mode="ansi" selects text rendering.
# NOTE(review): is_slippery=True makes transitions stochastic and no seed is set
# here, so runs are not reproducible unless the wrapper seeds on reset — confirm.
lake_env = gym.make(
    "FrozenLake-v1",
    desc=custom_map,
    map_name=None,
    render_mode="ansi",
    is_slippery=True,
    success_rate=0.7,
    reward_schedule=(1, 0, 0),
)
FrozenLakeGame = FrozenLake(env=lake_env)

# TODO: Update Reflector + Curator prompts with ground truth
# Both start empty and are passed to the prompt object as its initial state.
playbook, reflection = "", ""
prompt = FrozenLakePrompt(playbook=playbook, reflection=reflection)

# Main loop: each iteration resets the game, runs the Generator, feeds its
# output to the Reflector, then lets the Curator update the playbook.
# `model` is assigned once in the setup code before this loop; the per-agent
# re-assignments of the same literal were loop-invariant and have been removed.
for i in range(max_iterations):
    FrozenLakeGame.reset()
    if DEBUG:
        print(f"===== Iteration {i+1} =====")
        print("=== Playbook ===")
        print(prompt.getPlaybookAsString())
        print("================")

    # Generator Setup
    if DEBUG:
        print("===== Generator Run =====")

    generatorLake = Generator(client, model, FrozenLakeGame, prompt=prompt)
    generatorOutput, step = generatorLake.run(debug=DEBUG)
    generatorOutput = generatorOutput[1:]  # skip generator prompt
    prompt.setGeneratorOutput(generatorOutput)

    if DEBUG:
        print("=======================")

    # Reflector Setup; Possible Iteration: Gen Traces -> Reflector Insights -> Gen Traces -> Improved Reflector Insights -> etc.
    if DEBUG:
        print("=== Reflector Run ===")
        # Built only in debug mode so the prompt is not rendered needlessly.
        print(f"== Prompt: {prompt.getReflectorPrompt()}")

    reflectorLake = Reflector(client, model, prompt)
    reflection = reflectorLake.run(debug=DEBUG)
    prompt.setReflection(reflection)

    if DEBUG:
        print("========================")

    # Curator Setup: TODO: Add Metadata: Count how often bulletpoint was marked helpful or harmful
    if DEBUG:
        print("=== Curator Run ===")
        print(f"== Prompt: {prompt.getCuratorPrompt()}")

    curatorLake = Curator(client, model, prompt)
    curatorLake.run(debug=DEBUG)  # updates playbook
    # Presumably reloads the curated playbook into the prompt object — verify in FrozenLakePrompt.
    prompt.refreshPlaybook()

    if DEBUG:
        print("=======================")
        print(f"=== End of Iteration {i+1} ===\n\n")

===== Iteration 1 =====
=== Playbook ===
1. Safe Navigation Strategies
   - [1326dc23-065d-4f0f-8872-be525c72cd71] After each action, parse the returned state string line‑by‑line. Locate the pair of brackets "[ ]" that mark the agent’s current cell, count the lines from the top to get the row index and the characters between spaces to get the column index (using a consistent 0‑based or 1‑based convention). Update the internal position to these exact coordinates. If the observed position differs from the expected one, discard the current plan and recompute a safe path from the actual location. When a discrepancy is observed, record the specific action as *uncertain* (e.g., "move_right behaved like move_down") and treat future uses of that action as uncertain until re‑tested. Before issuing any move, verify that the intended target cell (based on the updated map) is free, and if an action is marked uncertain, first test it on a known safe neighboring cell to observe its actual effect bef