In [None]:
import os
from openai import OpenAI
import gymnasium as gym
from agents.generator import Generator
from agents.reflector import Reflector
from agents.curator import Curator
from environments.FrozenLake import FrozenLake
from prompts.FrozenLakePrompt import FrozenLakePrompt

In [None]:
# When True, every stage prints its prompts/banners and the agents are run
# with debug=DEBUG.
DEBUG = True
# Number of generate -> reflect -> curate rounds executed by the main loop.
max_iterations = 3

# Pipeline setup
# Fail fast when the OpenRouter key is missing. With api_key=None the OpenAI
# client falls back to the OPENAI_API_KEY environment variable, which would
# either send an unrelated key to OpenRouter or fail later with a less
# helpful error.
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
if not openrouter_api_key:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it before running this pipeline."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
)

# 5x4 map in Gymnasium's FrozenLake tile notation
# (S = start, F = frozen, H = hole, G = goal).
custom_map = [
    "SFFF",
    "FFFH",
    "HFFH",
    "FHFF",
    "FFFG",
]

# Default model id; each stage of the main loop rebinds `model` before use.
model = "x-ai/grok-4-fast"

# map_name=None so that the layout comes from `desc` (custom_map) only.
FrozenLakeGame = FrozenLake(
    env=gym.make(
        "FrozenLake-v1",
        render_mode="ansi",
        desc=custom_map,
        map_name=None,
        is_slippery=True,
        success_rate=0.66,
        reward_schedule=(1, 0, 0),
    )
)

# TODO: Update Reflector + Curator prompts with ground truth
# Both start empty; the loop fills them in via the prompt object.
playbook = ""
reflection = ""
prompt = FrozenLakePrompt(playbook=playbook, reflection=reflection)

# Main loop: each round plays the environment with the Generator, lets the
# Reflector analyse the result, and lets the Curator update the playbook that
# the next round's prompt is built from.
for i in range(max_iterations):
    # Start every round from a fresh environment state.
    FrozenLakeGame.reset()

    if DEBUG:
        print(f"===== Iteration {i+1} =====")
        print("=== Playbook ===")
        print(prompt.getPlaybookAsString())
        print("================")
        print("===== Generator Run =====")

    # --- Generator: run the environment with the current prompt ---
    model = "x-ai/grok-4-fast"
    generatorLake = Generator(client, model, FrozenLakeGame, prompt=prompt)
    generatorOutput, step = generatorLake.run(debug=DEBUG)
    prompt.setGeneratorOutput(generatorOutput)

    # --- Reflector: turn the generator output into a reflection ---
    # Possible iteration: Gen Traces -> Reflector Insights -> Gen Traces
    # -> Improved Reflector Insights -> etc.
    if DEBUG:
        print("=======================")
        print("=== Reflector Run ===")
        print(f"== Prompt: {prompt.getReflectorPrompt()}")

    model = "openai/gpt-4o"
    reflectorLake = Reflector(client, model, prompt)
    reflection = reflectorLake.run(debug=DEBUG)
    prompt.setReflection(reflection)

    # --- Curator: update the playbook ---
    # TODO: Add metadata: count how often a bullet point was marked helpful
    # or harmful.
    if DEBUG:
        print("========================")
        print("=== Curator Run ===")
        print(f"== Prompt: {prompt.getCuratorPrompt()}")

    model = "x-ai/grok-4-fast"
    curatorLake = Curator(client, model, prompt)
    curatorLake.run(debug=DEBUG)  # updates playbook
    # Refresh the prompt's playbook after the curator has run.
    prompt.refreshPlaybook()

    if DEBUG:
        print("=======================")
        print(f"=== End of Iteration {i+1} ===\n\n")