In [None]:
# Cell 1 – Imports and setup
import numpy as np
import matplotlib.pyplot as plt
from env.generalized_env import GeneralizedOvercooked  # Your multi-layout wrapper
from models.ppo import PPOAgent  # Your custom PPO class

# Use inline plotting
%matplotlib inline

In [None]:
# Cell 2 – Build the multi-layout environment and the shared policy

# Layouts handed to the environment wrapper
layouts = ['cramped_room', 'counter_circuit_o', 'coordination_ring']
env = GeneralizedOvercooked(layouts)

# Observation length and action count are read off the env's declared spaces
obs_dim, act_dim = env.observation_space.shape[0], env.action_space.n

# A single PPO agent; the rollout cell queries it once per player.
# NOTE(review): the rollout cell feeds this agent slices of length obs_dim // 2,
# while it is constructed here with the full obs_dim — confirm PPOAgent accepts that.
agent = PPOAgent(obs_dim=obs_dim, act_dim=act_dim)


In [None]:
# Cell 3 – Roll out a single episode with the current policy (no learning updates)

obs, done = env.reset(), False
episode_reward, frames = 0, []

# Index at which the observation is cut into the two per-player slices.
# NOTE(review): assumes `obs` is a flat joint vector whose halves are the
# per-player views — confirm against GeneralizedOvercooked.
half = obs_dim // 2

while not done:
    # Leading slice goes to player 0, the remainder to player 1
    obs_0, obs_1 = obs[:half], obs[half:]

    # The same policy picks an action for each player; its second return value is unused
    act_0, _ = agent.select_action(obs_0)
    act_1, _ = agent.select_action(obs_1)

    # Advance the environment with the joint action and accumulate the reward
    obs, reward, done, info = env.step([act_0, act_1])
    episode_reward += reward

    # Keep what the active environment renders for the display/animation cells
    frame = env.cur_env.render()
    frames.append(frame)

print(f"Episode finished. Total reward: {episode_reward}")


In [None]:
# Cell 4 – Show the episode's final frame (optional)

from PIL import Image
from IPython.display import display

# NOTE(review): Image.fromarray needs an array-like frame — confirm what render() returns
last_frame = frames[-1]
display(Image.fromarray(last_frame))


In [None]:
# Cell 5 – Animate the episode (optional)

from IPython.display import HTML
from matplotlib import animation

# Square, axis-free canvas initialised with the first recorded frame;
# `im` is the image artist that animate() updates on every tick.
fig, ax = plt.subplots(figsize=(5, 5))
ax.axis('off')
im = ax.imshow(frames[0])

def animate(i):
    """Point the image artist at frame ``i`` and return it as the only updated artist."""
    current = frames[i]
    im.set_array(current)
    return [im]

# Build the animation: one tick per recorded frame, 100 ms apart
anim = animation.FuncAnimation(fig, animate, frames=len(frames), interval=100)

# Close the figure so the inline backend does not also emit a static copy of it
# next to the player; the animation object still holds the figure, so it can be
# rendered and saved afterwards.
plt.close(fig)

# Last expression of the cell: the interactive JavaScript player
HTML(anim.to_jshtml())


In [None]:
# Cell 6 – Write the animation to disk as a GIF via the Pillow writer
# (10 fps matches the 100 ms frame interval used for the inline player)
anim.save(
    "episode.gif",
    writer="pillow",
    fps=10,
)
