Import libraries

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

Initialize the environment

In [3]:
# Gymnasium registry id of the task the agent is trained on.
env_id = "LunarLander-v3"

# Build 12 seeded copies of the environment behind a single vectorized
# interface so PPO can collect rollouts from all of them at once.
vec_env = make_vec_env(
    env_id=env_id,
    seed=42,
    n_envs=12,
)

Initialize the agent

In [5]:
# PPO hyperparameters, kept in one dict so they are easy to tweak and log.
ppo_kwargs = dict(
    n_steps=1024,      # rollout length per environment between updates
    batch_size=64,     # minibatch size for each gradient step
    n_epochs=6,        # optimisation passes over each collected rollout
    gamma=0.999,       # discount factor
    gae_lambda=0.98,   # GAE bias/variance trade-off
    ent_coef=0.005,    # entropy bonus weight
    vf_coef=0.5,       # value-function loss weight
)

# PPO agent with an MLP policy.
# The seed matches the one given to the vectorized env so that network
# initialisation and action sampling are repeatable across kernel restarts
# (GPU kernels may still introduce small non-determinism).
model = PPO(
    "MlpPolicy",
    vec_env,
    verbose=1,
    seed=42,
    **ppo_kwargs
)

Using cuda device


Train the agent

In [6]:
# Train for 5M environment steps in total, then write the agent to disk.
# learn() returns the model it was called on, so save() is chained onto it.
model.learn(total_timesteps=5_000_000).save("ppo_lunarlander")

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91       |
|    ep_rew_mean     | -192     |
| time/              |          |
|    fps             | 2623     |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 12288    |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 92          |
|    ep_rew_mean          | -136        |
| time/                   |             |
|    fps                  | 1259        |
|    iterations           | 2           |
|    time_elapsed         | 19          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.009620388 |
|    clip_fraction        | 0.0829      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -5.66e-05   |
|    learning_rate        | 0.

Visualize the agent

In [8]:
import numpy as np
import imageio
import tempfile
from IPython.display import Video, display
import os

# Tell imageio's ffmpeg backend to use the `ffmpeg` executable found on PATH.
os.environ['IMAGEIO_FFMPEG_EXE'] = "ffmpeg"

# Load the trained PPO agent
model = PPO.load("ppo_lunarlander")

# Create evaluation env in rgb_array mode to capture frames
eval_env = gym.make("LunarLander-v3", render_mode="rgb_array")

frames = []

# Run one rollout. The try/finally guarantees the environment is closed even
# if prediction, stepping, or rendering raises part-way through the episode.
try:
    obs, info = eval_env.reset()
    done = False
    while not done:
        # The observation is wrapped into a batch of size 1 for predict(),
        # so the single action is unwrapped again with action[0].
        action, _ = model.predict(np.array(obs)[None, :], deterministic=True)
        obs, reward, terminated, truncated, info = eval_env.step(action[0])
        done = terminated or truncated

        # Get the rendered frame (RGB)
        frame = eval_env.render()
        if frame is not None:
            frames.append(frame)
finally:
    eval_env.close()
print(f"Captured {len(frames)} frames.")

# Reserve a temporary .mp4 path. The handle is closed straight away so no
# file descriptor leaks and the video writer can open the path itself;
# delete=False keeps the file on disk after the handle is closed.
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
    tmp_path = tmp_file.name

# Encode the captured frames; the context manager closes the writer (and
# finalises the file) even if appending a frame fails.
with imageio.get_writer(tmp_path, fps=30, codec="libx264") as writer:
    for frame in frames:
        writer.append_data(frame)

# Display video inline
display(Video(tmp_path, embed=True))



Captured 198 frames.
