Import libraries

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

Initialize the environment

In [4]:
# Gymnasium ID of the task the agent is trained on
env_id = "BipedalWalker-v3"

# Build 32 copies of the environment behind a single vectorized interface,
# seeded with 42 so the environment randomness is repeatable.
vec_env = make_vec_env(
    env_id=env_id,
    n_envs=32,
    seed=42,
)

Initialize the agent

In [6]:
# PPO hyperparameters, gathered in one dict so they can be tuned in one place.
ppo_kwargs = dict(
    n_steps=2048,         # rollout steps collected per environment before each update
    batch_size=64,        # minibatch size used for each gradient step
    n_epochs=10,          # optimisation passes over every collected rollout
    gamma=0.999,          # discount factor
    gae_lambda=0.95,      # bias/variance trade-off of the GAE advantage estimator
    ent_coef=0.0,         # weight of the entropy bonus (0.0 disables it)
    learning_rate=3e-4,   # optimiser step size
)

# PPO agent with an MLP policy.
# The environments are already seeded with 42; seeding the agent with the same
# value also fixes the policy's weight initialisation and action sampling, so
# repeated runs start from the same state. (Exact reproducibility may still
# depend on hardware/library versions, e.g. when training on a GPU.)
model = PPO(
    "MlpPolicy",
    vec_env,
    verbose=1,
    seed=42,
    **ppo_kwargs
)

Using cuda device


Train the agent

In [7]:
# Train for 5M environment steps, then write the resulting agent to
# "ppo_bipedal_walker". learn() hands back the trained model, so the save
# call can be chained directly onto it.
model.learn(
    total_timesteps=5_000_000,
).save("ppo_bipedal_walker")

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 503      |
|    ep_rew_mean     | -113     |
| time/              |          |
|    fps             | 4151     |
|    iterations      | 1        |
|    time_elapsed    | 15       |
|    total_timesteps | 65536    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 534          |
|    ep_rew_mean          | -111         |
| time/                   |              |
|    fps                  | 1191         |
|    iterations           | 2            |
|    time_elapsed         | 109          |
|    total_timesteps      | 131072       |
| train/                  |              |
|    approx_kl            | 0.0046990905 |
|    clip_fraction        | 0.0384       |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.66        |
|    explained_variance   | 0.00232      |
|    learning_r

Visualize the agent

In [8]:
import numpy as np
import imageio
import tempfile
from IPython.display import Video, display

# Load the trained PPO agent from the checkpoint written by the training cell
model = PPO.load("ppo_bipedal_walker")

# Create evaluation env in rgb_array mode so render() returns image frames
eval_env = gym.make("BipedalWalker-v3", render_mode="rgb_array")

frames = []

# Run one rollout. try/finally guarantees the environment is closed even if
# predict/step/render raises part-way through the episode.
try:
    # Seed the reset so the recorded episode is repeatable across runs
    obs, info = eval_env.reset(seed=42)
    done = False
    while not done:
        # predict() accepts a single, un-batched observation and returns a
        # single action, so no manual batch dimension is needed here.
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = eval_env.step(action)
        done = terminated or truncated

        # Get the rendered frame (RGB)
        frame = eval_env.render()
        if frame is not None:
            frames.append(frame)
finally:
    eval_env.close()
print(f"Captured {len(frames)} frames.")

# Reserve a temporary .mp4 path. The handle is closed immediately (the file
# itself is kept because delete=False) so the video writer below is the only
# thing holding the file open.
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
    tmp_path = tmp_file.name

# Encode the captured frames; the context manager finalises and closes the
# video file even if appending a frame fails.
with imageio.get_writer(tmp_path, fps=30, codec="libx264") as writer:
    for frame in frames:
        writer.append_data(frame)

# Display video inline
display(Video(tmp_path, embed=True))



Captured 1108 frames.
