Import libraries

In [19]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize, VecTransposeImage

Initialize the environment

In [20]:
env_id = "CarRacing-v3"

# Create vectorized environments
vec_env = make_vec_env(env_id, n_envs=12, seed=42)

vec_env = VecTransposeImage(vec_env)

Initialize the agent

In [21]:
# PPO agent with an CNN policy
model = PPO(
    "CnnPolicy",
    vec_env,
    verbose=1,

    # Exploration (very important for CarRacing)
    use_sde=True,
    sde_sample_freq=4,
    ent_coef=0.05,

    # PPO stability
    learning_rate=1e-4,
    n_steps=2048,
    batch_size=256,
    n_epochs=10,
    gamma=0.995,
    gae_lambda=0.95,
    clip_range=0.1,

    # IMPORTANT: images already scaled
    policy_kwargs=dict(normalize_images=False),
)

Using cuda device


Train the agent

In [22]:
model.learn(total_timesteps=5_000_000)
model.save("ppo_car_racing")

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -60.4    |
| time/              |          |
|    fps             | 103      |
|    iterations      | 1        |
|    time_elapsed    | 236      |
|    total_timesteps | 24576    |
---------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1e+03    |
|    ep_rew_mean          | -60.4    |
| time/                   |          |
|    fps                  | 100      |
|    iterations           | 2        |
|    time_elapsed         | 489      |
|    total_timesteps      | 49152    |
| train/                  |          |
|    approx_kl            | 80.42634 |
|    clip_fraction        | 0.989    |
|    clip_range           | 0.1      |
|    entropy_loss         | -24.7    |
|    explained_variance   | 0.441    |
|    learning_rate        | 0.0001   |
|    loss                 | 5.88    

Visualize the agent

In [23]:
import numpy as np
import imageio
import tempfile
from IPython.display import Video, display

# Load the trained PPO agent
model = PPO.load("ppo_car_racing")

# Create evaluation env in rgb_array mode to capture frames
eval_env = gym.make("CarRacing-v3", render_mode="rgb_array")

frames = []

# Run one rollout
obs, info = eval_env.reset()
done = False
while not done:
    # For vector models, wrap obs into batch of size 1
    action, _ = model.predict(np.array(obs)[None, :], deterministic=True)
    obs, reward, terminated, truncated, info = eval_env.step(action[0])
    done = terminated or truncated

    # Get the rendered frame (RGB)
    frame = eval_env.render()
    if frame is not None:
        frames.append(frame)

eval_env.close()
print(f"Captured {len(frames)} frames.")

# Save captured frames to a temporary mp4 video
tmp_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
writer = imageio.get_writer(tmp_path, fps=30, codec="libx264")
for frame in frames:
    writer.append_data(frame)
writer.close()

# Display video inline
display(Video(tmp_path, embed=True))



Captured 762 frames.
