In [17]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback


In [18]:
# Build the training environment in a single expression, innermost wrapper first:
#   gym.make("CarRacing-v2") -> the raw environment
#   Monitor                  -> keeps track of episode statistics
#   DummyVecEnv              -> vectorizes the (single) environment
#   VecFrameStack            -> stacks 4 frames per observation
# The factory handed to DummyVecEnv creates its own environment, so it does not
# rely on a module-level name that is reassigned afterwards.
env = VecFrameStack(
    DummyVecEnv([lambda: Monitor(gym.make("CarRacing-v2"))]),
    n_stack=4,
)

In [19]:
# Define the model - PPO is a commonly used algorithm for this environment.
#
# Hyperparameters are passed to the constructor on purpose: PPO builds its
# learning-rate schedule and optimizer while it is being constructed, so
# assigning `model.learning_rate` afterwards does not change the rate that is
# actually used during training.
model = PPO(
    "CnnPolicy",
    env,
    learning_rate=0.0002,  # Reduced learning rate
    batch_size=32,  # Adjusted minibatch size
    vf_coef=0.3,  # Adjusted value function coefficient
    verbose=1,
    tensorboard_log="./ppo_carracing_tensorboard/",
)

# Define a checkpoint callback to save the model periodically
# (every 10000 calls of the callback, into ./logs/).
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path='./logs/',
    name_prefix='ppo_carracing_model_v4',
)

# Train the model
model.learn(total_timesteps=1_000_000, callback=checkpoint_callback)

# Save the final model
model.save("videos/ppo_carracing_model_v4")

# To load the model later (same path as used for saving above):
# model = PPO.load("videos/ppo_carracing_model_v4")

Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./ppo_carracing_tensorboard/PPO_10
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -54.3    |
| time/              |          |
|    fps             | 76       |
|    iterations      | 1        |
|    time_elapsed    | 26       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1e+03       |
|    ep_rew_mean          | -57.6       |
| time/                   |             |
|    fps                  | 55          |
|    iterations           | 2           |
|    time_elapsed         | 73          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007887762 |
|    clip_fraction        | 0.058       |
|    clip_range           | 0.2         |
|    entro

KeyboardInterrupt: 