In [1]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import gymnasium as gym
import optuna
import torch.nn as nn

  from .autonotebook import tqdm as notebook_tqdm


In [23]:
env = DummyVecEnv([lambda: gym.make("CarRacing-v3") for _ in range(8)])

In [25]:



policy_kwargs = {
    "net_arch": [256,256],  # Three hidden layers
    "activation_fn" : nn.GELU,
    "log_std_init": -2,
}
# Initialize the PPO agent with the environment
model = PPO(
    policy="CnnPolicy",  # Multi-layer perceptron policy
    env=env,             # Environment
    verbose=1,           # Logging level
    learning_rate= 1e-4,  # Learning rate
    gamma=0.99,          # Discount factor
    n_steps=512,        # Number of steps to run for each environment per update
    batch_size=128,       # Mini-batch size
    gae_lambda=0.95,         # lambda
    ent_coef=0.0,
    clip_range= 0.2,
    n_epochs= 10,
    sde_sample_freq = 4,
    vf_coef =  0.5,
    max_grad_norm =  0.5,
    policy_kwargs= policy_kwargs,
)

# Train the agent
model.learn(total_timesteps=1500000)  # Train for 1,500,000 steps

# Save the model
model.save("ppo_carracingtest_best")


: 

: 

In [20]:
env = gym.make("CarRacing-v3")
model = PPO.load("ppo_carracing_best", env = env)
env.reset()
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=1)
print(f"Mean Reward = {mean_reward}, Std Reward = {std_reward}")


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Mean Reward = 870.5882054790854, Std Reward = 0.0


In [21]:
from gymnasium.wrappers import RecordVideo
video_folder = "carracevideos2"  # Directory to save the video
env = gym.make("CarRacing-v3", render_mode="rgb_array")
env = RecordVideo(env, video_folder=video_folder, episode_trigger=lambda x: True)
model = PPO.load("ppo_carracing_best", env = env)

# Test the trained agent
obs, info = env.reset()
done = False

while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, truncated, info = env.step(action)

env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


KeyboardInterrupt: 