In [11]:
# ppo_minimal_fixed.py
"""
Fixed PPO example for CartPole using Gymnasium (new API)
"""

import gymnasium as gym
from stable_baselines3 import PPO

# 0️⃣ Configuration — the knobs a reader is most likely to tune
ENV_ID = "CartPole-v1"
SEED = 42                  # fixed seed so Restart & Run All retrains reproducibly
TOTAL_TIMESTEPS = 50_000   # lower steps for quick test

# 1️⃣ Create the environment
env = gym.make(ENV_ID)   # Gymnasium version

# 2️⃣ Initialize PPO model
# With verbose=1, SB3 reports that it wraps `env` in a Monitor and a DummyVecEnv
# (see the first lines of this cell's output); `env` itself stays the raw env.
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    seed=SEED,           # seeds SB3's pseudo-random generators for this run
    learning_rate=3e-4,
    n_steps=2048,        # rollout length per iteration (total_timesteps grows by 2048 per log table)
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,       # entropy coefficient in the PPO loss
)

# 3️⃣ Train the model
print("🚀 Training PPO agent...")
model.learn(total_timesteps=TOTAL_TIMESTEPS)
print("✅ Training finished.")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
🚀 Training PPO agent...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24       |
|    ep_rew_mean     | 24       |
| time/              |          |
|    fps             | 1722     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 25.5        |
|    ep_rew_mean          | 25.5        |
| time/                   |             |
|    fps                  | 1229        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009212196 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2       

In [10]:
# 4️⃣ Save the model
model.save("ppo_cartpole_model_fixed")
print("📁 Model saved as ppo_cartpole_model_fixed.zip")

# 5️⃣ Test the trained agent
# The training `env` was created without a render_mode, so calling render() on it
# only emits a warning and draws nothing. Roll out on a dedicated env instead.
# NOTE(review): render_mode="human" opens a window and needs pygame plus a display;
# on a headless kernel switch to "rgb_array" and display the returned frames.
N_EVAL_STEPS = 10
eval_env = gym.make("CartPole-v1", render_mode="human")  # renders on every reset/step
try:
    obs, _info = eval_env.reset()   # ✅ Gymnasium reset returns (obs, info)
    for step in range(N_EVAL_STEPS):
        # deterministic=True: show the policy's chosen action, not a sampled one
        action, _states = model.predict(obs, deterministic=True)
        # ✅ New step API returns 5 values
        obs, _reward, terminated, truncated, _info = eval_env.step(action)

        if terminated or truncated:
            obs, _info = eval_env.reset()
finally:
    # Release the render window even if the rollout raises.
    eval_env.close()
    env.close()

📁 Model saved as ppo_cartpole_model_fixed.zip
