In [4]:
import gym
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

# Load your custom gym environment
env_name = "YourCustomEnv-v0"
env = gym.make(env_name)
env = DummyVecEnv([lambda: env])  # Wrap the environment to vectorize it

# Initialize the agent
model = PPO("MlpPolicy", env, verbose=1)

# Training parameters
n_steps = 50_000
eval_freq = 1000

# Lists to store results
eval_rewards = []
steps_list = []

for step in range(0, n_steps, eval_freq):
    # Train the agent
    model.learn(total_timesteps=eval_freq)
    
    # Evaluate the agent's performance
    mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
    
    # Store the results
    eval_rewards.append(mean_reward)
    steps_list.append(step + eval_freq)  # Adjust to indicate the end of the current training segment

# Plotting the results
plt.plot(steps_list, eval_rewards)
plt.xlabel('Training Steps')
plt.ylabel('Mean Reward')
plt.title('Training Performance')
plt.grid()
plt.savefig("graph.png")

-----------------------------
| time/              |      |
|    fps             | 262  |
|    iterations      | 1    |
|    time_elapsed    | 7    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 267         |
|    iterations           | 2           |
|    time_elapsed         | 15          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.006441292 |
|    clip_fraction        | 0.0484      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.383      |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 4.97e+03    |
|    n_updates            | 990         |
|    policy_gradient_loss | -0.00216    |
|    value_loss           | 1.2e+04     |
-----------------------------------------
----------------------------------

KeyboardInterrupt: 