In [1]:
import os

import numpy as np
import torch

from src.ppo import PPO
from src.tools import evaluate, load_model, train
from src.utils import mp4_to_gif


In [2]:
# --- Experiment configuration -------------------------------------------
# One fixed seed shared by every RNG seeded below, so reruns from a fresh
# kernel start from the same random state.
SEED = 42
ENV_NAME = "InvertedDoublePendulum-v5"

# Seed NumPy's legacy global RNG and PyTorch's global generator.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Output directory for run artifacts; exist_ok makes this cell safe to re-run.
# Kept as the last statement so the cell renders no output.
os.makedirs("results", exist_ok=True)

Standard Reward

In [3]:
# Baseline experiment: train PPO with reward_type="rewards" (the standard
# reward, per this section's heading). Hyperparameters are gathered in one
# mapping so they can be read at a glance, then unpacked as keyword arguments.
standard_run_kwargs = {
    "env_name": ENV_NAME,
    "agent": PPO,
    "num_epochs": 300,
    "steps_per_epoch": 4096,
    "gamma": 0.99,
    "reward_type": "rewards",
}
agent = train(**standard_run_kwargs)

PPO training: 100%|██████████| 300/300 [07:00<00:00,  1.40s/it]


In [4]:
# Load the saved "best" checkpoint for the standard-reward run into the agent
# (presumably written by the training cell above; confirm in src.tools.train).
checkpoint_path = "./results/ppo-rewards-best.pt"
agent = load_model(checkpoint_path, agent)

# Evaluate over 1000 episodes with video recording enabled. The call is left
# as the cell's last expression so its return value is displayed; per the
# recorded output the tuple is (avg reward, avg energy reward, avg length).
evaluate(
    ENV_NAME,
    agent,
    num_episodes=1000,
    record_video=True,
    reward_type="rewards",
)


  logger.warn(
PPO evaluation: 100%|██████████| 1000/1000 [01:24<00:00, 11.79it/s]

Evaluation Results over 1000 episodes:
Average Episode Length: 996.08
Average Episode Reward: 9322.03
Average Episode Energy Reward: 11718.47





(9322.027089178797, 11718.466751257445, 996.079)

Energy Reward

In [5]:
# Energy-reward experiment: train PPO with reward_type="energies".
#
# Re-seed the global RNGs first. Without this, the run starts from whatever
# random state the previous training and 1000-episode evaluation left behind,
# so it is not reproducible when this cell is executed on its own and is not
# started from the same seed as the standard-reward run.
# NOTE(review): assumes `train` draws from the global torch / NumPy RNGs and
# does not seed them itself; confirm in src.tools.train.
torch.manual_seed(SEED)
np.random.seed(SEED)

# Same hyperparameters as the standard-reward run; only reward_type differs.
agent = train(
    env_name=ENV_NAME,
    agent=PPO,
    num_epochs=300,
    steps_per_epoch=4096,
    gamma=0.99,
    reward_type="energies",
)

PPO training: 100%|██████████| 300/300 [07:30<00:00,  1.50s/it]


In [6]:
# Load the saved "best" checkpoint for the energy-reward run into the agent
# (presumably written by the training cell above; confirm in src.tools.train).
checkpoint_path = "./results/ppo-energies-best.pt"
agent = load_model(checkpoint_path, agent)

# Evaluate over 1000 episodes with video recording enabled. The call is left
# as the cell's last expression so its return value is displayed; per the
# recorded output the tuple is (avg reward, avg energy reward, avg length).
evaluate(
    ENV_NAME,
    agent,
    num_episodes=1000,
    record_video=True,
    reward_type="energies",
)


  logger.warn(
PPO evaluation: 100%|██████████| 1000/1000 [01:24<00:00, 11.77it/s]

Evaluation Results over 1000 episodes:
Average Episode Length: 994.08
Average Episode Reward: 9302.96
Average Episode Energy Reward: 11695.11





(9302.960647160366, 11695.111179150905, 994.081)

In [8]:
# Convert the recorded evaluation videos under ./results from MP4 to GIF.
# NOTE(review): the saved output reports "0it", i.e. nothing was converted on
# this run. Verify that `evaluate(record_video=True)` actually writes its
# MP4 files into ./results (or point this call at the right directory).
# NOTE(review): this cell's execution count (8) skips 7, so the notebook was
# not run strictly top to bottom; re-validate with Restart Kernel -> Run All.
mp4_to_gif("./results")

Video files conversion: 0it [00:00, ?it/s]
