In [1]:
import os

import numpy as np
import torch

from src.trpo import TRPO
from src.tools import train, evaluate, load_model
from src.utils import mp4_to_gif

In [2]:
# Set random seeds for reproducibility
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

ENV_NAME = "InvertedDoublePendulum-v5"
os.makedirs("results", exist_ok=True)

Standart Reward

In [3]:
agent = train(
    env_name=ENV_NAME,
    agent=TRPO,
    num_epochs=350,
    steps_per_epoch=4096,
    gamma=0.99,
    reward_type="rewards",
)

TRPO training: 100%|██████████| 350/350 [05:39<00:00,  1.03it/s]


In [4]:
agent = load_model("./results/trpo-rewards-best.pt", agent)
evaluate(ENV_NAME, agent, num_episodes=1000, record_video=True, reward_type="rewards")


  logger.warn(
TRPO evaluation: 100%|██████████| 1000/1000 [01:24<00:00, 11.80it/s]

Evaluation Results over 1000 episodes:
Average Episode Length: 977.67
Average Episode Reward: 9149.17
Average Episode Energy Reward: 11498.76





(9149.168230721583, 11498.764168183889, 977.668)

Energy Reward

In [5]:
agent = train(
    env_name=ENV_NAME,
    agent=TRPO,
    num_epochs=350,
    steps_per_epoch=4096,
    gamma=0.99,
    reward_type="energies",
)

TRPO training: 100%|██████████| 350/350 [06:07<00:00,  1.05s/it]


In [6]:
agent = load_model("./results/trpo-energies-best.pt", agent)
evaluate(ENV_NAME, agent, num_episodes=1000, record_video=True, reward_type="energies")


  logger.warn(
TRPO evaluation: 100%|██████████| 1000/1000 [00:17<00:00, 58.50it/s]

Evaluation Results over 1000 episodes:
Average Episode Length: 193.64
Average Episode Reward: 1799.57
Average Episode Energy Reward: 2236.63





(1799.571527722269, 2236.628630881899, 193.643)

In [7]:
mp4_to_gif("./results")

Video files conversion: 100%|██████████| 2/2 [00:12<00:00,  6.02s/it]
