In [1]:
import os
import supersuit as ss

from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.tune.registry import register_env
from torch import nn

from pettingzoo.butterfly import pistonball_v6

In [2]:
def env_creator(args):
    """Build the preprocessed parallel Pistonball environment.

    The simulation settings and the preprocessing chain are fixed inside this
    function. ``args`` is accepted so the function can serve as an environment
    factory, but it is never read.

    Args:
        args: Ignored.

    Returns:
        The ``pistonball_v6`` parallel environment wrapped, in order, by the
        SuperSuit preprocessing steps listed in ``preprocessing_steps``.
    """
    simulation_settings = {
        "n_pistons": 20,
        "time_penalty": -0.1,
        "continuous": True,
        "random_drop": True,
        "random_rotate": True,
        "ball_mass": 0.75,
        "ball_friction": 0.3,
        "ball_elasticity": 1.5,
        "max_cycles": 125,
    }
    wrapped = pistonball_v6.parallel_env(**simulation_settings)

    # Each entry is (wrapper, extra positional args, keyword args). The order
    # matters: every wrapper receives the output of the one above it.
    preprocessing_steps = (
        (ss.color_reduction_v0, (), {"mode": "B"}),
        (ss.dtype_v0, ("float32",), {}),
        (ss.resize_v1, (), {"x_size": 84, "y_size": 84}),
        (ss.frame_stack_v1, (3,), {}),
        (ss.normalize_obs_v0, (), {"env_min": 0, "env_max": 1}),
    )
    for wrapper, positional, keywords in preprocessing_steps:
        wrapped = wrapper(wrapped, *positional, **keywords)
    return wrapped

In [3]:
# Name under which the environment is registered and later referenced by the
# PPO config.
env_name = "pistonball_v6"
register_env(
    env_name,
    lambda env_config: ParallelPettingZooEnv(env_creator(env_config)),
)

# PPO hyperparameters, kept in one place so they are easy to review and tune.
ppo_training_settings = {
    "train_batch_size": 512,
    "lr": 2e-5,
    "gamma": 0.99,
    "lambda_": 0.9,
    "use_gae": True,
    "clip_param": 0.4,
    "grad_clip": None,
    "entropy_coeff": 0.1,
    "vf_loss_coeff": 0.25,
    "sgd_minibatch_size": 64,
    "num_sgd_iter": 10,
}

# Build the algorithm config step by step; each builder call returns the
# config object that the next call continues from.
config = PPOConfig()
# 4 workers x 128-step fragments equals the train_batch_size of 512 above.
config = config.rollouts(num_rollout_workers=4, rollout_fragment_length=128)
config = config.training(**ppo_training_settings)
config = config.environment(env=env_name, clip_actions=True)
config = config.debugging(log_level="ERROR")
config = config.framework(framework="torch")

# Stop once the trial has consumed this many environment timesteps.
stop_criteria = {"timesteps_total": 5_000_000}

# NOTE(review): the output recorded for this cell shows the trial ending in
# ERROR ("Trials did not complete"); the underlying traceback is in the
# error.txt file named in that output.
tune.run(
    "PPO",
    name="PPO",
    stop=stop_criteria,
    checkpoint_freq=10,
    config=config.to_dict(),
)

2023-03-23 18:26:08,045	INFO worker.py:1553 -- Started a local Ray instance.


0,1
Current time:,2023-03-23 18:26:16
Running for:,00:00:07.44
Memory:,9.3/15.6 GiB

Trial name,# failures,error file
PPO_pistonball_v6_ccadd_00000,1,/home/clem/ray_results/PPO/PPO_pistonball_v6_ccadd_00000_0_2023-03-23_18-26-08/error.txt

Trial name,status,loc
PPO_pistonball_v6_ccadd_00000,ERROR,


[2m[36m(PPO pid=65919)[0m 2023-03-23 18:26:12,594	INFO algorithm.py:506 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


TuneError: ('Trials did not complete', [PPO_pistonball_v6_ccadd_00000])