In [1]:
import gymnasium as gym
from gymnasium.spaces import Discrete, Box
import numpy as np
import os
import random

import ray
from ray import air, tune
from ray.rllib.env.env_context import EnvContext
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved
from ray.tune.logger import pretty_print
from ray.tune.registry import get_trainable_cls

from custom_env import CustomEnvironment
from config import run_config

# Framework handles obtained through RLlib's import helpers. None of these
# names (tf1, tf, tfv, torch, nn) is referenced by the code visible in this cell.
# NOTE(review): presumably the helpers yield None placeholders when a framework
# is not installed instead of raising — confirm against the RLlib docs.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()


if __name__ == "__main__":

    class Args:
        """Hard-coded run options for this notebook cell.

        Attributes:
            run: Registered RLlib algorithm name handed to Tune.
            framework: Deep-learning framework passed to `.framework()`.
            as_test: When True, `check_learning_achieved` is run after training.
            stop_iters: Stop criterion on `training_iteration`.
            stop_timesteps: Stop criterion on `timesteps_total`.
            stop_reward: Stop criterion on `episode_reward_mean`.
            local_mode: Forwarded to `ray.init(local_mode=...)`.
        """

        def __init__(self):
            self.run = "PPO"
            self.framework = "torch"
            self.as_test = False
            self.stop_iters = 50
            self.stop_timesteps = 100000
            self.stop_reward = 0.1
            self.local_mode = False

    args = Args()
    print(f"Running with following options: {args.__dict__}")

    # `ignore_reinit_error` lets this cell be re-run in the same kernel even if
    # a previous run left Ray initialized.
    ray.init(local_mode=args.local_mode, ignore_reinit_error=True)
    try:
        # Local environment instance, used only to read the spaces and the
        # number of prey agents needed for the multi-agent setup below.
        env = CustomEnvironment(run_config["env"])
        num_prey = env.num_prey

        def policy_mapping_fn(agent_id, *_args, **_kwargs):
            """Map an agent id to the id of the policy that controls it.

            RLlib invokes this callback with extra arguments besides the agent
            id (the episode and a `worker` keyword), so they are accepted and
            ignored here. Only the integer `num_prey` is captured, not the
            whole environment instance, which keeps the function cheap to
            serialize to rollout workers.
            """
            # NOTE(review): `<=` is kept from the original code. If agent ids
            # are zero-based (0 .. num_prey - 1 are prey) this should be `<` —
            # confirm against CustomEnvironment's agent id scheme.
            return "prey" if agent_id <= num_prey else "predator"

        config = (
            get_trainable_cls(args.run)
            .get_default_config()
            # `.environment()` already records both the env class and its
            # config; no dict-style `config["env"] = ...` assignment is needed.
            .environment(CustomEnvironment, env_config=run_config["env"])
            .framework(args.framework)
            .rollouts(num_rollout_workers=1)
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
            .multi_agent(
                # NOTE(review): assumes `env.observation_space` and
                # `env.action_space` are the per-agent spaces shared by prey
                # and predators — confirm against CustomEnvironment.
                policies={
                    "prey": (None, env.observation_space, env.action_space, {}),
                    "predator": (None, env.observation_space, env.action_space, {}),
                },
                policy_mapping_fn=policy_mapping_fn,
            )
        )

        # Printed after the multi-agent setup so the output shows the
        # configuration that is actually trained.
        print(config)

        stop = {
            "training_iteration": args.stop_iters,
            "timesteps_total": args.stop_timesteps,
            "episode_reward_mean": args.stop_reward,
        }

        # automated run with Tune and grid search and TensorBoard
        print("Training automatically with Ray Tune")
        tuner = tune.Tuner(
            args.run,
            param_space=config.to_dict(),
            run_config=air.RunConfig(stop=stop),
        )
        results = tuner.fit()

        if args.as_test:
            print("Checking if learning goals were achieved")
            check_learning_achieved(results, args.stop_reward)
    finally:
        # Always release the Ray runtime, even when setup or training raised,
        # so the next execution of this cell starts from a clean state.
        ray.shutdown()

0,1
Current time:,2023-08-07 13:54:35
Running for:,00:00:13.63
Memory:,3.9/31.3 GiB

Trial name,# failures,error file
PPO_CustomEnvironment_e9642_00000,1,/home/tcazalet/ray_results/PPO/PPO_CustomEnvironment_e9642_00000_0_2023-08-07_13-54-22/error.txt

Trial name,status,loc
PPO_CustomEnvironment_e9642_00000,ERROR,


2023-08-07 13:54:35,721	ERROR tune_controller.py:911 -- Trial task failed for trial PPO_CustomEnvironment_e9642_00000
Traceback (most recent call last):
  File "/project_ghent/MARL2/MARL2_env/lib/python3.8/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "/project_ghent/MARL2/MARL2_env/lib/python3.8/site-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/project_ghent/MARL2/MARL2_env/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/project_ghent/MARL2/MARL2_env/lib/python3.8/site-packages/ray/_private/worker.py", line 2495, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::PPO.__init__()[39m (pid=53220, ip=172.17.0.3, actor_id=109191e7d37e2b1c3d2c203401000000, repr=PPO)
  File "/project_ghent/MARL2/