## Some preliminary checks

In [1]:
import torch
import ray 
import os 

# Can be modified
os.environ["RAY_DEDUP_LOGS"] = "0"

print("Ray version :", ray.__version__)
print("PyTorch Version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
print("MPS Available:", torch.backends.mps.is_available())

torch._dynamo.list_backends()

Ray version : 2.41.0
PyTorch Version: 2.5.1
CUDA Available: False
MPS Available: True


['cudagraphs', 'inductor', 'onnxrt', 'openxla', 'tvm']

## Number of CPUs and GPUs available

In [2]:
import psutil

print("Number of CPUs: ", psutil.cpu_count())

num_cpus = 12
num_gpus = 0

assert num_cpus <= psutil.cpu_count()

Number of CPUs:  12


## Algorithm configuration

In [3]:
from ray.tune.registry import get_trainable_cls
from ray.rllib.policy.policy import PolicySpec

from particle_2d_env import Particle2dEnvironment, MyCallbacks
from config import env_config

ALGO = "PPO"        
FRAMEWORK= "torch"
env = Particle2dEnvironment(env_config)

ppo_config = (
    get_trainable_cls(ALGO).get_default_config()
    .environment(Particle2dEnvironment, env_config=env_config)
    .framework(FRAMEWORK,)
    .api_stack(enable_rl_module_and_learner=True, enable_env_runner_and_connector_v2=True,)
    .callbacks(MyCallbacks)
    .rl_module(
        model_config={
            "fcnet_hiddens" : [128, 128, 128], 
            "use_attention" : True,
        }
    )
    .multi_agent(
        policies= {
            "prey": PolicySpec(
                policy_class=None,  # infer automatically from Algorithm
                observation_space=env.observation_space[0],  # if None infer automatically from env
                action_space=env.action_space[0],  # if None infer automatically from env
                config={"gamma": 0.85},  # use main config plus <- this override here
            ),
            "predator": PolicySpec(
                policy_class=None,
                observation_space=env.observation_space[0],
                action_space=env.action_space[0],
                config={"gamma": 0.85},
            ),
        },
        policy_mapping_fn = lambda id, *arg, **karg: "prey" if env.particule_agents[id].entity_type == 0 else "predator",
        policies_to_train=["prey", "predator"],
        count_steps_by="agent_steps",
    )
    .training(
        num_epochs=10,
        train_batch_size_per_learner=512, 
    )
    .learners(
        num_learners=1,          # or >2
        num_cpus_per_learner=3,  # <- default 1
        num_gpus_per_learner=0,  # <- default 0
    )
    .resources(
        num_cpus_for_main_process=1  # <- default  1
    )
    .env_runners(
        num_env_runners=1, 
        num_envs_per_env_runner=1,
        num_cpus_per_env_runner=2,
        rollout_fragment_length="auto",
        batch_mode= 'complete_episodes', # truncate_episodes or complete_episodes
    )
    .checkpointing(export_native_model_files=True)
)


  gym.logger.warn(
  gym.logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


## Wandb Callback

In [4]:
from ray.air.integrations.wandb import WandbLoggerCallback

# Read the API key from the file to use Wanddb
with open('../project/wandb_api_key.txt', 'r') as file:
    api_key = file.read().strip()
tune_callbacks = [
    WandbLoggerCallback(                   
        project="marl-rllib", 
        group=None,
        api_key=api_key,
        log_config=True,
        upload_checkpoints=False
    ), 
]

## Training

In [5]:
from ray import train, tune
import os

ray.init(
    num_cpus=num_cpus, 
    num_gpus=num_gpus,
    ignore_reinit_error = True,
)

############################################
# Where to save 
############################################
# absolute path + ray_results directory
storage_path=os.getcwd() + "/ray_results"
checkpoint_folder = None # is something like "PPO_2024-12-19_01-09-51"


############################################
# Config
############################################
config_dict = ppo_config.to_dict()

# Environment parameters
#config_dict["env_config"]["friction_regime"] = tune.grid_search(["linear", "quadratic"])
#config_dict["env_config"]["periodical_boundary"] = tune.grid_search([True, False])
config_dict["env_config"]["prey_consumed"] = tune.grid_search([True, False])

#config_dict["env_config"]["num_food_patch"] = tune.grid_search([0, 2])

# RLlib parameters
#config_dict["train_batch_size_per_learner"] = tune.grid_search([256, 512, 1024])


############################################
# Build the Tuner
############################################
if checkpoint_folder is None : 
    tuner = tune.Tuner(
        trainable = ALGO,                                     # Defined before
        param_space=config_dict,                              # Defined before
        run_config=train.RunConfig(    
            storage_path=storage_path,
            stop={"training_iteration": 1500},
            verbose=3,
            callbacks=tune_callbacks,
            checkpoint_config=train.CheckpointConfig(         
                checkpoint_at_end=True,
                checkpoint_frequency=100,
            ),
        ),
    )
else:  # If we start a training that failed
    path = storage_path + "/" + checkpoint_folder
    # Restore the training
    tuner = tune.Tuner.restore(
        trainable = ALGO,
        path = path, 
        resume_unfinished=True, 
        resume_errored=True,
        restart_errored=False,
    )
    

# Run the experiment 
results = tuner.fit()

ray.shutdown()


2025-02-01 13:31:38,723	INFO worker.py:1841 -- Started a local Ray instance.
2025-02-01 13:31:39,209	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-02-02 10:32:18
Running for:,21:00:39.51
Memory:,26.9/64.0 GiB

Trial name,status,loc,env_config/prey_cons umed,iter,total time (s),num_training_step_ca lls_per_iteration,num_env_steps_sample d_lifetime,...env_steps_sampled _lifetime_throughput
PPO_Particle2dEnvironment_7c04b_00001,PENDING,,False,,,,,
PPO_Particle2dEnvironment_7c04b_00000,TERMINATED,127.0.0.1:9602,True,1500.0,4573.27,1.0,508336.0,139.559


[36m(PPO pid=9602)[0m Install gputil for GPU system monitoring.
[36m(_WrappedExecutable pid=9625)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(MultiAgentEnvRunner pid=9612)[0m Error importing optional module moviepy.editor
[36m(MultiAgentEnvRunner pid=9612)[0m Traceback (most recent call last):
[36m(MultiAgentEnvRunner pid=9612)[0m   File "/opt/homebrew/Caskroom/miniforge/base/envs/collective_env/lib/python3.11/site-packages/wandb/util.py", line 215, in import_module_lazy
[36m(MultiAgentEnvRunner pid=9612)[0m     return sys.modules[name]
[36m(MultiAgentEnvRunner pid=9612)[0m            ~~~~~~~~~~~^^^^^^
[36m(MultiAgentEnvRunner pid=9612)[0m KeyError: 'moviepy.editor'
[36m(MultiAgentEnvRunner pid=9612)[0m 
[36m(MultiAgentEnvRunner pid=9612)[0m During handling of the above exception, another exception occurred:
[36m(MultiAgentEnvRunner pid=9612)[0m 
[36m(MultiAgentEnvRunner pid=9612)[0m Traceback (most recent call last):
[36m(MultiAgentEnvR

Trial name,date,done,env_runner_group,env_runners,fault_tolerance,hostname,iterations_since_restore,learners,node_ip,num_env_steps_sampled_lifetime,num_env_steps_sampled_lifetime_throughput,num_training_step_calls_per_iteration,perf,pid,time_since_restore,time_this_iter_s,time_total_s,timers,timestamp,training_iteration,trial_id
PPO_Particle2dEnvironment_7c04b_00000,2025-02-01_14-49-51,True,{'actor_manager_num_outstanding_async_reqs': 0},"{'num_agent_steps_sampled': {'16': 22, '23': 92, '20': 701, '5': 45, '21': 123, '33': 701, '30': 701, '24': 393, '14': 262, '0': 48, '2': 33, '22': 220, '12': 0, '3': 118, '8': 158, '13': 39, '1': 132, '15': 42, '6': 127, '10': 73, '17': 105, '29': 208, '9': 33, '28': 34, '26': 250, '32': 701, '31': 701, '7': 56, '25': 506, '4': 415, '19': 14, '11': 184, '18': 302, '27': 83}, 'num_agent_steps_sampled_lifetime': {'8': 162398, '13': 157473, '1': 159983, '15': 162997, '6': 160842, '31': 508620, '10': 162014, '3': 158719, '17': 159416, '29': 158343, '9': 164284, '28': 156992, '26': 159559, '32': 508620, '7': 155292, '25': 157906, '19': 159588, '4': 162540, '16': 165869, '23': 157918, '20': 163776, '5': 164158, '21': 161100, '33': 508620, '30': 508620, '14': 159171, '24': 163958, '0': 161484, '2': 161535, '22': 154471, '12': 156066, '18': 166504, '11': 155368, '27': 159705}, 'agent_episode_returns_mean': {'23': -1.26359, '20': -1.2841299999999998, '5': -1.1454600000000001, '33': 9.23, '24': -1.1535699999999998, '13': -1.21499, '14': -1.20283, '1': -1.27369, '11': -1.3235699999999997, '12': -1.27475, '3': -1.17594, '8': -1.1857, '18': -1.18561, '27': -1.28338, '10': -1.15618, '15': -1.18568, '6': -1.2763200000000001, '17': -1.1368899999999997, '29': -1.24432, '31': 9.39, '7': -1.25256, '4': -1.2531500000000002, '16': -1.23377, '9': -1.2532100000000002, '28': -1.2266599999999999, '21': -1.24573, '26': -1.20716, '32': 8.91, '30': 8.83, '0': -1.22372, '22': -1.2155199999999997, '2': -1.2934799999999997, '25': -1.17421, '19': -1.25403}, 'timers': {'connectors': {'UnBatchToIndividualItems': 3.833129678411242e-05, 'AgentToModuleMapping': 4.520600436544991e-06, 'BatchIndividualItems': 1.837650708361411e-05, 'GetActions': 9.121889368500947e-05, 'NumpyToTensor': 1.7328826125543432e-05, 'ModuleToAgentUnmapping': 2.5481854407737e-06, 'AddObservationsFromEpisodesToBatch': 5.286020093557301e-05, 'AddStatesFromEpisodesToBatch': 5.102603283959297e-06, 'ListifyDataForVectorEnv': 5.966468561960993e-06, 'RemoveSingleTsTimeRankFromBatch': 6.847419789095492e-07, 'NormalizeAndClipActions': 0.00014668391338332705, 'TensorToNumpy': 2.5466847587532545e-05}}, 'agent_steps': {'29': 122.56, '31': 383.43, '7': 97.21, '4': 111.05, '27': 96.35, '16': 88.78, '9': 94.19, '28': 107.55, '26': 107.5, '32': 383.43, '0': 124.34, '22': 108.01, '25': 103.84, '19': 97.29, '23': 111.36, '20': 113.6, '5': 122.3, '21': 98.74, '33': 383.43, '30': 383.43, '24': 117.37, '13': 115.01, '14': 107.87, '2': 88.93, '11': 103.92, '12': 98.51, '3': 105.53, '8': 113.0, '18': 112.69, '1': 106.01, '10': 122.86, '15': 99.65, '6': 101.98, '17': 114.66}, 'episode_len_mean': 383.43, 'mean_dos': np.float64(0.26486855840129225), 'num_episodes_lifetime': 1485, 'episode_len_max': 701, 'max_dos': 0.8363477934304366, 'num_env_steps_sampled': 701, 'max_doa': 0.24683915988549995, 'env_to_module_sum_episodes_length_out': 327.15858221091224, 'episode_duration_sec_mean': 0.942505417490056, 'episode_return_max': -0.22699999999999676, 'mean_doa': np.float64(0.0910370534873915), 'episode_return_mean': -0.4398000000000007, 'env_to_module_sum_episodes_length_in': 327.15858221091224, 'num_module_steps_sampled_lifetime': {'prey': 4809429, 'predator': 2034480}, 'episode_len_min': 85, 'episode_videos_best': [], 'num_module_steps_sampled': {'prey': 4818, 'predator': 2804}, 'num_episodes': 1, 'module_episode_returns_mean': {'prey': -1.24432, 'predator': 9.23}, 'num_env_steps_sampled_lifetime': 508336, 'episode_return_min': -0.6019999999999968, 'time_between_sampling': 1.443436662608461, 'num_env_steps_sampled_lifetime_throughput': 139.5585100141166}","{'num_healthy_workers': 1, 'num_remote_worker_restarts': 5}",MacBook-Pro-de-Tanguy.fritz.box,1500,"{'__all_modules__': {'timers': {'connectors': {'AddColumnsFromEpisodesToTrainBatch': 0.03617516712056098, 'AddOneTsToEpisodesAndTruncate': 0.0030597222971253737, 'AddStatesFromEpisodesToBatch': 1.3661796671831872e-05, 'GeneralAdvantageEstimation': 0.0068145446802137565, 'AddObservationsFromEpisodesToBatch': 0.00015022291679985772, 'AgentToModuleMapping': 0.0022745045488960546, 'NumpyToTensor': 0.0002468781685097811, 'BatchIndividualItems': 0.039348483496475795}}, 'num_env_steps_trained': 701, 'learner_connector_sum_episodes_length_out': 326.32219255761856, 'num_module_steps_trained_lifetime': 6929328, 'num_non_trainable_parameters': 0.0, 'learner_connector_sum_episodes_length_in': 326.32219255761856, 'num_env_steps_trained_lifetime': 508620, 'num_module_steps_trained': 7685, 'num_trainable_parameters': 159498.0, 'num_env_steps_trained_lifetime_throughput': 139.5582599308655}, 'prey': {'curr_kl_coeff': 1.1546030044555664, 'policy_loss': 0.04992106929421425, 'vf_loss_unclipped': 0.2210424691438675, 'num_module_steps_trained_lifetime': 4888908, 'module_train_batch_size_mean': 3069.6878863544484, 'diff_num_grad_updates_vs_sampler_policy': 0.0, 'default_optimizer_learning_rate': 5e-05, 'num_non_trainable_parameters': 0.0, 'gradients_default_optimizer_global_norm': 4.412717819213867, 'total_loss': 0.28007662296295166, 'mean_kl_loss': 0.007892820052802563, 'entropy': 16.667478561401367, 'vf_explained_var': 0.30255717039108276, 'vf_loss': 0.2210424691438675, 'curr_entropy_coeff': 0.0, 'num_module_steps_trained': 4877, 'num_trainable_parameters': 79749.0, 'weights_seq_no': 1485.0}, 'predator': {'vf_loss_unclipped': 0.5194647312164307, 'num_module_steps_trained_lifetime': 2040420, 'module_train_batch_size_mean': 1309.288770230473, 'num_non_trainable_parameters': 0.0, 'diff_num_grad_updates_vs_sampler_policy': 0.0, 'gradients_default_optimizer_global_norm': 72.67535400390625, 'total_loss': 0.5664339661598206, 'default_optimizer_learning_rate': 5e-05, 'mean_kl_loss': 0.01416424848139286, 'entropy': -5.4502105712890625, 'curr_entropy_coeff': 0.0, 'num_module_steps_trained': 2808, 'vf_explained_var': 0.7355270385742188, 'vf_loss': 0.5194647312164307, 'num_trainable_parameters': 79749.0, 'weights_seq_no': 1485.0, 'curr_kl_coeff': 3.97477388381958, 'policy_loss': -0.009330473840236664}}",127.0.0.1,508336,139.559,1,"{'cpu_util_percent': np.float64(23.84285714285715), 'ram_util_percent': np.float64(47.471428571428575)}",9602,4573.27,3.69886,4573.27,"{'training_iteration': 2.9793158648683744, 'restore_workers': 0.015002854687042579, 'training_step': 2.9634760071230857, 'env_runner_sampling_timer': 1.8965182139333392, 'learner_update_timer': 1.0727236388052572, 'synch_weights': 0.0035421026736569042, 'synch_env_connectors': 0.010433492102718683}",1738417791,1500,7c04b_00000


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/tanguy/Code/Finebouche/collective_behavior/project/ray_results/PPO_2025-02-01_13-31-39/PPO_Particle2dEnvironment_7c04b_00000_0_prey_consumed=True_2025-02-01_13-31-39/checkpoint_000000)
[36m(MultiAgentEnvRunner pid=9612)[0m 2025-02-01 13:38:10,367	ERROR actor_manager.py:187 -- Worker exception caught during `apply()`: Agent 24 acted and then got truncated, but did NOT receive a last (truncation) observation, required for e.g. value function bootstrapping!
[36m(MultiAgentEnvRunner pid=9612)[0m Traceback (most recent call last):
[36m(MultiAgentEnvRunner pid=9612)[0m   File "/opt/homebrew/Caskroom/miniforge/base/envs/collective_env/lib/python3.11/site-packages/ray/rllib/utils/actor_manager.py", line 183, in apply
[36m(MultiAgentEnvRunner pid=9612)[0m     return func(self, *args, **kwarg

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff24d3dc8ef8d4f6a7cfacb44401000000 Worker ID: 1519c1157d56a9d86884dc5de82064a6f919b895c29cabdb887383d4 Node ID: c2fed0cd03b5a3c90434566716fc84270f2a54145d53f9c7eeeddc76 Worker IP address: 127.0.0.1 Worker port: 50345 Worker PID: 9612 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code 1.


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:39:11,467	ERROR actor_manager.py:815 -- Ray error (The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: IOError: Fail all inflight tasks due to actor state change.. The task may or maynot have been executed on the actor.), taking actor 1 out of service.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:39:11,467	ERROR actor_manager.py:646 -- The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: IOError: Fail all inflight tasks due to actor state change.. The task may or maynot have been executed on the actor.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m NoneType: None
[36m(MultiAgentEnvRunner pid=9984)[0m

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff24d3dc8ef8d4f6a7cfacb44401000000 Worker ID: f0ec47967d13070bb638c2d81c60b3ddf22a1542e61353d28fdb9c0f Node ID: c2fed0cd03b5a3c90434566716fc84270f2a54145d53f9c7eeeddc76 Worker IP address: 127.0.0.1 Worker port: 52684 Worker PID: 9984 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code 1.


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:40:22,524	ERROR actor_manager.py:815 -- Ray error (The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.), taking actor 1 out of service.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:40:22,524	ERROR actor_manager.py:646 -- The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m NoneType: None
[36m(MultiAgentE

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff24d3dc8ef8d4f6a7cfacb44401000000 Worker ID: 377b8b52b4bb361ebe45ddcb5b143aee06be2bd4f3c36ee84760585a Node ID: c2fed0cd03b5a3c90434566716fc84270f2a54145d53f9c7eeeddc76 Worker IP address: 127.0.0.1 Worker port: 52715 Worker PID: 10044 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code 1.


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:41:43,593	ERROR actor_manager.py:815 -- Ray error (The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.), taking actor 1 out of service.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 13:41:43,593	ERROR actor_manager.py:646 -- The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m NoneType: None
[36m(MultiAgentE

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff24d3dc8ef8d4f6a7cfacb44401000000 Worker ID: db89c6211c37cac60791bc61ee4a96efc12ce2039d263f375144d062 Node ID: c2fed0cd03b5a3c90434566716fc84270f2a54145d53f9c7eeeddc76 Worker IP address: 127.0.0.1 Worker port: 52752 Worker PID: 10070 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code 1.


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 14:41:09,657	ERROR actor_manager.py:815 -- Ray error (The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: IOError: Fail all inflight tasks due to actor state change.. The task may or maynot have been executed on the actor.), taking actor 1 out of service.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 14:41:09,657	ERROR actor_manager.py:646 -- The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: IOError: Fail all inflight tasks due to actor state change.. The task may or maynot have been executed on the actor.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m NoneType: None
[36m(MultiAgentEnvRunner pid=12389)[0

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. RayTask ID: ffffffffffffffff24d3dc8ef8d4f6a7cfacb44401000000 Worker ID: 29fa4c24429b55684b80186f813947a96fe5e51c45569e6f6248d9cc Node ID: c2fed0cd03b5a3c90434566716fc84270f2a54145d53f9c7eeeddc76 Worker IP address: 127.0.0.1 Worker port: 53510 Worker PID: 12389 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker exits unexpectedly. Worker exits with an exit code 1.


[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 14:42:20,772	ERROR actor_manager.py:815 -- Ray error (The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.), taking actor 1 out of service.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m 2025-02-01 14:42:20,772	ERROR actor_manager.py:646 -- The actor 24d3dc8ef8d4f6a7cfacb44401000000 is unavailable: The actor is temporarily unavailable: RpcError: RPC Error message: Cancelling all calls; RPC Error details: . The task may or maynot have been executed on the actor.
[36m(PPO(env=<class 'particle_2d_env.Particle2dEnvironment'>; env-runners=1; learners=1; multi-agent=True) pid=9602)[0m NoneType: None
[36m(MultiAgentE

KeyboardInterrupt: 

In [None]:
ray.nodes()[0]

ray.shutdown()