# Setting up a Ray cluster with SmartSim

## 1. Start the cluster
We set up a SmartSim experiment, which will handle the launch of the Ray cluster.

First we import the relevant modules.

In [1]:
import numpy as np
import time
import argparse
import os

from ray.tune.progress_reporter import JupyterNotebookReporter
import ray
from ray import tune
import ray.util

from smartsim import Experiment
from smartsim.ray import RayCluster

NUM_WORKERS = 3
alloc=None
#alloc=slurm.get_allocation(nodes=1+NUM_WORKERS, time="12:00:00", options={"ntasks": str(1+NUM_WORKERS), "partition": "spider", "C": "V100"})

In [2]:
exp = Experiment("ray-cluster", launcher='pbs')
cluster = RayCluster(name="ray-cluster", run_args={}, path='',
                     launcher='pbs', workers=NUM_WORKERS, alloc=alloc, batch=True, ray_num_cpus=18)

if cluster.batch:
    cluster.head_model.batch_settings._preamble += ["source ~/.bashrc", "conda activate smartsim"]
    if NUM_WORKERS:
        cluster.worker_model.batch_settings._preamble += ["source ~/.bashrc", "conda activate smartsim"]

exp.generate(cluster, overwrite=True)

Executable: /lus/scratch/arigazzi/anaconda3/envs/smartsim/bin/python
Executable arguments: ['/lus/scratch/arigazzi/smartsim-dev/SmartSim/smartsim/ray/raystarter.py', '--num-cpus=18', '--port=6780', '--redis-password=cb2af36a-7d31-492b-a822-f895a9d426df']
Run Command: aprun
Run arguments: {'sync-output': None}
07:49:02 swan SmartSim[9468] INFO Working in previously created experiment


In [3]:
exp.start(cluster, block=False, summary=False)

07:49:27 swan SmartSim[9468] DEBUG Added step command to batch for head-CBLHL6YRZQ0B
07:49:27 swan SmartSim[9468] DEBUG Gleaned batch job id: 37800.sdb for head-CBLHL6YRZ3P9
07:49:27 swan SmartSim[9468] DEBUG Launching head
07:49:41 swan SmartSim[9468] DEBUG WLM Ray head node acquisition unsupported
07:49:41 swan SmartSim[9468] DEBUG Added step command to batch for workers-CBLHLDHBME4H
07:49:41 swan SmartSim[9468] DEBUG Gleaned batch job id: 37801.sdb for workers-CBLHLDHBLATC
07:49:41 swan SmartSim[9468] DEBUG Launching workers
07:49:41 swan SmartSim[9468] DEBUG WLM Ray worker node acquisition unsupported
07:49:41 swan SmartSim[9468] INFO Ray cluster launched.
07:49:41 swan SmartSim[9468] DEBUG Starting Job Manager


## 2. Start the ray driver script

In [4]:
ray.util.connect(cluster.head_model.address +":10001")

tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config={
        "framework": "torch",
        "env": "CartPole-v0",
        "num_gpus": 0,
        "lr": tune.grid_search(np.linspace (0.001, 0.01, 50).tolist()),
        "log_level": "ERROR",
        "num_cpus_per_worker": 1,
    },
    local_dir="/lus/scratch/arigazzi/ray_local/",
    verbose=1,
    fail_fast=True,
    log_to_file=True,
)

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 5.0/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 3.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (49 PENDING, 1 RUNNING)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 5.4/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 18.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (44 PENDING, 6 RUNNING)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=74676)[0m 2021-05-24 07:53:58,592	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=74672)[0m 2021-05-24 07:53:58,821	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=74674)[0m 2021-05-24 07:53:58,835	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=74670)[0m 2021-05-24 07:53:59,145	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=74667)[0m 2021-05-24 07:53:59,225	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=74665)[0m 2021-05-24 07:53:59,355	INFO trainer.py:694 -- Cur

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.6/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 18.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (44 PENDING, 6 RUNNING)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.6/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (26 PENDING, 24 RUNNING)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=61437)[0m 2021-05-24 07:56:07,687	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=61519)[0m 2021-05-24 07:56:07,691	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=61507)[0m 2021-05-24 07:56:07,685	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=61536)[0m 2021-05-24 07:56:07,678	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=61545)[0m 2021-05-24 07:56:07,683	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=61571)[0m 2021-05-24 07:56:07,680	INFO trainer.py:694 -- Cur

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.6/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (26 PENDING, 23 RUNNING, 1 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.5/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (25 PENDING, 23 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 




[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.6/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.8/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 




[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.4/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=77710)[0m 2021-05-24 07:57:51,084	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=77712)[0m 2021-05-24 07:57:51,081	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=77710)[0m 2021-05-24 07:58:25,382	INFO trainable.py:101 -- Trainable.setup took 34.298 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=77712)[0m 2021-05-24 07:58:25,393	INFO trainable.py:101 -- Trainable.setup took 34.312 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74

[2m[36m(pid=31977)[0m 2021-05-24 07:59:28,301	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=64181)[0m 2021-05-24 07:59:30,299	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=31977)[0m 2021-05-24 07:59:39,929	INFO trainable.py:101 -- Trainable.setup took 11.628 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=64181)[0m 2021-05-24 07:59:42,528	INFO trainable.py:101 -- Trainable.setup took 12.229 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=79720)[0m 2021-05-24 07:59:48,995	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.5/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (18 PENDING, 24 RUNNING, 8 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.8/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (18 PENDING, 23 RUNNING, 9 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 




[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 7.3/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (15 PENDING, 24 RUNNING, 11 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 




[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 7.5/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (13 PENDING, 23 RUNNING, 14 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


The actor or task with ID fffffffffffffffff37ac4a84fb0183e6290f1a301000000 cannot be scheduled right now. It requires {CPU_group_0_37ac7e201c3d77d338995747f50fd7c8: 1.000000}, {CPU_group_37ac7e201c3d77d338995747f50fd7c8: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.214506 GiB/87.214506 GiB memory, 37.377645 GiB/37.377645 GiB object_store_memory, 1.000000/1.000000 CPU_group_0_0cef4fda88e0f3dda0dabb9b54abd0c3, 3.000000/3.000000 CPU_group_0cef4fda88e0f3dda0dabb9b54abd0c3, 3.000000/3.000000 CPU_group_2f27b18ff881043eed191c1135919674, 1.000000/1.000000 CPU_group_0_a158e48561f1ec7f26fb152eeb4699ee, 1.000000/1.000000 CPU_group_1_0cef4fda88e0f3dda0dabb9b54abd0c3, 1.000000/1.000000 CPU_group_2_2c8a52f769e89d5cc7bb50f8b377179f, 3.000000/3.000000 CPU_group_2c8a52f769e89d5cc7bb50f8b377179f, 1.000000/1.000000 CPU_group_2_2f27b18ff881043eed191c1135919674, 1.000000/1.000000 CPU_group_1_2f27b18ff881043eed191c1135919674, 0.000000/3.000000 CPU_group_9903891578ca4

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 7.0/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (12 PENDING, 23 RUNNING, 15 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=79720)[0m 2021-05-24 08:00:23,807	INFO trainable.py:101 -- Trainable.setup took 34.812 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=79731)[0m 2021-05-24 08:00:23,888	INFO trainable.py:101 -- Trainable.setup took 34.602 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=79722)[0m 2021-05-24 08:00:23,910	INFO trainable.py:101 -- Trainable.setup took 34.807 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=79726)[0m 2021-05-24 08:00:24,202	INFO trainable.py:101 -- Trainable.setup took 34.939 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.6/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (9 PENDING, 24 RUNNING, 17 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (9 PENDING, 24 RUNNING, 17 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=582)[0m 2021-05-24 08:00:36,207	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=596)[0m 2021-05-24 08:00:36,211	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=602)[0m 2021-05-24 08:00:36,223	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=611)[0m 2021-05-24 08:00:36,395	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=612)[0m 2021-05-24 08:00:36,402	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (7 PENDING, 23 RUNNING, 20 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=33318)[0m 2021-05-24 08:00:48,543	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=33321)[0m 2021-05-24 08:00:48,541	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=33327)[0m 2021-05-24 08:00:48,635	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=33365)[0m 2021-05-24 08:00:49,823	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=33371)[0m 2021-05-24 08:00:49,825	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=582)[0m 2021-05-24 08:00:50,213	INFO trainable.py:101 -- Tra

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (5 PENDING, 24 RUNNING, 21 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=33321)[0m 2021-05-24 08:01:18,963	INFO trainable.py:101 -- Trainable.setup took 30.423 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=33318)[0m 2021-05-24 08:01:19,060	INFO trainable.py:101 -- Trainable.setup took 30.517 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=33327)[0m 2021-05-24 08:01:19,587	INFO trainable.py:101 -- Trainable.setup took 30.952 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (5 PENDING, 24 RUNNING, 21 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=33365)[0m 2021-05-24 08:01:31,643	INFO trainable.py:101 -- Trainable.setup took 41.821 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=33371)[0m 2021-05-24 08:01:32,038	INFO trainable.py:101 -- Trainable.setup took 42.216 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (4 PENDING, 23 RUNNING, 23 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.8/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (3 PENDING, 23 RUNNING, 24 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=1946)[0m 2021-05-24 08:02:05,638	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 7.2/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=1946)[0m 2021-05-24 08:02:15,904	INFO trainable.py:101 -- Trainable.setup took 10.267 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.5/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=81986)[0m 2021-05-24 08:02:30,231	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=67043)[0m 2021-05-24 08:02:32,780	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=67047)[0m 2021-05-24 08:02:32,778	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.9/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=67043)[0m 2021-05-24 08:02:43,775	INFO trainable.py:101 -- Trainable.setup took 10.997 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=67047)[0m 2021-05-24 08:02:43,803	INFO trainable.py:101 -- Trainable.setup took 11.025 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 7.1/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=81986)[0m 2021-05-24 08:03:05,572	INFO trainable.py:101 -- Trainable.setup took 35.341 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (1 PENDING, 24 RUNNING, 25 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


The actor or task with ID fffffffffffffffffba1ddc99f7d189bac39fe9901000000 cannot be scheduled right now. It requires {CPU_group_0_2c8a52f769e89d5cc7bb50f8b377179f: 1.000000}, {CPU_group_2c8a52f769e89d5cc7bb50f8b377179f: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.214506 GiB/87.214506 GiB memory, 37.377645 GiB/37.377645 GiB object_store_memory, 0.000000/1.000000 CPU_group_0_0cef4fda88e0f3dda0dabb9b54abd0c3, 0.000000/3.000000 CPU_group_0cef4fda88e0f3dda0dabb9b54abd0c3, 0.000000/3.000000 CPU_group_2f27b18ff881043eed191c1135919674, 0.000000/1.000000 CPU_group_0_a158e48561f1ec7f26fb152eeb4699ee, 1.000000/1.000000 CPU_group_1_0cef4fda88e0f3dda0dabb9b54abd0c3, 1.000000/1.000000 CPU_group_2_2c8a52f769e89d5cc7bb50f8b377179f, 3.000000/3.000000 CPU_group_2c8a52f769e89d5cc7bb50f8b377179f, 1.000000/1.000000 CPU_group_2_2f27b18ff881043eed191c1135919674, 1.000000/1.000000 CPU_group_1_2f27b18ff881043eed191c1135919674, 0.000000/3.000000 CPU_group_9903891578ca4

[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (24 RUNNING, 26 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 6.7/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (23 RUNNING, 27 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==


[2m[36m(pid=3831)[0m 2021-05-24 08:04:16,991	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 5.9/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 57.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (19 RUNNING, 31 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 


[2m[36m(pid=3831)[0m 2021-05-24 08:04:29,025	INFO trainable.py:101 -- Trainable.setup took 12.034 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 5.9/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 54.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (18 RUNNING, 32 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==
[2m[36m(pid=74649)[0m Memory usage on this node: 5.4/125.8 GiB
[2m[36m(pid=74649)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=74649)[0m Resources requested: 33.0/72 CPUs, 0/0 GPUs, 0.0/339.53 GiB heap, 0.0/149.51 GiB objects
[2m[36m(pid=74649)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=74649)[0m Number of trials: 50/50 (11 RUNNING, 39 TERMINATED)
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m 
[2m[36m(pid=74649)[0m == Status ==


[2m[36m(pid=3831)[0m 2021-05-24 08:06:08,388	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=3831)[0m Traceback (most recent call last):
[2m[36m(pid=3831)[0m   File "python/ray/_raylet.pyx", line 495, in ray._raylet.execute_task
[2m[36m(pid=3831)[0m   File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
[2m[36m(pid=3831)[0m   File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
[2m[36m(pid=3831)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
[2m[36m(pid=3831)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(pid=3831)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/actor.py", line 1001, in __ray_terminate__
[2m[36m(pid=3831)[0m     ray.actor.exit_actor()
[2m[36m(pid=3831)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim

<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f19f8ea9be0>

## 3. Stop cluster and release allocation

In [5]:
if alloc:
    slurm.release_allocation(alloc)

In [9]:
exp.stop(cluster)

06:39:40 swan SmartSim[1694] INFO Stopping model workers with job name workers-CBLEF8M2VDYD
06:39:40 swan SmartSim[1694] DEBUG Process terminated with kill 2252
06:39:40 swan SmartSim[1694] INFO Stopping model head with job name head-CBLEF2ZK1XJI
06:39:40 swan SmartSim[1694] DEBUG Process terminated with kill 1992
06:39:42 swan SmartSim[1694] DEBUG Sleeping, no jobs to monitor


In [6]:

tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config={
        "framework": "torch",
        "env": "CartPole-v0",
    #    "num_gpus": 0,
        "lr": tune.grid_search(np.linspace (0.001, 0.01, 50).tolist()),
    #    "log_level": "ERROR",
    #    "num_cpus_per_worker": 1,
    },
    #local_dir="/lus/scratch/arigazzi/ray_local/",
    #verbose=1,
    #fail_fast=True,
    #log_to_file=True,
    reporter = JupyterNotebookReporter(True),
)

[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 4.8/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (50 PENDING)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 5.2/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (50 PENDING)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory us



[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 5.9/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 9.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (47 PENDING, 3 RUNNING)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=24081)[0m 2021-05-24 05:48:14,554	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=24080)[0m 2021-05-24 05:48:14,633	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=25721)[0m 2021-05-24 05:48:14,861	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=25724)[0m 2021-05-24 05:48:14,957	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=24494)[0m 2021-05-24 05:48:15,035	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=24085)[0m 2021-05-24 05:48:15,047	INFO trainer.py:694 -- Current 

[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (26 PENDING, 24 RUNNING)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (26 PENDING, 24 RUNNING)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m

[2m[36m(pid=26390)[0m 2021-05-24 05:51:22,263	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=26393)[0m 2021-05-24 05:51:22,339	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (24 PENDING, 24 RUNNING, 2 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=26393)[0m 2021-05-24 05:51:36,163	INFO trainable.py:101 -- Trainable.setup took 13.825 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=26390)[0m 2021-05-24 05:51:36,395	INFO trainable.py:101 -- Trainable.setup took 14.132 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.9/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (23 PENDING, 23 RUNNING, 4 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 




[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 8.1/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (16 PENDING, 24 RUNNING, 10 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 




[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.2/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (13 PENDING, 24 RUNNING, 13 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=36059)[0m 2021-05-24 05:52:12,906	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.3/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (12 PENDING, 23 RUNNING, 15 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=28879)[0m 2021-05-24 05:52:22,434	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28880)[0m 2021-05-24 05:52:22,432	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28881)[0m 2021-05-24 05:52:22,487	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28884)[0m 2021-05-24 05:52:22,572	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28887)[0m 2021-05-24 05:52:22,704	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28888)[0m 2021-05-24 05:52:22,881	INFO trainer.py:694 -- Current 

[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (8 PENDING, 24 RUNNING, 18 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=27723)[0m 2021-05-24 05:52:59,949	INFO trainable.py:101 -- Trainable.setup took 15.062 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=27725)[0m 2021-05-24 05:52:59,992	INFO trainable.py:101 -- Trainable.setup took 15.081 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=27726)[0m 2021-05-24 05:52:59,853	INFO trainable.py:101 -- Trainable.setup took 14.945 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=27724)[0m 2021-05-24 05:53:00,062	INFO trainable.py:101 -- Trainable.setup took 15.167 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.5/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (8 PENDING, 23 RUNNING, 19 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 




[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.2/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (5 PENDING, 24 RUNNING, 21 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.1/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 69.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (5 PENDING, 23 RUNNING, 22 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=

[2m[36m(pid=28990)[0m 2021-05-24 05:53:58,361	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=37905)[0m 2021-05-24 05:54:00,005	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=37909)[0m 2021-05-24 05:54:00,003	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 6.5/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (3 PENDING, 24 RUNNING, 23 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=37918)[0m 2021-05-24 05:54:00,407	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=28990)[0m 2021-05-24 05:54:10,023	INFO trainable.py:101 -- Trainable.setup took 11.662 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.2/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (2 PENDING, 24 RUNNING, 24 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=38040)[0m 2021-05-24 05:54:12,428	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.9/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (2 PENDING, 24 RUNNING, 24 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=37905)[0m 2021-05-24 05:54:31,991	INFO trainable.py:101 -- Trainable.setup took 31.989 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=37909)[0m 2021-05-24 05:54:31,974	INFO trainable.py:101 -- Trainable.setup took 31.972 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=29211)[0m 2021-05-24 05:54:37,231	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=37918)[0m 2021-05-24 05:54:42,116	INFO trainable.py:101 -- Trainable.setup took 41.709 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=38040)[0m 2021-05-24 05:54:42,098	INFO trainable.py:101 -- Trainable.setup took 29.671 seconds. If your trainable is slow to initial

[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (2 PENDING, 24 RUNNING, 24 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=29211)[0m 2021-05-24 05:54:53,741	INFO trainable.py:101 -- Trainable.setup took 16.511 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 72.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (24 RUNNING, 26 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.4/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 66.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (22 RUNNING, 28 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=31284)[0m 2021-05-24 05:55:31,935	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=31291)[0m 2021-05-24 05:55:32,007	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.0/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 63.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (21 RUNNING, 29 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 


[2m[36m(pid=31284)[0m 2021-05-24 05:55:45,750	INFO trainable.py:101 -- Trainable.setup took 13.816 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=31291)[0m 2021-05-24 05:55:45,800	INFO trainable.py:101 -- Trainable.setup took 13.793 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.0/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 60.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (20 RUNNING, 30 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.0/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 60.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (20 RUNNING, 30 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status =



[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.0/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 57.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (19 RUNNING, 31 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status ==
[2m[36m(pid=15462)[0m Memory usage on this node: 7.0/187.6 GiB
[2m[36m(pid=15462)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=15462)[0m Resources requested: 36.0/72 CPUs, 0/0 GPUs, 0.0/512.51 GiB heap, 0.0/223.64 GiB objects
[2m[36m(pid=15462)[0m Result logdir: /home/users/arigazzi/ray_results/PPO
[2m[36m(pid=15462)[0m Number of trials: 50/50 (12 RUNNING, 38 TERMINATED)
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m 
[2m[36m(pid=15462)[0m == Status =

[2m[36m(pid=38507)[0m 2021-05-24 05:57:29,347	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=38507)[0m Traceback (most recent call last):
[2m[36m(pid=38507)[0m   File "python/ray/_raylet.pyx", line 595, in ray._raylet.task_execution_handler
[2m[36m(pid=38507)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 18, in _disable_client_hook
[2m[36m(pid=38507)[0m     def _disable_client_hook():
[2m[36m(pid=38507)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/worker.py", line 379, in sigterm_handler
[2m[36m(pid=38507)[0m     sys.exit(1)
[2m[36m(pid=38507)[0m SystemExit: 1
[2m[36m(pid=38508)[0m 2021-05-24 05:57:29,347	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=38508)[0m Traceback (most recent call last):
[2m[36m(pid=38508)[0m   File "python/ray/_raylet.pyx", line 595, in ray._raylet.task_ex

<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x2b3f29d4ffa0>

In [8]:
analysis = tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config={
        "framework": "torch",
        "env": "CartPole-v0",
        "lr": tune.grid_search([0.001, 0.002, 0.003, 0.004, 0.005]),
    },
    progress_reporter = JupyterNotebookReporter(True)
)

<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[2m[36m(pid=31372)[0m 2021-05-24 05:58:41,433	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32926)[0m 2021-05-24 05:58:42,009	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


<IPython.core.display.HTML object>


[2m[36m(pid=40976)[0m 2021-05-24 05:58:55,399	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=31101)[0m 2021-05-24 05:58:55,836	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=31167)[0m 2021-05-24 05:58:55,834	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


<IPython.core.display.HTML object>




<IPython.core.display.HTML object>




<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[2m[36m(pid=31125)[0m 2021-05-24 05:59:50,120	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=31125)[0m Traceback (most recent call last):
[2m[36m(pid=31125)[0m   File "python/ray/_raylet.pyx", line 495, in ray._raylet.execute_task
[2m[36m(pid=31125)[0m   File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
[2m[36m(pid=31125)[0m   File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
[2m[36m(pid=31125)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
[2m[36m(pid=31125)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(pid=31125)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/actor.py", line 1001, in __ray_terminate__
[2m[36m(pid=31125)[0m     ray.actor.exit_actor()
[2m[36m(pid=31125)[0m   File "/lus/scratch/arigazzi/anaconda3/env