# Setting up a Ray cluster with SmartSim

## 1. Start the cluster
We set up a SmartSim experiment, which will handle the launch of the Ray cluster.

First we import the relevant modules.

In [1]:
import numpy as np
import time
import argparse
import os

from ray.tune.progress_reporter import JupyterNotebookReporter
import ray
from ray import tune
import ray.util

from smartsim import Experiment
from smartsim.ray import RayCluster

# Number of Ray worker nodes requested alongside the single head node.
NUM_WORKERS = 3

# Slurm allocation passed to the RayCluster; None means no allocation is supplied.
alloc = None
# To obtain an allocation explicitly instead, bring `slurm` into scope first
# (it is not part of the import cell) and uncomment:
# alloc = slurm.get_allocation(
#     nodes=1 + NUM_WORKERS,
#     time="12:00:00",
#     options={"ntasks": str(1 + NUM_WORKERS), "partition": "spider", "C": "V100"},
# )

In [2]:
# Create the SmartSim experiment, using the Slurm launcher.
exp = Experiment("ray-cluster", launcher='slurm')

# Describe the Ray cluster: NUM_WORKERS workers, created with batch=True.
cluster = RayCluster(
    name="ray-cluster",
    run_args={},
    path='',
    launcher='slurm',
    workers=NUM_WORKERS,
    alloc=alloc,
    batch=True,
    ray_num_cpus=38,
)

# When the cluster runs as batch jobs, add shell lines to each batch preamble
# that source the user's bashrc and activate the `smartsim` conda environment.
if cluster.batch:
    env_setup = ["source ~/.bashrc", "conda activate smartsim"]
    # Each model receives its own copy of the list, as in the original two literals.
    cluster.head_model.batch_settings.add_preamble(list(env_setup))
    if NUM_WORKERS:
        cluster.worker_model.batch_settings.add_preamble(list(env_setup))

# Generate the cluster's files for the experiment; overwrite=True is passed so a
# previous run of this notebook does not block regeneration (presumably — confirm).
exp.generate(cluster, overwrite=True)

10:01:37 horizon SmartSim[11054] INFO Working in previously created experiment


In [3]:
# Launch the Ray cluster through the experiment.
# NOTE(review): block=False presumably returns without waiting for the jobs to
# finish and summary=False presumably skips the pre-launch summary — confirm
# against the SmartSim Experiment.start documentation.
exp.start(
    cluster,
    block=False,
    summary=False,
)

10:01:49 horizon SmartSim[11054] INFO Ray cluster launched on nodes: ['nid00000', 'nid00002', 'nid00003', 'nid00001']


## 2. Start the Ray driver script

In [4]:
# Connect this notebook to the cluster's head node on port 10001.
head_address = cluster.head_model.address + ":10001"
ray.util.connect(head_address)

# 100 evenly spaced learning rates between 0.001 and 0.01, explored as a grid search.
learning_rates = np.linspace(0.001, 0.01, 100).tolist()

ppo_config = {
    "framework": "torch",
    "env": "CartPole-v0",
    "num_gpus": 0,
    "lr": tune.grid_search(learning_rates),
    "log_level": "ERROR",
}

# Run PPO on CartPole for every learning rate in the grid; a trial stops once
# episode_reward_max reaches 200.
# NOTE(review): local_dir is an absolute, site-specific path — adjust before
# running on another system.
tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config=ppo_config,
    local_dir="/lus/scratch/arigazzi/ray_local/",
    verbose=1,
    fail_fast=True,
    log_to_file=True,
)

[2m[36m(pid=110452)[0m Instructions for updating:
[2m[36m(pid=110452)[0m non-resource variables are not supported in the long term


[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 5.8/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 3.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (99 PENDING, 1 RUNNING)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=110537)[0m Instructions for updating:
[2m[36m(pid=110537)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110478)[0m Instructions for updating:
[2m[36m(pid=110478)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110536)[0m Instructions for updating:
[2m[36m(pid=110536)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110472)[0m Instructions for updating:
[2m[36m(pid=110472)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110538)[0m Instructions for updating:
[2m[36m(pid=110538)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110466)[0m Instructions for updating:
[2m[36m(pid=110466)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110474)[0m Instructions for updating:
[2m[36m(pid=110474)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110473)[0m Instructions fo

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 11.4/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 150.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (50 PENDING, 50 RUNNING)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=85862)[0m Instructions for updating:
[2m[36m(pid=85862)[0m non-resource variables are not supported in the long term
[2m[36m(pid=85861)[0m Instructions for updating:
[2m[36m(pid=85861)[0m non-resource variables are not supported in the long term
[2m[36m(pid=85858)[0m Instructions for updating:
[2m[36m(pid=85858)[0m non-resource variables are not supported in the long term
[2m[36m(pid=85863)[0m Instructions for updating:
[2m[36m(pid=85863)[0m non-resource variables are not supported in the long term
[2m[36m(pid=85870)[0m Instructions for updating:
[2m[36m(pid=85870)[0m non-resource variables are not supported in the long term
[2m[36m(pid=85902)[0m Instructions for updating:
[2m[36m(pid=85902)[0m non-resource variables are not supported in the long term
[2m[36m(pid=86095)[0m Instructions for updating:
[2m[36m(pid=86095)[0m non-resource variables are not supported in the long term
[2m[36m(pid=86025)[0m Instructions for updating:
[2

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 13.5/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 150.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (50 PENDING, 50 RUNNING)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 13.5/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 150.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (50 PENDING, 50 RUNNING)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 
[2m[36m(pid=1104

[2m[36m(pid=113372)[0m Instructions for updating:
[2m[36m(pid=113372)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113420)[0m Instructions for updating:
[2m[36m(pid=113420)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113372)[0m 2021-05-26 10:03:38,141	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=113421)[0m Instructions for updating:
[2m[36m(pid=113421)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113419)[0m Instructions for updating:
[2m[36m(pid=113419)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113420)[0m 2021-05-26 10:03:39,304	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=113421)[0m 2021-05-26 10:03:40,243	INFO trainer.py:694 -- Current

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 13.0/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 150.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (44 PENDING, 50 RUNNING, 6 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=113929)[0m Instructions for updating:
[2m[36m(pid=113929)[0m non-resource variables are not supported in the long term
[2m[36m(pid=114002)[0m Instructions for updating:
[2m[36m(pid=114002)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113928)[0m Instructions for updating:
[2m[36m(pid=113928)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113917)[0m Instructions for updating:
[2m[36m(pid=113917)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113920)[0m Instructions for updating:
[2m[36m(pid=113920)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113927)[0m Instructions for updating:
[2m[36m(pid=113927)[0m non-resource variables are not supported in the long term
[2m[36m(pid=13577)[0m Instructions for updating:
[2m[36m(pid=13577)[0m non-resource variables are not supported in the long term
[2m[36m(pid=13577)[0m 2021-05-26 10:03:4

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 12.9/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 147.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (36 PENDING, 49 RUNNING, 15 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=114448)[0m 2021-05-26 10:03:46,964	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=114442)[0m 2021-05-26 10:03:47,274	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=15705)[0m Instructions for updating:
[2m[36m(pid=15705)[0m non-resource variables are not supported in the long term
[2m[36m(pid=15716)[0m Instructions for updating:
[2m[36m(pid=15716)[0m non-resource variables are not supported in the long term
[2m[36m(pid=89995)[0m Instructions for updating:
[2m[36m(pid=89995)[0m non-resource variables are not supported in the long term
[2m[36m(pid=89972)[0m Instructions for updating:
[2m[36m(pid=89972)[0m non-resource variables are not supported in the long term
[2m[36m(pid=89914)[0m Instructions for updating:
[2m[36m(pid=89914)[0m non-resourc

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 12.4/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 147.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (16 PENDING, 49 RUNNING, 35 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=1589)[0m Instructions for updating:
[2m[36m(pid=1589)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1594)[0m Instructions for updating:
[2m[36m(pid=1594)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1593)[0m Instructions for updating:
[2m[36m(pid=1593)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1579)[0m Instructions for updating:
[2m[36m(pid=1579)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1578)[0m Instructions for updating:
[2m[36m(pid=1578)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1615)[0m Instructions for updating:
[2m[36m(pid=1615)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1606)[0m Instructions for updating:
[2m[36m(pid=1606)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1555)[0m Instructions for updating:
[2m[36m(pid=1555

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 13.6/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 150.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (6 PENDING, 50 RUNNING, 44 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=89233)[0m Instructions for updating:
[2m[36m(pid=89233)[0m non-resource variables are not supported in the long term
[2m[36m(pid=89231)[0m Instructions for updating:
[2m[36m(pid=89231)[0m non-resource variables are not supported in the long term
[2m[36m(pid=89232)[0m Instructions for updating:
[2m[36m(pid=89232)[0m non-resource variables are not supported in the long term
[2m[36m(pid=91399)[0m Instructions for updating:
[2m[36m(pid=91399)[0m non-resource variables are not supported in the long term
[2m[36m(pid=91402)[0m Instructions for updating:
[2m[36m(pid=91402)[0m non-resource variables are not supported in the long term
[2m[36m(pid=91401)[0m Instructions for updating:
[2m[36m(pid=91401)[0m non-resource variables are not supported in the long term
[2m[36m(pid=91400)[0m Instructions for updating:
[2m[36m(pid=91400)[0m non-resource variables are not supported in the long term
[2m[36m(pid=91410)[0m Instructions for updating:
[2

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 13.5/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 147.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (49 RUNNING, 51 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 


[2m[36m(pid=17654)[0m Instructions for updating:
[2m[36m(pid=17654)[0m non-resource variables are not supported in the long term
[2m[36m(pid=16424)[0m 2021-05-26 10:04:04,354	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=90129)[0m Instructions for updating:
[2m[36m(pid=90129)[0m non-resource variables are not supported in the long term
[2m[36m(pid=17688)[0m Instructions for updating:
[2m[36m(pid=17688)[0m non-resource variables are not supported in the long term
[2m[36m(pid=90122)[0m Instructions for updating:
[2m[36m(pid=90122)[0m non-resource variables are not supported in the long term
[2m[36m(pid=17654)[0m 2021-05-26 10:04:05,139	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=17688)[0m 2021-05-26 10:04:05,457	INFO trainer.py:694 -- Current log_level 

[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 12.8/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 147.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (49 RUNNING, 51 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m == Status ==
[2m[36m(pid=110452)[0m Memory usage on this node: 12.2/187.6 GiB
[2m[36m(pid=110452)[0m Using FIFO scheduling algorithm.
[2m[36m(pid=110452)[0m Resources requested: 132.0/152 CPUs, 0/0 GPUs, 0.0/512.36 GiB heap, 0.0/223.58 GiB objects
[2m[36m(pid=110452)[0m Result logdir: /lus/scratch/arigazzi/ray_local/PPO
[2m[36m(pid=110452)[0m Number of trials: 100/100 (44 RUNNING, 56 TERMINATED)
[2m[36m(pid=110452)[0m 
[2m[36m(pid=110452)[0m 
[2m[36m(pi

[2m[36m(pid=88341)[0m 2021-05-26 10:04:36,744	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=88341)[0m Traceback (most recent call last):
[2m[36m(pid=88341)[0m   File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
[2m[36m(pid=88341)[0m   File "python/ray/_raylet.pyx", line 495, in ray._raylet.execute_task
[2m[36m(pid=88341)[0m   File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
[2m[36m(pid=88341)[0m   File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
[2m[36m(pid=88341)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
[2m[36m(pid=88341)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(pid=88341)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/actor.py", line 1001, in __ray_terminate__
[2m[36m(pid=88341)[0m

Instructions for updating:
non-resource variables are not supported in the long term


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7f35befb2d00>

## 3. Stop cluster and release allocation

In [5]:
# Release the Slurm allocation if one was obtained explicitly.
# With `alloc = None` there is nothing to release and this cell does nothing.
if alloc:
    # `slurm` is not brought into scope by the notebook's import cell, so the
    # original call raised NameError as soon as `alloc` was set. Import it here,
    # only on the path that needs it.
    # NOTE(review): import path assumed for the SmartSim version this notebook
    # targets — confirm.
    from smartsim.launcher import slurm

    slurm.release_allocation(alloc)

In [7]:
# Stop the Ray cluster through the experiment that launched it.
# NOTE(review): in file order this cell comes after the release-allocation cell;
# when an explicit allocation is in use, stopping the cluster before releasing
# the allocation is presumably the safer order — confirm.
exp.stop(cluster)

09:04:54 horizon SmartSim[34190] INFO Stopping model workers with job name workers-CBN834UC8A6R
09:04:54 horizon SmartSim[34190] INFO Stopping model head with job name head-CBN831L2EAET


The autoscaler failed with the following error:
Terminated with signal 15
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/monitor.py", line 376, in <module>
    monitor.run()
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/monitor.py", line 284, in run
    self._run()
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/monitor.py", line 202, in _run
    time.sleep(AUTOSCALER_UPDATE_INTERVAL_S)
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/monitor.py", line 273, in _signal_handler
    "".join(traceback.format_stack(frame)))
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/traceback.py", line 197, in format_stack
    return format_list(extract_stack(f, limit=limit))
  File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/traceback.py", line 211, in extract_stack
    stack = 

In [9]:

# Same PPO/CartPole sweep with a coarser grid (50 learning rates) and progress
# rendered by the Jupyter notebook reporter. Compared with the first sweep,
# num_gpus, num_cpus_per_worker, fail_fast and log_to_file are not passed here.
# NOTE(review): in file order this cell follows exp.stop(cluster) and its
# execution count is out of sequence; it needs a live Ray connection — confirm
# where it is meant to run.
coarse_learning_rates = np.linspace(0.001, 0.01, 50).tolist()

tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config={
        "framework": "torch",
        "env": "CartPole-v0",
        "lr": tune.grid_search(coarse_learning_rates),
        "log_level": "ERROR",
    },
    local_dir="/lus/scratch/arigazzi/ray_local/",
    verbose=1,
    progress_reporter=JupyterNotebookReporter(overwrite=True),
)

<IPython.core.display.HTML object>


[2m[36m(pid=39779)[0m Instructions for updating:
[2m[36m(pid=39779)[0m non-resource variables are not supported in the long term
[2m[36m(pid=39781)[0m Instructions for updating:
[2m[36m(pid=39781)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113189)[0m Instructions for updating:
[2m[36m(pid=113189)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113188)[0m Instructions for updating:
[2m[36m(pid=113188)[0m non-resource variables are not supported in the long term
[2m[36m(pid=39780)[0m Instructions for updating:
[2m[36m(pid=39780)[0m non-resource variables are not supported in the long term
[2m[36m(pid=39774)[0m Instructions for updating:
[2m[36m(pid=39774)[0m non-resource variables are not supported in the long term
[2m[36m(pid=39770)[0m Instructions for updating:
[2m[36m(pid=39770)[0m non-resource variables are not supported in the long term
[2m[36m(pid=113191)[0m Instructions for updating

<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
[2K[36m(pid=27027)[0m [2K
[2m[36m(pid=27027)[0m <IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[2m[36m(pid=27027)[0m 2021-05-25 10:02:22,533	INFO tune.py:549 -- Total run time: 55.35 seconds (55.19 seconds for the tuning loop).


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7fd356fd07c0>

In [10]:
# Small five-point learning-rate grid; the result of the sweep is kept in
# `analysis` for later inspection.
# NOTE(review): in file order this cell follows exp.stop(cluster); it needs a
# live Ray connection — confirm where it is meant to run.
lr_candidates = [0.001, 0.002, 0.003, 0.004, 0.005]

analysis = tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config={
        "framework": "torch",
        "env": "CartPole-v0",
        "lr": tune.grid_search(lr_candidates),
    },
    progress_reporter=JupyterNotebookReporter(overwrite=True),
)

<IPython.core.display.HTML object>


[2m[36m(pid=110180)[0m Instructions for updating:
[2m[36m(pid=110180)[0m non-resource variables are not supported in the long term
[2m[36m(pid=40586)[0m Instructions for updating:
[2m[36m(pid=40586)[0m non-resource variables are not supported in the long term
[2m[36m(pid=110180)[0m 2021-05-25 10:04:22,637	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=42854)[0m Instructions for updating:
[2m[36m(pid=42854)[0m non-resource variables are not supported in the long term
[2m[36m(pid=40586)[0m 2021-05-25 10:04:22,978	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=34269)[0m Instructions for updating:
[2m[36m(pid=34269)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2297)[0m Instructions for updating:
[2m[36m(pid=2297)[0m non-resource v

[2K[36m(pid=27027)[0m [2K
[2m[36m(pid=27027)[0m <IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[2m[36m(pid=27027)[0m 2021-05-25 10:04:54,258	INFO tune.py:549 -- Total run time: 34.48 seconds (34.15 seconds for the tuning loop).
[2m[36m(pid=42854)[0m 2021-05-25 10:04:54,221	ERROR worker.py:382 -- SystemExit was raised from the worker
[2m[36m(pid=42854)[0m Traceback (most recent call last):
[2m[36m(pid=42854)[0m   File "python/ray/_raylet.pyx", line 495, in ray._raylet.execute_task
[2m[36m(pid=42854)[0m   File "python/ray/_raylet.pyx", line 505, in ray._raylet.execute_task
[2m[36m(pid=42854)[0m   File "python/ray/_raylet.pyx", line 449, in ray._raylet.execute_task.function_executor
[2m[36m(pid=42854)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/_private/function_manager.py", line 556, in actor_method_executor
[2m[36m(pid=42854)[0m     return method(__ray_actor, *args, **kwargs)
[2m[36m(pid=42854)[0m   File "/lus/scratch/arigazzi/anaconda3/envs/smartsim/lib/python3.8/site-packages/ray/actor.py", line 1001, in __