# Setting up a Ray cluster with SmartSim

## 1. Start the cluster
We set up a SmartSim experiment, which will handle the launch of the Ray cluster.

First we import the relevant modules.

In [1]:
from smartsim import Experiment
from smartsim.ray import RayCluster

# Number of Ray worker nodes requested in addition to the head node.
NUM_WORKERS = 8

# No allocation handle is set here; this value is passed to RayCluster as
# `alloc`. A Slurm allocation could be requested up front instead, e.g.:
#   slurm.get_allocation(nodes=1+NUM_WORKERS, time="12:00:00",
#                        options={"ntasks": str(1+NUM_WORKERS), "partition": "spider", "C": "V100"})
alloc = None

In [2]:
# Experiment object that handles generation and launch, using the PBS launcher.
exp = Experiment("ray-cluster", launcher='pbs')

# Ray cluster description: NUM_WORKERS workers, created with batch=True and
# 38 CPUs passed as `ray_num_cpus`.
cluster = RayCluster(
    name="ray-cluster",
    run_args={},
    path='',
    launcher='pbs',
    workers=NUM_WORKERS,
    alloc=alloc,
    batch=True,
    ray_num_cpus=38,
)

if cluster.batch:
    # Shell lines appended to the batch settings' preamble of each model;
    # presumably emitted at the top of the generated batch script so that the
    # "smartsim" conda environment is active -- TODO confirm.
    # NOTE(review): `_preamble` is a private attribute of the batch settings.
    conda_setup = ("source ~/.bashrc", "conda activate smartsim")
    cluster.head_model.batch_settings._preamble += list(conda_setup)
    if NUM_WORKERS:
        # The worker model is only touched when at least one worker is requested.
        cluster.worker_model.batch_settings._preamble += list(conda_setup)

# Write the experiment files for the cluster, replacing any earlier output.
exp.generate(cluster, overwrite=True)

16:00:50 crystal SmartSim[9377] INFO Working in previously created experiment


In [3]:
# Start the cluster with block=False (the call is not asked to wait) and
# summary=False (no launch summary requested).
exp.start(
    cluster,
    block=False,
    summary=False,
)

16:00:53 crystal SmartSim[9377] ERROR An error occurred when launching head 
Check error and output files for details.
Name: head
Type: RayHead
Executable: /lus/scratch/arigazzi/anaconda3/envs/smartsim/bin/python
Executable arguments: ['/lus/snx11242/arigazzi/smartsim-dev/SmartSim/smartsim/ray/rayserverstarter.py', '--num-cpus=38', '--port=6780', '--redis-password=a8c08c1b-44fd-45c4-a7e6-d6756783a222']
Run Command: aprun

16:00:53 crystal SmartSim[9377] ERROR Job step head failed to launch


SmartSimError: Job step head failed to launch

## 2. Start the Ray driver script

In [None]:
# Hand the ppo_tune.py driver script to the cluster.
cluster.start_ray_job(
    '/lus/scratch/arigazzi/smartsim-dev/SmartSim/tutorials/05_starting_ray/templates/ppo_tune.py'
)

In [None]:
# Hand the ppo_train.py driver script to the cluster.
cluster.start_ray_job(
    '/lus/scratch/arigazzi/smartsim-dev/SmartSim/tutorials/05_starting_ray/templates/ppo_train.py'
)

In [None]:
# Hand the mnist_pytorch_trainable.py driver script to the cluster.
cluster.start_ray_job(
    '/lus/scratch/arigazzi/smartsim-dev/SmartSim/tutorials/05_starting_ray/templates/mnist_pytorch_trainable.py'
)

## 3. Stop cluster and release allocation

In [None]:
# Release the allocation only if one was actually obtained (alloc is None by
# default in this notebook, in which case nothing happens here).
if alloc:
    # Imported here because no earlier cell imports `slurm`; without it this
    # branch raises NameError as soon as an allocation is in use.
    # NOTE(review): module path assumed for this SmartSim version -- confirm.
    from smartsim.launcher import slurm

    slurm.release_allocation(alloc)

In [4]:
import ray
from ray import tune
import ray.util
import time
import numpy as np
import argparse
import os
from ray.tune.progress_reporter import JupyterNotebookReporter

# Connect this kernel to the cluster head as a Ray client on port 10001.
head_client_endpoint = cluster.head_model.address + ":10001"
ray.util.connect(head_client_endpoint)

# Progress reporter that overwrites its notebook output on each update.
reporter = JupyterNotebookReporter(overwrite=True)

print("initialized")

# 100 evenly spaced learning rates in [0.001, 0.01], explored as a grid search.
learning_rates = np.linspace(0.001, 0.01, 100).tolist()

# Trainer configuration for PPO on CartPole-v0 (torch, CPU only).
ppo_config = {
    "framework": "torch",
    "env": "CartPole-v0",
    "num_gpus": 0,
    "lr": tune.grid_search(learning_rates),
    "log_level": "ERROR",
    "num_cpus_per_worker": 1,
    "num_cpus_for_driver": 1,
}

# Run the sweep; each trial stops once episode_reward_max reaches 200.
tune.run(
    "PPO",
    stop={"episode_reward_max": 200},
    config=ppo_config,
    local_dir="/lus/scratch/arigazzi/ray_local/",
    verbose=2,
    fail_fast=True,
    progress_reporter=reporter,
    log_to_file=True,
)

initialized
<IPython.core.display.HTML object>


[2m[36m(pid=32602)[0m 2021-05-19 15:38:52,005	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


<IPython.core.display.HTML object>


[2m[36m(pid=32598)[0m 2021-05-19 15:38:57,975	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32600)[0m 2021-05-19 15:38:57,981	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32595)[0m 2021-05-19 15:38:58,191	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32597)[0m 2021-05-19 15:38:58,188	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32594)[0m 2021-05-19 15:38:58,229	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=32602)[0m 2021-05-19 15:39:24,282	INFO trainable.py:101 -- T

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00000 reported episode_reward_max=86.0,episode_reward_min=9.0,episode_reward_mean=22.982758620689655,episode_len_mean=22.982758620689655,episode_media={},episodes_this_iter=174,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.6501029991226115, 'mean_inference_ms': 17.685853087411164, 'mean_action_processing_ms': 0.6844336755076307, 'mean_env_wait_ms': 0.8950426797003957, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 43255.553, 'sample_throughput': 92.474, 'learn_time_ms': 64934.756, 'learn_throughput': 61.6, 'update_time_ms': 7.278},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.001, 'total_loss': 83.75345146656036, 'policy_loss': -0.05583573368494399, 'vf_loss': 83.80137264728546, 'v

[2m[36m(pid=39406, ip=172.30.49.190)[0m 2021-05-19 15:41:23,961	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=39480, ip=172.30.49.190)[0m 2021-05-19 15:41:23,967	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=39509, ip=172.30.49.190)[0m 2021-05-19 15:41:23,963	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=39526, ip=172.30.49.190)[0m 2021-05-19 15:41:23,970	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=39544, ip=172.30.49.190)[0m 2021-05-19 15:41:23,965	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v 

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00004 reported episode_reward_max=91.0,episode_reward_min=8.0,episode_reward_mean=22.11731843575419,episode_len_mean=22.11731843575419,episode_media={},episodes_this_iter=179,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.0451030319036347, 'mean_inference_ms': 13.869340846331925, 'mean_action_processing_ms': 0.6118810980377736, 'mean_env_wait_ms': 0.9321321311947851, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 45110.224, 'sample_throughput': 88.672, 'learn_time_ms': 65871.576, 'learn_throughput': 60.724, 'update_time_ms': 124.506},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.0013636363636363637, 'total_loss': 77.54526734352112, 'policy_loss': -0.04880880439304747, 'vf_loss': 77.

[2m[36m(pid=39406, ip=172.30.49.190)[0m 2021-05-19 15:41:41,784	INFO trainable.py:101 -- Trainable.setup took 17.824 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=39480, ip=172.30.49.190)[0m 2021-05-19 15:41:41,799	INFO trainable.py:101 -- Trainable.setup took 17.836 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=39509, ip=172.30.49.190)[0m 2021-05-19 15:41:41,816	INFO trainable.py:101 -- Trainable.setup took 17.856 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=39526, ip=172.30.49.190)[0m 2021-05-19 15:41:41,828	INFO trainable.py:101 -- Trainable.setup took 17.863 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=39544, ip=172.30.49.19

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00000 reported episode_reward_max=188.0,episode_reward_min=9.0,episode_reward_mean=46.85,episode_len_mean=46.85,episode_media={},episodes_this_iter=67,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.4564239079623877, 'mean_inference_ms': 16.402095843358897, 'mean_action_processing_ms': 0.6417709862110046, 'mean_env_wait_ms': 0.8880732257390872, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 38492.344, 'sample_throughput': 103.917, 'learn_time_ms': 64690.736, 'learn_throughput': 61.833, 'update_time_ms': 7.032},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.001, 'total_loss': 446.1968250274658, 'policy_loss': -0.03938280988950282, 'vf_loss': 446.2305965423584, 'vf_expla



<IPython.core.display.HTML object>
[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00007 reported episode_reward_max=60.0,episode_reward_min=9.0,episode_reward_mean=21.06878306878307,episode_len_mean=21.06878306878307,episode_media={},episodes_this_iter=189,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.828155612580164, 'mean_inference_ms': 14.442360556331888, 'mean_action_processing_ms': 0.6357064856156606, 'mean_env_wait_ms': 0.8940045761700655, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 34890.46, 'sample_throughput': 114.645, 'learn_time_ms': 52703.412, 'learn_throughput': 75.896, 'update_time_ms': 7.871},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.0016363636363636363, 'total_loss': 56.70728224515915, 'policy_loss': -0.0



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00011 reported episode_reward_max=72.0,episode_reward_min=9.0,episode_reward_mean=23.197674418604652,episode_len_mean=23.197674418604652,episode_media={},episodes_this_iter=172,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.0360927510186442, 'mean_inference_ms': 14.335171140007951, 'mean_action_processing_ms': 0.7109910115666653, 'mean_env_wait_ms': 0.81544464527809, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 34873.709, 'sample_throughput': 114.7, 'learn_time_ms': 52829.169, 'learn_throughput': 75.716, 'update_time_ms': 7.749},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.002, 'total_loss': 94.76134920120239, 'policy_loss': -0.04660388032789342, 'vf_loss': 94.80022430419922, 'vf



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00006 reported episode_reward_max=125.0,episode_reward_min=8.0,episode_reward_mean=24.714285714285715,episode_len_mean=24.714285714285715,episode_media={},episodes_this_iter=161,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.9822105554057337, 'mean_inference_ms': 14.296846717232409, 'mean_action_processing_ms': 0.5160799470851087, 'mean_env_wait_ms': 0.7743891812214223, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 34729.229, 'sample_throughput': 115.177, 'learn_time_ms': 52569.878, 'learn_throughput': 76.089, 'update_time_ms': 50.26},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.0015454545454545456, 'total_loss': 170.70933389663696, 'policy_loss': -0.052674749385914765, 'vf_loss':



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00008 reported episode_reward_max=85.0,episode_reward_min=9.0,episode_reward_mean=24.2,episode_len_mean=24.2,episode_media={},episodes_this_iter=165,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.7644982411824482, 'mean_inference_ms': 14.623200097806958, 'mean_action_processing_ms': 0.41561189750015226, 'mean_env_wait_ms': 0.776773469396422, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 34836.152, 'sample_throughput': 114.823, 'learn_time_ms': 52623.561, 'learn_throughput': 76.012, 'update_time_ms': 7.119},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.0017272727272727275, 'total_loss': 104.73519110679626, 'policy_loss': -0.04131202757707797, 'vf_loss': 104.76919102668762, 'vf_expla



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00004 reported episode_reward_max=169.0,episode_reward_min=9.0,episode_reward_mean=48.19,episode_len_mean=48.19,episode_media={},episodes_this_iter=63,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.1014962094020115, 'mean_inference_ms': 13.701296394183627, 'mean_action_processing_ms': 0.5380312697771665, 'mean_env_wait_ms': 0.8839612576262804, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 38619.603, 'sample_throughput': 103.574, 'learn_time_ms': 74361.051, 'learn_throughput': 53.792, 'update_time_ms': 63.695},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.0013636363636363637, 'total_loss': 506.90384101867676, 'policy_loss': -0.032757237800979055, 'vf_loss': 506.93035

[2m[36m(pid=36057)[0m 2021-05-19 15:43:45,121	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=36059)[0m 2021-05-19 15:43:45,123	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=36057)[0m 2021-05-19 15:44:25,707	INFO trainable.py:101 -- Trainable.setup took 40.587 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=36059)[0m 2021-05-19 15:44:25,831	INFO trainable.py:101 -- Trainable.setup took 40.708 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00010 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=45.31,episode_len_mean=45.31,episode_media={},episodes_this_iter=72,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.8638825331225629, 'mean_inference_ms': 11.729535241701276, 'mean_action_processing_ms': 0.584269493405715, 'mean_env_wait_ms': 0.9035118919218319, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 27624.12, 'sample_throughput': 144.801, 'learn_time_ms': 55483.344, 'learn_throughput': 72.094, 'update_time_ms': 56.183},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.0019090909090909093, 'total_loss': 326.1769299507141, 'policy_loss': -0.027764145022956654, 'vf_loss': 326.19922113

The actor or task with ID ffffffffffffffffe4fee91c91bb6f05b75c3bb901000000 cannot be scheduled right now. It requires {CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f: 1.000000}, {CPU_group_db9c63726e369ba8ebf20eed9a99f77f: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.135904 GiB/87.135904 GiB memory, 37.343959 GiB/37.343959 GiB object_store_memory, 1.000000/1.000000 CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_e230dffb3fd10e5648088dd8af6d9169, 1.000000/1.000000 CPU_group_1_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_19401f96d5f51f491cfdeacd17a53e34, 1.000000/1.000000 CPU_group_1_3385cc0525dfcbbd4059316246676e35, 0.000000/1.000000 CPU_group_0_5763e5534f3359a5da9b9be1b5b983e2, 1.000000/1.000000 node:10.128.0.18, 0.000000/3.000000 CPU_group_3385cc0525dfcbbd4059316246676e35, 1.000000/1.000000 CPU_group_2_3385cc0525dfcbbd4059316246676e35, 1.000000/1.000000 CPU_group_2_5763e5534f3359a5da9b9be1b5b983e2, 1

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00007 reported episode_reward_max=163.0,episode_reward_min=11.0,episode_reward_mean=44.11,episode_len_mean=44.11,episode_media={},episodes_this_iter=82,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.7046348658742994, 'mean_inference_ms': 13.633418806857035, 'mean_action_processing_ms': 0.593093972633863, 'mean_env_wait_ms': 1.061995075303193, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 32531.963, 'sample_throughput': 122.956, 'learn_time_ms': 63166.931, 'learn_throughput': 63.324, 'update_time_ms': 7.678},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.0016363636363636363, 'total_loss': 296.44860076904297, 'policy_loss': -0.026098009548150003, 'vf_loss': 296.4687080

[2m[36m(pid=42350, ip=172.30.49.190)[0m 2021-05-19 15:45:09,721	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=42399, ip=172.30.49.190)[0m 2021-05-19 15:45:21,430	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00000 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=79.05,episode_len_mean=79.05,episode_media={},episodes_this_iter=27,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.4075163431411692, 'mean_inference_ms': 16.272974541682682, 'mean_action_processing_ms': 0.680826604522172, 'mean_env_wait_ms': 0.955387804373008, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 42682.971, 'sample_throughput': 93.714, 'learn_time_ms': 76302.393, 'learn_throughput': 52.423, 'update_time_ms': 7.059},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.001, 'total_loss': 755.8994483947754, 'policy_loss': -0.0190161170612555, 'vf_loss': 755.9158668518066, 'vf_explaine

[2m[36m(pid=42394, ip=172.30.49.190)[0m 2021-05-19 15:45:23,027	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


<IPython.core.display.HTML object>
[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00001 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=79.34,episode_len_mean=79.34,episode_media={},episodes_this_iter=28,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.1641206688959675, 'mean_inference_ms': 16.152426389769346, 'mean_action_processing_ms': 0.4699457413155163, 'mean_env_wait_ms': 1.1426707845975534, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 44833.109, 'sample_throughput': 89.22, 'learn_time_ms': 74172.134, 'learn_throughput': 53.929, 'update_time_ms': 64.285},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.001090909090909091, 'total_loss': 648.5875043869019, 'policy_loss': -0.01494635



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00002 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=78.49,episode_len_mean=78.49,episode_media={},episodes_this_iter=26,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.04819643372076, 'mean_inference_ms': 16.075676549188593, 'mean_action_processing_ms': 0.5969556930671912, 'mean_env_wait_ms': 0.96988415087358, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 44836.267, 'sample_throughput': 89.213, 'learn_time_ms': 74837.82, 'learn_throughput': 53.449, 'update_time_ms': 122.031},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.0011818181818181819, 'total_loss': 611.6427593231201, 'policy_loss': -0.01514126022811979, 'vf_loss': 611.65369033813



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00004 reported episode_reward_max=200.0,episode_reward_min=10.0,episode_reward_mean=81.28,episode_len_mean=81.28,episode_media={},episodes_this_iter=28,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.1134831427615723, 'mean_inference_ms': 14.549173849558496, 'mean_action_processing_ms': 0.6015314150508622, 'mean_env_wait_ms': 0.9005559283685522, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 44537.498, 'sample_throughput': 89.812, 'learn_time_ms': 80883.591, 'learn_throughput': 49.454, 'update_time_ms': 46.11},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.0013636363636363637, 'total_loss': 592.7615032196045, 'policy_loss': -0.0075556281371973455, 'vf_loss': 592.76605

[2m[36m(pid=42399, ip=172.30.49.190)[0m 2021-05-19 15:45:59,974	INFO trainable.py:101 -- Trainable.setup took 38.544 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=42350, ip=172.30.49.190)[0m 2021-05-19 15:46:00,099	INFO trainable.py:101 -- Trainable.setup took 50.379 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=42394, ip=172.30.49.190)[0m 2021-05-19 15:46:00,113	INFO trainable.py:101 -- Trainable.setup took 37.087 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=38170)[0m 2021-05-19 15:46:03,517	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=38182)[0m 2021-05-19 15:46:03,960	INFO trainer.py:694 -- Current log_level is

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00013 reported episode_reward_max=60.0,episode_reward_min=8.0,episode_reward_mean=22.508474576271187,episode_len_mean=22.508474576271187,episode_media={},episodes_this_iter=177,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.9632635642047588, 'mean_inference_ms': 11.183544822291148, 'mean_action_processing_ms': 0.4634645151826821, 'mean_env_wait_ms': 0.9142962922635987, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 27982.865, 'sample_throughput': 142.945, 'learn_time_ms': 119828.295, 'learn_throughput': 33.381, 'update_time_ms': 15.719},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.002181818181818182, 'total_loss': 73.68086576461792, 'policy_loss': -0.053564514906611294, 'vf_loss': 

[2m[36m(pid=38208)[0m 2021-05-19 15:46:58,264	INFO trainable.py:101 -- Trainable.setup took 53.545 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=38220)[0m 2021-05-19 15:46:58,323	INFO trainable.py:101 -- Trainable.setup took 44.142 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00011 reported episode_reward_max=200.0,episode_reward_min=10.0,episode_reward_mean=75.2,episode_len_mean=75.2,episode_media={},episodes_this_iter=32,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.1364696852554206, 'mean_inference_ms': 14.509816785615248, 'mean_action_processing_ms': 0.6338021818498327, 'mean_env_wait_ms': 0.8409370986219166, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 39490.775, 'sample_throughput': 101.289, 'learn_time_ms': 65491.506, 'learn_throughput': 61.077, 'update_time_ms': 36.561},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.002, 'total_loss': 537.199465751648, 'policy_loss': -0.017334639182081446, 'vf_loss': 537.2115268707275, 'vf_expl

[2m[36m(pid=44587, ip=172.30.49.190)[0m 2021-05-19 15:47:30,458	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=44589, ip=172.30.49.190)[0m 2021-05-19 15:47:30,454	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=44590, ip=172.30.49.190)[0m 2021-05-19 15:47:30,886	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=44587, ip=172.30.49.190)[0m 2021-05-19 15:47:45,827	INFO trainable.py:101 -- Trainable.setup took 15.370 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=44589, ip=172.30.49.190)[0m 2021-05-19 15:47:45,851	INFO trainable.py:101 -- Trainable.setup took 15.397 seconds. If your trainable is 

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00016 reported episode_reward_max=79.0,episode_reward_min=8.0,episode_reward_mean=21.983425414364643,episode_len_mean=21.983425414364643,episode_media={},episodes_this_iter=181,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.8380613003123464, 'mean_inference_ms': 11.46914958947275, 'mean_action_processing_ms': 0.5164421369535072, 'mean_env_wait_ms': 0.5734599183234741, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 27881.379, 'sample_throughput': 143.465, 'learn_time_ms': 78216.459, 'learn_throughput': 51.14, 'update_time_ms': 165.533},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.002454545454545455, 'total_loss': 76.1794056892395, 'policy_loss': -0.05160876706941053, 'vf_loss': 76.2

[2m[36m(pid=44590, ip=172.30.49.190)[0m 2021-05-19 15:47:46,610	INFO trainable.py:101 -- Trainable.setup took 15.724 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00014 reported episode_reward_max=143.0,episode_reward_min=9.0,episode_reward_mean=22.206703910614525,episode_len_mean=22.206703910614525,episode_media={},episodes_this_iter=179,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.8353868955201016, 'mean_inference_ms': 11.240557688855786, 'mean_action_processing_ms': 0.4690879469850356, 'mean_env_wait_ms': 0.8192364086307798, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 27686.031, 'sample_throughput': 144.477, 'learn_time_ms': 78851.351, 'learn_throughput': 50.728, 'update_time_ms': 139.09},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.0022727272727272726, 'total_loss': 86.1795494556427, 'policy_loss': -0.039461526306695305, 'vf_loss': 

The actor or task with ID ffffffffffffffff7896310dcc02927f4d851ca101000000 cannot be scheduled right now. It requires {CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f: 1.000000}, {CPU_group_db9c63726e369ba8ebf20eed9a99f77f: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.135904 GiB/87.135904 GiB memory, 37.343959 GiB/37.343959 GiB object_store_memory, 1.000000/1.000000 CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_e230dffb3fd10e5648088dd8af6d9169, 1.000000/1.000000 CPU_group_1_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_19401f96d5f51f491cfdeacd17a53e34, 1.000000/1.000000 CPU_group_1_3385cc0525dfcbbd4059316246676e35, 0.000000/1.000000 CPU_group_0_5763e5534f3359a5da9b9be1b5b983e2, 1.000000/1.000000 node:10.128.0.18, 0.000000/3.000000 CPU_group_3385cc0525dfcbbd4059316246676e35, 1.000000/1.000000 CPU_group_2_3385cc0525dfcbbd4059316246676e35, 1.000000/1.000000 CPU_group_2_5763e5534f3359a5da9b9be1b5b983e2, 1

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00017 reported episode_reward_max=150.0,episode_reward_min=9.0,episode_reward_mean=45.65,episode_len_mean=45.65,episode_media={},episodes_this_iter=72,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.2177673488872793, 'mean_inference_ms': 15.32141209961086, 'mean_action_processing_ms': 0.5161836121807452, 'mean_env_wait_ms': 0.7822266649474323, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 36993.802, 'sample_throughput': 108.126, 'learn_time_ms': 69470.783, 'learn_throughput': 57.578, 'update_time_ms': 12.705},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.0025454545454545456, 'total_loss': 285.4837260246277, 'policy_loss': -0.04914408281183569, 'vf_loss': 285.52566337

[2m[36m(pid=46550, ip=172.30.49.190)[0m 2021-05-19 15:50:20,639	INFO trainable.py:101 -- Trainable.setup took 15.087 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00013 reported episode_reward_max=200.0,episode_reward_min=13.0,episode_reward_mean=68.02,episode_len_mean=68.02,episode_media={},episodes_this_iter=45,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.0556124640578621, 'mean_inference_ms': 13.550194890419617, 'mean_action_processing_ms': 0.531017184540437, 'mean_env_wait_ms': 0.9920800080262129, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 33760.098, 'sample_throughput': 118.483, 'learn_time_ms': 85285.316, 'learn_throughput': 46.901, 'update_time_ms': 59.406},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.002181818181818182, 'total_loss': 425.2617349624634, 'policy_loss': -0.01900720107369125, 'vf_loss': 425.2744646



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00016 reported episode_reward_max=200.0,episode_reward_min=10.0,episode_reward_mean=71.43,episode_len_mean=71.43,episode_media={},episodes_this_iter=37,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.8738996674257745, 'mean_inference_ms': 14.26839374928858, 'mean_action_processing_ms': 0.5562291147165326, 'mean_env_wait_ms': 0.7470185745003424, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 38363.524, 'sample_throughput': 104.266, 'learn_time_ms': 64260.31, 'learn_throughput': 62.247, 'update_time_ms': 59.817},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.002454545454545455, 'total_loss': 480.5640296936035, 'policy_loss': -0.01588725796318613, 'vf_loss': 480.57525825

[2m[36m(pid=42101)[0m 2021-05-19 15:51:14,360	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=42155)[0m 2021-05-19 15:51:16,951	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
The actor or task with ID ffffffffffffffff6efb8d5ff6bed703b34696b201000000 cannot be scheduled right now. It requires {CPU_group_0_04424e424ef3127dac20feda22e6c560: 1.000000}, {CPU_group_04424e424ef3127dac20feda22e6c560: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.135904 GiB/87.135904 GiB memory, 37.343959 GiB/37.343959 GiB object_store_memory, 0.000000/1.000000 CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_e230dffb3fd10e5648088dd8af6d9169, 1.000000/1.000000 CPU_group_1_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_19401f96d5f51f491cf

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00024 reported episode_reward_max=88.0,episode_reward_min=9.0,episode_reward_mean=22.93103448275862,episode_len_mean=22.93103448275862,episode_media={},episodes_this_iter=174,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.7941089267298268, 'mean_inference_ms': 8.466176791343587, 'mean_action_processing_ms': 0.3267546334223631, 'mean_env_wait_ms': 0.6908704526107212, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 22038.653, 'sample_throughput': 181.499, 'learn_time_ms': 89071.052, 'learn_throughput': 44.908, 'update_time_ms': 6.932},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.003181818181818182, 'total_loss': 111.57347440719604, 'policy_loss': -0.04601599264424294, 'vf_loss': 111.6



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00023 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=71.38,episode_len_mean=71.38,episode_media={},episodes_this_iter=35,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.2360563928631507, 'mean_inference_ms': 18.805885149999845, 'mean_action_processing_ms': 0.6094279034790532, 'mean_env_wait_ms': 1.127278288602751, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 45499.183, 'sample_throughput': 87.914, 'learn_time_ms': 60316.586, 'learn_throughput': 66.317, 'update_time_ms': 5.332},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.003090909090909091, 'total_loss': 538.5059070587158, 'policy_loss': -0.024122198665281758, 'vf_loss': 538.524193763



[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00020 reported episode_reward_max=200.0,episode_reward_min=11.0,episode_reward_mean=60.48,episode_len_mean=60.48,episode_media={},episodes_this_iter=40,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.1717889917478794, 'mean_inference_ms': 16.480343771427474, 'mean_action_processing_ms': 0.5196613648401729, 'mean_env_wait_ms': 0.9751698599576767, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=12000,timers={'sample_time_ms': 44812.683, 'sample_throughput': 89.26, 'learn_time_ms': 84499.491, 'learn_throughput': 47.338, 'update_time_ms': 5.12},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.45000000000000007, 'cur_lr': 0.0028181818181818186, 'total_loss': 492.4771318435669, 'policy_loss': -0.023834797670133412, 'vf_loss': 492.49487209

[2m[36m(pid=49724, ip=172.30.49.190)[0m 2021-05-19 15:53:40,530	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=49728, ip=172.30.49.190)[0m 2021-05-19 15:53:40,528	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=49732, ip=172.30.49.190)[0m 2021-05-19 15:53:40,533	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00027 reported episode_reward_max=88.0,episode_reward_min=9.0,episode_reward_mean=22.46067415730337,episode_len_mean=22.46067415730337,episode_media={},episodes_this_iter=178,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.0571738479426447, 'mean_inference_ms': 11.456385940891183, 'mean_action_processing_ms': 0.32481040717575627, 'mean_env_wait_ms': 0.5735205020517585, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 27808.322, 'sample_throughput': 143.842, 'learn_time_ms': 79644.939, 'learn_throughput': 50.223, 'update_time_ms': 10.857},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.003454545454545455, 'total_loss': 68.31681209802628, 'policy_loss': -0.03905672318069264, 'vf_loss': 68.

[2m[36m(pid=49724, ip=172.30.49.190)[0m 2021-05-19 15:53:55,992	INFO trainable.py:101 -- Trainable.setup took 15.465 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=49728, ip=172.30.49.190)[0m 2021-05-19 15:53:56,007	INFO trainable.py:101 -- Trainable.setup took 15.479 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=49732, ip=172.30.49.190)[0m 2021-05-19 15:53:56,029	INFO trainable.py:101 -- Trainable.setup took 15.502 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


<IPython.core.display.HTML object>


[2m[36m(pid=44271)[0m 2021-05-19 15:53:59,166	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00024 reported episode_reward_max=200.0,episode_reward_min=9.0,episode_reward_mean=45.74,episode_len_mean=45.74,episode_media={},episodes_this_iter=73,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.795956258336856, 'mean_inference_ms': 9.260254576711493, 'mean_action_processing_ms': 0.39913369925272646, 'mean_env_wait_ms': 0.5937762466110088, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 24143.137, 'sample_throughput': 165.679, 'learn_time_ms': 84762.591, 'learn_throughput': 47.191, 'update_time_ms': 64.325},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.003181818181818182, 'total_loss': 267.96294689178467, 'policy_loss': -0.030848020658595487, 'vf_loss': 267.9873762

[2m[36m(pid=44323)[0m 2021-05-19 15:54:01,369	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=44882)[0m 2021-05-19 15:54:11,663	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
The actor or task with ID ffffffffffffffffb9bdc127ee32267d0cf6cbe701000000 cannot be scheduled right now. It requires {CPU_group_db9c63726e369ba8ebf20eed9a99f77f: 1.000000}, {CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f: 1.000000} for placement, but this node only has remaining {0.000000/18.000000 CPU, 87.135904 GiB/87.135904 GiB memory, 37.343959 GiB/37.343959 GiB object_store_memory, 1.000000/1.000000 CPU_group_0_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_e230dffb3fd10e5648088dd8af6d9169, 1.000000/1.000000 CPU_group_1_db9c63726e369ba8ebf20eed9a99f77f, 1.000000/1.000000 CPU_group_1_19401f96d5f51f491cf

[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00026 reported episode_reward_max=75.0,episode_reward_min=9.0,episode_reward_mean=23.682634730538922,episode_len_mean=23.682634730538922,episode_media={},episodes_this_iter=167,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.9631844285388717, 'mean_inference_ms': 12.222771919749643, 'mean_action_processing_ms': 0.255405375072121, 'mean_env_wait_ms': 0.5674098740457586, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 29154.236, 'sample_throughput': 137.201, 'learn_time_ms': 146327.871, 'learn_throughput': 27.336, 'update_time_ms': 11.536},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.003363636363636364, 'total_loss': 98.81721806526184, 'policy_loss': -0.049235164624406025, 'vf_loss': 9

[2m[36m(pid=50589, ip=172.30.49.190)[0m 2021-05-19 15:55:02,636	INFO trainable.py:101 -- Trainable.setup took 14.240 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00027 reported episode_reward_max=187.0,episode_reward_min=9.0,episode_reward_mean=43.1,episode_len_mean=43.1,episode_media={},episodes_this_iter=78,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.4721618304765354, 'mean_inference_ms': 15.20237193710383, 'mean_action_processing_ms': 0.47480811845146675, 'mean_env_wait_ms': 0.8228191655404109, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=8000,timers={'sample_time_ms': 39994.39, 'sample_throughput': 100.014, 'learn_time_ms': 70517.689, 'learn_throughput': 56.723, 'update_time_ms': 8.867},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.30000000000000004, 'cur_lr': 0.003454545454545455, 'total_loss': 360.2304949760437, 'policy_loss': -0.031912141901557334, 'vf_loss': 360.25573062896

[2m[36m(pid=46945)[0m 2021-05-19 15:56:35,910	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=52183, ip=172.30.49.190)[0m 2021-05-19 15:56:40,269	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=52183, ip=172.30.49.190)[0m 2021-05-19 15:56:54,102	INFO trainable.py:101 -- Trainable.setup took 13.833 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00035 reported episode_reward_max=94.0,episode_reward_min=9.0,episode_reward_mean=23.333333333333332,episode_len_mean=23.333333333333332,episode_media={},episodes_this_iter=171,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.6447819503644646, 'mean_inference_ms': 9.221999926196544, 'mean_action_processing_ms': 0.5028998518261439, 'mean_env_wait_ms': 0.4982796021525895, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 22579.031, 'sample_throughput': 177.156, 'learn_time_ms': 90267.002, 'learn_throughput': 44.313, 'update_time_ms': 11.799},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.004181818181818182, 'total_loss': 100.82530117034912, 'policy_loss': -0.049077152769314125, 'vf_loss': 1

[2m[36m(pid=46945)[0m 2021-05-19 15:57:14,132	INFO trainable.py:101 -- Trainable.setup took 38.222 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00029 reported episode_reward_max=62.0,episode_reward_min=9.0,episode_reward_mean=21.03157894736842,episode_len_mean=21.03157894736842,episode_media={},episodes_this_iter=190,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 1.619285963872151, 'mean_inference_ms': 18.06243403806576, 'mean_action_processing_ms': 0.554287521732346, 'mean_env_wait_ms': 1.0460789449029497, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 44407.538, 'sample_throughput': 90.075, 'learn_time_ms': 122005.047, 'learn_throughput': 32.786, 'update_time_ms': 9.597},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.003636363636363637, 'total_loss': 68.67967522144318, 'policy_loss': -0.05107551920809783, 'vf_loss': 68.72348

[2m[36m(pid=53737, ip=172.30.49.190)[0m 2021-05-19 15:58:25,799	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=53739, ip=172.30.49.190)[0m 2021-05-19 15:58:25,805	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=53737, ip=172.30.49.190)[0m 2021-05-19 15:58:41,733	INFO trainable.py:101 -- Trainable.setup took 15.935 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=53739, ip=172.30.49.190)[0m 2021-05-19 15:58:41,721	INFO trainable.py:101 -- Trainable.setup took 15.916 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


[2m[36m(pid=32576)[0m Trial PPO_CartPole-v0_359a9_00037 reported episode_reward_max=93.0,episode_reward_min=9.0,episode_reward_mean=22.725714285714286,episode_len_mean=22.725714285714286,episode_media={},episodes_this_iter=175,policy_reward_min={},policy_reward_max={},policy_reward_mean={},custom_metrics={},sampler_perf={'mean_raw_obs_processing_ms': 0.748811364897298, 'mean_inference_ms': 10.81839937158474, 'mean_action_processing_ms': 0.3127144585927598, 'mean_env_wait_ms': 0.4644452052342097, 'mean_env_render_ms': 0.0},off_policy_estimator={},num_healthy_workers=2,agent_timesteps_total=4000,timers={'sample_time_ms': 25902.238, 'sample_throughput': 154.427, 'learn_time_ms': 88348.739, 'learn_throughput': 45.275, 'update_time_ms': 7.504},info={'learner': defaultdict(<class 'dict'>, {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.2, 'cur_lr': 0.004363636363636364, 'total_loss': 89.68724203109741, 'policy_loss': -0.04823113637394272, 'vf_loss': 89.72

[2m[36m(pid=49698)[0m 2021-05-19 16:00:01,892	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=55388, ip=172.30.49.190)[0m 2021-05-19 16:00:20,464	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=55395, ip=172.30.49.190)[0m 2021-05-19 16:00:20,462	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=49751)[0m 2021-05-19 16:00:22,574	INFO trainer.py:694 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


KeyboardInterrupt: 

In [5]:
# Stop the Ray cluster (its head and worker jobs) that was started through this experiment.
exp.stop(cluster)

16:00:30 crystal SmartSim[3737] INFO Stopping model workers with job name workers-CBHIFCQZY0IL
16:00:30 crystal SmartSim[3737] INFO Stopping model head with job name head-CBHIF67HSKFD
