In [1]:
#!pip3 install ray torch torchvision tabulate tensorboard
#!pip3 install 'ray[rllib]'
#!pip3 install ray

In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

from models import VisualEncoder
from train import *



In [2]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a ``VisualEncoder`` followed by linear policy and value heads.

    The encoder weights are loaded from a checkpoint on disk when the model is
    constructed. ``forward`` returns one logit per discrete action and caches
    the value estimate so that ``value_function`` can return it afterwards.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and both heads, then load the encoder checkpoint.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; its ``n`` attribute sets the width of
                the policy head.
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder(features_dim)
        # Load onto the CPU first so the checkpoint can be read on machines
        # without a GPU; the modules are moved to CUDA below when available.
        self.encoder.load_state_dict(
            torch.load("/src/Visual Autoencoder weights and models/IGLU_encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate produced by the most recent forward() call.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is a 4-D tensor.
                Assumed to be a channel-last image batch with values in
                0-255 -- TODO confirm against the environment wrappers.
            state: Recurrent state; returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        # Move axis 3 to position 1 and rescale by 1/255.
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda()/.to() return a new tensor rather than moving in place,
        # so the result must be assigned. Target the device the layers are
        # actually on instead of assuming CUDA.
        obs = obs.to(self.action_head.weight.device)

        features = self.encoder(obs)
        action = self.action_head(features)
        # The value head emits one value per row; drop that size-1 axis.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the latest ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [3]:
# Make the custom network available to RLlib under the name that the
# "custom_model" entry of the trainer config refers to.
ModelCatalog.register_custom_model(
    model_name="my_torch_model",
    model_class=MyModelClass,
)

In [4]:
import os

# Enumerate GPUs in PCI bus order (see issue #152) and expose only device 0
# to this process.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES="0",
)

def env_creator(env_config):
    """Create the wrapped ``IGLUSilentBuilder-v0`` environment.

    Args:
        env_config: Optional mapping of overrides. Recognized keys:
            ``"max_steps"`` (default ``1000``), passed to ``gym.make``, and
            ``"task_preset"`` (default ``['C17']``), passed to ``TaskSet``.
            ``None`` or an empty mapping selects the defaults.

    Returns:
        The environment wrapped in ``PovOnlyWrapper`` and then
        ``IgluActionWrapper``.
    """
    overrides = env_config or {}
    max_steps = overrides.get("max_steps", 1000)
    task_preset = list(overrides.get("task_preset", ['C17']))

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Register the factory under the name referenced by the trainer's "env" setting.
register_env(name="my_env", env_creator=env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Trainer configuration: PPO on the registered "my_env" environment, using the
# custom Torch model registered as "my_torch_model".
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": {
        # Specify our custom model from above.
        "custom_model": "my_torch_model",
        # Extra kwargs to be passed to your model's c'tor.
        "custom_model_config": {},
    },
    # Settings read by the Weights & Biases logger.
    "logger_config": {
        "wandb": {
            "project": "IGLU-Minecraft",
            "name": "PPO C17 pretrained",
        },
    },
}

# No stop criterion is passed to tune.run here.
tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_d74d3_00000,PENDING,


2021-09-17 11:31:46,757	INFO wandb.py:170 -- Already logged into W&B.
2021-09-17 11:31:46,811	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[2m[36m(pid=333)[0m 2021-09-17 11:31:51,791	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=333)[0m 2021-09-17 11:31:51,791	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=333)[0m 2021-09-17 11:32:04,937	INFO trainable.py:109 -- Trainable.setup took 16.441 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-17_11-37-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8325520820087857
          entropy_coeff: 0.009999999999999998
          kl: 0.022896651729185865
          policy_loss: 0.14715835692154036
          total_loss: 0.1939159910298056
          vf_explained_var: 0.15425756573677063
          vf_loss: 0.06050382106461459
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 10.55.229.87
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,1,327.133,1000,0,0,0,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-17_11-37-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.5
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8210402091344198
          entropy_coeff: 0.009999999999999998
          kl: 0.026401506742062757
          policy_loss: -0.010755915939807893
          total_loss: 0.13765250378184848
          vf_explained_var: 0.6818528771400452
          vf_loss: 0.15869836426443523
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 10.55.229.87
  num_healthy_workers: 1
  of

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,2,347.958,2000,-1.5,0,-3,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-17_11-38-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -3.0
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.256585693359375
          entropy_coeff: 0.009999999999999998
          kl: 0.020107144171431782
          policy_loss: 0.2360995369652907
          total_loss: 0.33587057946456805
          vf_explained_var: 0.3537977933883667
          vf_loss: 0.11328868969447083
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 10.55.229.87
  num_healthy_work

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,3,371.127,3000,-3,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-17_11-38-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -2.25
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.877338202794393
          entropy_coeff: 0.009999999999999998
          kl: 0.007898074699230845
          policy_loss: 0.16588142895036273
          total_loss: 0.18150627945239345
          vf_explained_var: -0.125324085354805
          vf_loss: 0.029067032629003127
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,4,392.144,4000,-2.25,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-17_11-38-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.8
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1206712047259013
          entropy_coeff: 0.009999999999999998
          kl: 0.007705085549517479
          policy_loss: 0.17832471330960592
          total_loss: 0.1922963089413113
          vf_explained_var: 0.2709288001060486
          vf_loss: 0.029977375594899058
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,5,414.332,5000,-1.8,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-17_11-39-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.5
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5382269514931575
          entropy_coeff: 0.009999999999999998
          kl: 0.00495691013425152
          policy_loss: 0.1046609356171555
          total_loss: 0.08784559451871449
          vf_explained_var: -0.7114735245704651
          vf_loss: 0.005221012464931442
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,6,436.121,6000,-1.5,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-17_11-39-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.2857142857142858
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1081003692415026
          entropy_coeff: 0.009999999999999998
          kl: 0.012554594395871593
          policy_loss: 0.15321244191792277
          total_loss: 0.14868131623499922
          vf_explained_var: 0.08455053716897964
          vf_loss: 0.012312703746526191
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,7,457.852,7000,-1.28571,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-17_11-40-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.125
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 8
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.654929945203993
          entropy_coeff: 0.009999999999999998
          kl: 0.009870582974131764
          policy_loss: -0.15086126493083107
          total_loss: -0.16899353398217096
          vf_explained_var: -0.3211252987384796
          vf_loss: 0.005085709155213812
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,8,479.749,8000,-1.125,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-17_11-40-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -1.0
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 9
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4034628338283963
          entropy_coeff: 0.009999999999999998
          kl: 0.010819106921657844
          policy_loss: 0.24843704075449044
          total_loss: 0.23871621468828785
          vf_explained_var: 0.05732899159193039
          vf_loss: 0.010662354267616239
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,9,501.728,9000,-1,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-17_11-40-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.9
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.563137952486674
          entropy_coeff: 0.009999999999999998
          kl: 0.008054958621834526
          policy_loss: 0.24835951179265975
          total_loss: 0.2339441145459811
          vf_explained_var: -0.6284920573234558
          vf_loss: 0.008497433804182542
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_restore: 10
  node_ip: 10.55.229.87
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,10,523.847,10000,-0.9,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-17_11-41-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.8181818181818182
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.531078415446811
          entropy_coeff: 0.009999999999999998
          kl: 0.012065643239943425
          policy_loss: 0.15149310115310882
          total_loss: 0.13705964783827465
          vf_explained_var: -0.8338345289230347
          vf_loss: 0.006805175863620307
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,11,547.241,11000,-0.818182,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-17_11-41-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.75
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 12
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5324205372068618
          entropy_coeff: 0.009999999999999998
          kl: 0.009273919454826427
          policy_loss: 0.15472963381972576
          total_loss: 0.13739754921860164
          vf_explained_var: -0.4567834138870239
          vf_loss: 0.0048621762259345915
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_ip: 10.55.229.87
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,12,572.238,12000,-0.75,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-17_11-42-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6923076923076923
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 13
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4626607656478883
          entropy_coeff: 0.009999999999999998
          kl: 0.007345461035274988
          policy_loss: 0.1729673508140776
          total_loss: 0.15344630868898498
          vf_explained_var: -0.36099565029144287
          vf_loss: 0.002626472667583989
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,13,597.146,13000,-0.692308,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-17_11-42-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6428571428571429
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0582600394884745
          entropy_coeff: 0.009999999999999998
          kl: 0.010520209682918492
          policy_loss: 0.14467662258280647
          total_loss: 0.13108939511908424
          vf_explained_var: -0.006725592073053122
          vf_loss: 0.0034448037447873505
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,14,617.649,14000,-0.642857,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-17_11-42-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.6
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6969514264000787
          entropy_coeff: 0.009999999999999998
          kl: 0.012664542759675889
          policy_loss: -0.15576508504649003
          total_loss: -0.17568847772975763
          vf_explained_var: -0.08843613415956497
          vf_loss: 0.002771834875198288
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_restore: 15
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,15,638.042,15000,-0.6,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-17_11-43-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5625
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6887780666351317
          entropy_coeff: 0.009999999999999998
          kl: 0.01096702016662355
          policy_loss: -0.10591367665264341
          total_loss: -0.1280851234992345
          vf_explained_var: -0.13516156375408173
          vf_loss: 0.001014961622422561
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,16,656.938,16000,-0.5625,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-17_11-43-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5294117647058824
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6258899688720705
          entropy_coeff: 0.009999999999999998
          kl: 0.014774509683753273
          policy_loss: -0.13556641543077097
          total_loss: -0.15573381218645307
          vf_explained_var: -0.100551538169384
          vf_loss: 0.0011051046196371317
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_restore: 17
  node_ip: 10.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,17,676.852,17000,-0.529412,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-17_11-43-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.5
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.660278063350254
          entropy_coeff: 0.009999999999999998
          kl: 0.010121407704418179
          policy_loss: -0.0747269931766722
          total_loss: -0.0967298680709468
          vf_explained_var: -0.2099701166152954
          vf_loss: 0.0011839313296756397
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 10.55.229.87
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,18,695.036,18000,-0.5,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-17_11-44-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.47368421052631576
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6568399164411756
          entropy_coeff: 0.009999999999999998
          kl: 0.010562322556422598
          policy_loss: -0.12038397652407487
          total_loss: -0.14271839052024815
          vf_explained_var: -0.5177922248840332
          vf_loss: 0.0006691975046932284
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_restore: 19
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,19,715.457,19000,-0.473684,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-17_11-44-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.45
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.623726153373718
          entropy_coeff: 0.009999999999999998
          kl: 0.011514101517073409
          policy_loss: -0.08947496025098695
          total_loss: -0.11106823401318656
          vf_explained_var: -0.48777228593826294
          vf_loss: 0.0007579742650300937
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 20
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,20,736.919,20000,-0.45,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-17_11-44-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.42857142857142855
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6582315233018665
          entropy_coeff: 0.009999999999999998
          kl: 0.01196047772176029
          policy_loss: -0.041588601387209365
          total_loss: -0.0637170770102077
          vf_explained_var: 0.10834157466888428
          vf_loss: 0.00041717866996704184
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,21,760.26,21000,-0.428571,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-17_11-45-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.4090909090909091
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.464519818623861
          entropy_coeff: 0.009999999999999998
          kl: 0.007032059225960867
          policy_loss: -0.033094367322822414
          total_loss: -0.05438164948589272
          vf_explained_var: -0.9026865363121033
          vf_loss: 0.000984595540366071
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip: 10.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,22,781.032,22000,-0.409091,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-17_11-45-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.391304347826087
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9812383386823866
          entropy_coeff: 0.009999999999999998
          kl: 0.013824228208887372
          policy_loss: 0.04696925183137258
          total_loss: 0.038301903340551585
          vf_explained_var: 0.1569199413061142
          vf_loss: 0.006479355992956294
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,23,802.578,23000,-0.391304,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-17_11-45-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.375
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2728791819678413
          entropy_coeff: 0.009999999999999998
          kl: 0.010273950128591084
          policy_loss: -0.11382531647880872
          total_loss: -0.1299478679895401
          vf_explained_var: 0.09640176594257355
          vf_loss: 0.003138778503570292
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,24,824.098,24000,-0.375,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-17_11-46-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.36
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6929480618900723
          entropy_coeff: 0.009999999999999998
          kl: 0.00955669336418021
          policy_loss: 0.005242812323073546
          total_loss: -0.0036508367707331975
          vf_explained_var: -0.5446525812149048
          vf_loss: 0.0048104486896287805
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,25,843.681,25000,-0.36,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-17_11-46-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.34615384615384615
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.356685909960005
          entropy_coeff: 0.009999999999999998
          kl: 0.011747713391941871
          policy_loss: 0.011826485395431519
          total_loss: 0.020307090547349717
          vf_explained_var: 0.14689010381698608
          vf_loss: 0.028082609513593423
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_restore: 26
  node_ip: 10.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,26,863.012,26000,-0.346154,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-17_11-46-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3333333333333333
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.67421985467275
          entropy_coeff: 0.009999999999999998
          kl: 0.007387687331710553
          policy_loss: -0.14290436191691291
          total_loss: -0.16642245882087284
          vf_explained_var: -0.009457924403250217
          vf_loss: 0.0007307549356482923
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_restore: 27
  node_ip: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,27,881.163,27000,-0.333333,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-17_11-47-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.32142857142857145
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.673496627807617
          entropy_coeff: 0.009999999999999998
          kl: 0.010316577879610338
          policy_loss: -0.17366463707553015
          total_loss: -0.19620292484760285
          vf_explained_var: 0.18298812210559845
          vf_loss: 0.0007148317679012608
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 28
  node_ip: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,28,897.962,28000,-0.321429,0,-6,1000


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-17_11-47-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3103448275862069
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.585330859820048
          entropy_coeff: 0.009999999999999998
          kl: 0.009386654423589925
          policy_loss: -0.15296314118636978
          total_loss: -0.17416918488840263
          vf_explained_var: 0.24210402369499207
          vf_loss: 0.0014792679193002792
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_restore: 29
  node_ip: 10.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,29,916.224,29000,-0.310345,0,-6,1000




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-17_11-48-03
  done: false
  episode_len_mean: 996.0333333333333
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.3
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5916891786787244
          entropy_coeff: 0.009999999999999998
          kl: 0.007493594396164803
          policy_loss: -0.02625422610176934
          total_loss: -0.04901077699744039
          vf_explained_var: -1.0
          vf_loss: 0.0006312527252399984
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 10.55.229.87
  num_he

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,30,957.244,30000,-0.3,0,-6,996.033


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-17_11-48-24
  done: false
  episode_len_mean: 996.1612903225806
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2903225806451613
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7046454906463624
          entropy_coeff: 0.009999999999999998
          kl: 0.007553308414218199
          policy_loss: 0.020122432543171778
          total_loss: -0.004054422304034233
          vf_explained_var: -1.0
          vf_loss: 0.0003203579966793768
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since_restore: 31
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,31,978.354,31000,-0.290323,0,-6,996.161


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-17_11-48-44
  done: false
  episode_len_mean: 996.28125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.28125
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5962152216169567
          entropy_coeff: 0.009999999999999998
          kl: 0.008306270521363797
          policy_loss: -0.03481988054182794
          total_loss: -0.0574555197937621
          vf_explained_var: -0.8783384561538696
          vf_loss: 0.000523146388554273
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 32
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,32,998.424,32000,-0.28125,0,-6,996.281


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-17_11-49-04
  done: false
  episode_len_mean: 996.3939393939394
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2727272727272727
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6951094309488934
          entropy_coeff: 0.009999999999999998
          kl: 0.005184844854526559
          policy_loss: -0.06732877666751544
          total_loss: -0.09233222810758485
          vf_explained_var: -1.0
          vf_loss: 0.00019775936945936538
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_restore: 33
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,33,1017.89,33000,-0.272727,0,-6,996.394


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-17_11-49-24
  done: false
  episode_len_mean: 996.5
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2647058823529412
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6443238364325627
          entropy_coeff: 0.009999999999999998
          kl: 0.006959032444651589
          policy_loss: -0.054323647357523444
          total_loss: -0.07749979491862986
          vf_explained_var: -0.9429082870483398
          vf_loss: 0.0009184180187326597
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
  node_ip: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,34,1038.47,34000,-0.264706,0,-6,996.5


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-17_11-49-47
  done: false
  episode_len_mean: 996.6
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2571428571428571
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.69091395272149
          entropy_coeff: 0.009999999999999998
          kl: 0.005998443806821503
          policy_loss: 0.06256420800669325
          total_loss: 0.0377992092528277
          vf_explained_var: -1.0
          vf_loss: 0.00011966413213586849
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_restore: 35
  node_ip: 10.55.229.87
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,35,1060.7,35000,-0.257143,0,-6,996.6


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-17_11-50-07
  done: false
  episode_len_mean: 996.6944444444445
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.25
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.639289869202508
          entropy_coeff: 0.009999999999999998
          kl: 0.008877543813081238
          policy_loss: -0.029852394552694426
          total_loss: -0.0523911381761233
          vf_explained_var: -0.7377275824546814
          vf_loss: 0.0008579833882524529
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_restore: 36
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,36,1081.31,36000,-0.25,0,-6,996.694


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-17_11-50-28
  done: false
  episode_len_mean: 996.7837837837837
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.24324324324324326
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6939575725131566
          entropy_coeff: 0.009999999999999998
          kl: 0.00973015713329681
          policy_loss: 0.02485754932794306
          total_loss: 0.0014339764912923178
          vf_explained_var: -1.0
          vf_loss: 0.00023207344428455043
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,37,1102.18,37000,-0.243243,0,-6,996.784


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-17_11-50-47
  done: false
  episode_len_mean: 996.8684210526316
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23684210526315788
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.596448032061259
          entropy_coeff: 0.009999999999999998
          kl: 0.007038465043673724
          policy_loss: -0.024551905939976373
          total_loss: -0.04758514016866684
          vf_explained_var: -0.8122448325157166
          vf_loss: 0.0005557626506844018
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,38,1121.21,38000,-0.236842,0,-6,996.868


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-17_11-51-08
  done: false
  episode_len_mean: 996.9487179487179
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.23076923076923078
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4770953747961255
          entropy_coeff: 0.009999999999999998
          kl: 0.007799864140982196
          policy_loss: -0.055536273452970714
          total_loss: -0.07700087510877185
          vf_explained_var: -1.0
          vf_loss: 0.000673895875984777
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since_restore: 39
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,39,1141.89,39000,-0.230769,0,-6,996.949


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-17_11-51-29
  done: false
  episode_len_mean: 997.025
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.225
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.374901098675198
          entropy_coeff: 0.009999999999999998
          kl: 0.007024680568122735
          policy_loss: 0.010855482601457172
          total_loss: -0.009813109557661745
          vf_explained_var: -0.8223778605461121
          vf_loss: 0.0007095911792324235
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_restore: 40
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,40,1162.79,40000,-0.225,0,-6,997.025


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-17_11-51-49
  done: false
  episode_len_mean: 997.0975609756098
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21951219512195122
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.593468689918518
          entropy_coeff: 0.009999999999999998
          kl: 0.007608411689799333
          policy_loss: 0.063156129916509
          total_loss: 0.040278458346923195
          vf_explained_var: -0.5153459906578064
          vf_loss: 0.0004891766149537741
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_restore: 41
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,41,1183.16,41000,-0.219512,0,-6,997.098


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-17_11-52-11
  done: false
  episode_len_mean: 997.1666666666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.21428571428571427
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6145555867089167
          entropy_coeff: 0.009999999999999998
          kl: 0.007085749145338847
          policy_loss: 0.04156573303043842
          total_loss: 0.01811413230995337
          vf_explained_var: -0.6379765272140503
          vf_loss: 0.0003025149072022436
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_since_restore: 42
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,42,1204.71,42000,-0.214286,0,-6,997.167


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-17_11-52-33
  done: false
  episode_len_mean: 997.2325581395348
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.20930232558139536
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5087023946974014
          entropy_coeff: 0.009999999999999998
          kl: 0.01007998562820812
          policy_loss: 0.12965340125891897
          total_loss: 0.10854187524980968
          vf_explained_var: -1.0
          vf_loss: 0.0005735010182737218
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since_restore: 43
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,43,1226.8,43000,-0.209302,0,-6,997.233


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-17_11-52-55
  done: false
  episode_len_mean: 997.2954545454545
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.20454545454545456
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4750684976577757
          entropy_coeff: 0.009999999999999998
          kl: 0.009351251966668498
          policy_loss: -0.030394253320991993
          total_loss: -0.05058620549324486
          vf_explained_var: -0.025417564436793327
          vf_loss: 0.0014026846728585143
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since_restore: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,44,1248.76,44000,-0.204545,0,-6,997.295


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-17_11-53-16
  done: false
  episode_len_mean: 997.3555555555556
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.2
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5126723262998794
          entropy_coeff: 0.009999999999999998
          kl: 0.011792254431345947
          policy_loss: 0.0059526258872614965
          total_loss: -0.0139061129755444
          vf_explained_var: -0.57793128490448
          vf_loss: 0.0012881045242668027
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_restore: 45
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,45,1269.27,45000,-0.2,0,-6,997.356


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-17_11-53-37
  done: false
  episode_len_mean: 997.4130434782609
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1956521739130435
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5665490706761678
          entropy_coeff: 0.009999999999999998
          kl: 0.005192610523435764
          policy_loss: 0.05657088855902354
          total_loss: 0.033021997743182714
          vf_explained_var: -0.7909436821937561
          vf_loss: 0.00036409188873626083
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_since_restore: 46
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,46,1290.78,46000,-0.195652,0,-6,997.413


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-17_11-53-57
  done: false
  episode_len_mean: 997.468085106383
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.19148936170212766
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5608829471800063
          entropy_coeff: 0.009999999999999998
          kl: 0.008055430893460367
          policy_loss: 0.048629298971758946
          total_loss: 0.02673081010580063
          vf_explained_var: -0.499594509601593
          vf_loss: 0.0009916389102323188
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since_restore: 47
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,47,1310.15,47000,-0.191489,0,-6,997.468


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-17_11-54-16
  done: false
  episode_len_mean: 997.5208333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1875
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.572519135475159
          entropy_coeff: 0.009999999999999998
          kl: 0.007534422646527015
          policy_loss: 0.03229837454338041
          total_loss: 0.009447801175216835
          vf_explained_var: -1.0
          vf_loss: 0.0003317488488391973
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_since_restore: 48
  node_ip: 10.55.229.87
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,48,1330.01,48000,-0.1875,0,-6,997.521


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-17_11-54-37
  done: false
  episode_len_mean: 997.5714285714286
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1836734693877551
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6118822866015963
          entropy_coeff: 0.009999999999999998
          kl: 0.006308453570991862
          policy_loss: -0.05633983065684636
          total_loss: -0.08004176177912288
          vf_explained_var: -0.9846917986869812
          vf_loss: 0.0002877886888098955
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_since_restore: 49
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,49,1350.41,49000,-0.183673,0,-6,997.571


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-17_11-54-58
  done: false
  episode_len_mean: 997.62
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 50
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.572450844446818
          entropy_coeff: 0.009999999999999998
          kl: 0.0063754649114050035
          policy_loss: -0.010278782414065466
          total_loss: -0.033548574563529755
          vf_explained_var: -0.4415629804134369
          vf_loss: 0.0003029969874963475
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_restore: 50
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,50,1371.49,50000,-0.18,0,-6,997.62


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-17_11-55-18
  done: false
  episode_len_mean: 997.6666666666666
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17647058823529413
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 51
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5206660985946656
          entropy_coeff: 0.009999999999999998
          kl: 0.007083583153023213
          policy_loss: 0.1372103782163726
          total_loss: 0.1150543651647038
          vf_explained_var: -0.550018846988678
          vf_loss: 0.0006599369715938034
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_since_restore: 51
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,51,1391.15,51000,-0.176471,0,-6,997.667


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-17_11-55-38
  done: false
  episode_len_mean: 997.7115384615385
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17307692307692307
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5833336538738676
          entropy_coeff: 0.009999999999999998
          kl: 0.005312949977490907
          policy_loss: -0.013673121399349637
          total_loss: -0.03744457515163554
          vf_explained_var: -0.7950118780136108
          vf_loss: 0.0002687614823419911
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,52,1411.57,52000,-0.173077,0,-6,997.712


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-17_11-55-58
  done: false
  episode_len_mean: 997.7547169811321
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16981132075471697
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.474974354108175
          entropy_coeff: 0.009999999999999998
          kl: 0.005471036304996524
          policy_loss: 0.11734735932615069
          total_loss: 0.09499752206934822
          vf_explained_var: -0.6537927985191345
          vf_loss: 0.0005534299874852877
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since_restore: 53
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,53,1431.48,53000,-0.169811,0,-6,997.755


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-17_11-56-19
  done: false
  episode_len_mean: 997.7962962962963
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16666666666666666
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4665581782658896
          entropy_coeff: 0.009999999999999998
          kl: 0.00911468132220178
          policy_loss: 0.017988216131925583
          total_loss: -0.0031912511938975915
          vf_explained_var: -0.49896901845932007
          vf_loss: 0.00040990768082135924
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,54,1452.5,54000,-0.166667,0,-6,997.796


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-17_11-56-41
  done: false
  episode_len_mean: 997.8363636363637
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16363636363636364
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.522688145107693
          entropy_coeff: 0.009999999999999998
          kl: 0.007964353315357482
          policy_loss: 0.13893378617035018
          total_loss: 0.11715057508813011
          vf_explained_var: -0.2359781265258789
          vf_loss: 0.0007557010152797577
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_restore: 55
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,55,1473.85,55000,-0.163636,0,-6,997.836


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-17_11-57-01
  done: false
  episode_len_mean: 997.875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16071428571428573
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.54894253677792
          entropy_coeff: 0.009999999999999998
          kl: 0.005664155145714442
          policy_loss: 0.20042095051871406
          total_loss: 0.1773409925401211
          vf_explained_var: -0.9173348546028137
          vf_loss: 0.0004978131345220997
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since_restore: 56
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,56,1494.28,56000,-0.160714,0,-6,997.875


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-17_11-57-22
  done: false
  episode_len_mean: 997.9122807017544
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15789473684210525
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.362864046626621
          entropy_coeff: 0.009999999999999998
          kl: 0.015214361851835603
          policy_loss: 0.08532434710197978
          total_loss: 0.0683188319620159
          vf_explained_var: 0.13178850710391998
          vf_loss: 0.0014882752649201495
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 57
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,57,1515.13,57000,-0.157895,0,-6,997.912


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-17_11-57-42
  done: false
  episode_len_mean: 997.948275862069
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15517241379310345
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5994016408920286
          entropy_coeff: 0.009999999999999998
          kl: 0.003919696678871117
          policy_loss: 0.01693007235104839
          total_loss: -0.007387547567486763
          vf_explained_var: -0.9963218569755554
          vf_loss: 0.00035349804974329243
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since_restore: 58
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,58,1535.11,58000,-0.155172,0,-6,997.948


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-17_11-58-02
  done: false
  episode_len_mean: 997.9830508474577
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15254237288135594
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.580409876505534
          entropy_coeff: 0.009999999999999998
          kl: 0.011202735749780186
          policy_loss: 0.028530783578753473
          total_loss: 0.0048245202543007
          vf_explained_var: -0.3805197477340698
          vf_loss: 0.00020737118377130376
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_since_restore: 59
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,59,1554.63,59000,-0.152542,0,-6,997.983




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-17_11-58-40
  done: false
  episode_len_mean: 996.05
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.605114483833313
          entropy_coeff: 0.009999999999999998
          kl: 0.011749671108785763
          policy_loss: 0.012571687748034795
          total_loss: -0.01089285264412562
          vf_explained_var: -0.750762939453125
          vf_loss: 0.0006038495682231668
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,60,1592.84,60000,-0.15,0,-6,996.05


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-17_11-59-01
  done: false
  episode_len_mean: 996.1147540983607
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14754098360655737
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.662177054087321
          entropy_coeff: 0.009999999999999998
          kl: 0.009611889711393356
          policy_loss: 0.06219906972514259
          total_loss: 0.03753438095251719
          vf_explained_var: -0.9571781754493713
          vf_loss: 0.00033507461032261036
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,61,1613.65,61000,-0.147541,0,-6,996.115


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-17_11-59-23
  done: false
  episode_len_mean: 996.1774193548387
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14516129032258066
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.622997925016615
          entropy_coeff: 0.009999999999999998
          kl: 0.0077461316806264335
          policy_loss: -0.08818741585645411
          total_loss: -0.11287050868074099
          vf_explained_var: -0.6271733641624451
          vf_loss: 0.00023972698383861117
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_since_restore: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,62,1635.57,62000,-0.145161,0,-6,996.177


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-17_11-59-43
  done: false
  episode_len_mean: 996.2380952380952
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14285714285714285
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6194220675362483
          entropy_coeff: 0.009999999999999998
          kl: 0.007692296037220419
          policy_loss: -0.060676634084019396
          total_loss: -0.08526498385601573
          vf_explained_var: -0.6201708316802979
          vf_loss: 0.00030779422862300027
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,63,1655.88,63000,-0.142857,0,-6,996.238


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-17_12-00-03
  done: false
  episode_len_mean: 996.296875
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.140625
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6289323621326024
          entropy_coeff: 0.009999999999999998
          kl: 0.007954230135985728
          policy_loss: -0.04383460050448775
          total_loss: -0.06839010098742114
          vf_explained_var: -0.7870053648948669
          vf_loss: 0.0003915450551883421
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_since_restore: 64
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,64,1675.58,64000,-0.140625,0,-6,996.297


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-17_12-00-23
  done: false
  episode_len_mean: 996.3538461538461
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13846153846153847
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16874999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4319579389360215
          entropy_coeff: 0.009999999999999998
          kl: 0.0285542163118468
          policy_loss: 0.006777266330189175
          total_loss: -0.011171819104088678
          vf_explained_var: -0.001862167613580823
          vf_loss: 0.0015519743492606924
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_restore: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,65,1696.28,65000,-0.138462,0,-6,996.354


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-17_12-00-47
  done: false
  episode_len_mean: 996.4090909090909
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13636363636363635
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3480296048853133
          entropy_coeff: 0.009999999999999998
          kl: 0.02487002721985557
          policy_loss: -0.2271477121445868
          total_loss: -0.16994733545515273
          vf_explained_var: 0.12110377103090286
          vf_loss: 0.0643854452452312
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_restore: 66
  node_ip: 10.55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,66,1719.61,66000,-0.136364,0,-6,996.409


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-17_12-01-08
  done: false
  episode_len_mean: 996.4626865671642
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13432835820895522
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6835751480526395
          entropy_coeff: 0.009999999999999998
          kl: 0.011697778905650264
          policy_loss: -0.20479283597734238
          total_loss: -0.2087068905433019
          vf_explained_var: 0.4075738191604614
          vf_loss: 0.008480196472050415
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since_restore: 67
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,67,1740.45,67000,-0.134328,0,-6,996.463


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-17_12-01-30
  done: false
  episode_len_mean: 996.5147058823529
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1323529411764706
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.681647147072686
          entropy_coeff: 0.009999999999999998
          kl: 0.005362766349328258
          policy_loss: -0.146048682079547
          total_loss: -0.17030539868606462
          vf_explained_var: -0.7936663031578064
          vf_loss: 0.0005235768856234952
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_restore: 68
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,68,1762.43,68000,-0.132353,0,-6,996.515


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-17_12-01-51
  done: false
  episode_len_mean: 996.5652173913044
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.13043478260869565
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.682385766506195
          entropy_coeff: 0.009999999999999998
          kl: 0.014775814188122086
          policy_loss: -0.1807917046878073
          total_loss: -0.18370871825350654
          vf_explained_var: 0.3091524839401245
          vf_loss: 0.008296653253233267
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,69,1783.49,69000,-0.130435,0,-6,996.565


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-17_12-02-15
  done: false
  episode_len_mean: 996.6142857142858
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17142857142857143
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7593254407246908
          entropy_coeff: 0.009999999999999998
          kl: 0.013135910085929802
          policy_loss: -0.08781980209880405
          total_loss: -0.06990940239694383
          vf_explained_var: 0.3782915472984314
          vf_loss: 0.030516113423638875
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since_restore: 70
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,70,1807.4,70000,-0.171429,0,-6,996.614


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-17_12-02-36
  done: false
  episode_len_mean: 996.6619718309859
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16901408450704225
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3313600487179227
          entropy_coeff: 0.009999999999999998
          kl: 0.009249212627710375
          policy_loss: -0.06587968009213606
          total_loss: -0.08124723757306734
          vf_explained_var: 0.04926378279924393
          vf_loss: 0.004434232918235163
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_restore: 71
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,71,1828.47,71000,-0.169014,0,-6,996.662


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-17_12-02-57
  done: false
  episode_len_mean: 996.7083333333334
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.18055555555555555
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4942849331431918
          entropy_coeff: 0.009999999999999998
          kl: 0.009845601098148787
          policy_loss: -0.040692878928449416
          total_loss: -0.0405314928955502
          vf_explained_var: 0.47759881615638733
          vf_loss: 0.011365978882855012
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,72,1849.73,72000,-0.180556,0,-6,996.708


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-17_12-03-18
  done: false
  episode_len_mean: 996.7534246575342
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1780821917808219
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2092454532782237
          entropy_coeff: 0.009999999999999998
          kl: 0.008057922017772117
          policy_loss: -0.07164878149827321
          total_loss: -0.07847950243287616
          vf_explained_var: -0.22768601775169373
          vf_loss: 0.002202245497594251
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since_restore: 73
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,73,1870.07,73000,-0.178082,0,-6,996.753


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-17_12-03-38
  done: false
  episode_len_mean: 996.7972972972973
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17567567567567569
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.78732021384769
          entropy_coeff: 0.009999999999999998
          kl: 0.004847924844793599
          policy_loss: 0.15035739069183668
          total_loss: 0.12447390933003691
          vf_explained_var: -0.2961638867855072
          vf_loss: 0.00014902631850822622
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_restore: 74
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,74,1890.32,74000,-0.175676,0,-6,996.797


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-17_12-04-00
  done: false
  episode_len_mean: 996.84
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17333333333333334
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.747909437285529
          entropy_coeff: 0.009999999999999998
          kl: 0.008864443687889903
          policy_loss: 0.22119099961386787
          total_loss: 0.19561366786559423
          vf_explained_var: -0.4017302989959717
          vf_loss: 0.00021890560714786666
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_restore: 75
  node_ip: 10.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,75,1912.27,75000,-0.173333,0,-6,996.84


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-17_12-04-23
  done: false
  episode_len_mean: 996.8815789473684
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.17105263157894737
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.651280747519599
          entropy_coeff: 0.009999999999999998
          kl: 0.010167692320472968
          policy_loss: 0.041649584223826724
          total_loss: 0.018801196581787534
          vf_explained_var: -0.29168176651000977
          vf_loss: 0.0017341463677843826
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_restore: 76
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,76,1935.03,76000,-0.171053,0,-6,996.882


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-17_12-04-50
  done: false
  episode_len_mean: 996.922077922078
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16883116883116883
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.6505048433939615
          entropy_coeff: 0.009999999999999998
          kl: 0.01739890654130302
          policy_loss: 0.04425622605615192
          total_loss: 0.02276866568459405
          vf_explained_var: -0.6006844639778137
          vf_loss: 0.0017144130583296323
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,77,1961.94,77000,-0.168831,0,-6,996.922


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-17_12-05-12
  done: false
  episode_len_mean: 996.9615384615385
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16666666666666666
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.7996091736687554
          entropy_coeff: 0.009999999999999998
          kl: 0.010110235263627487
          policy_loss: 0.005160707400904761
          total_loss: -0.007823523961835437
          vf_explained_var: 0.022857094183564186
          vf_loss: 0.0030924935611741
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_since_restore: 78
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,78,1984.14,78000,-0.166667,0,-6,996.962


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-17_12-05-32
  done: false
  episode_len_mean: 997.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16455696202531644
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5787433650758533
          entropy_coeff: 0.009999999999999998
          kl: 0.011448549777362753
          policy_loss: 0.04804096122582754
          total_loss: 0.025097030649582543
          vf_explained_var: -0.10338200628757477
          vf_loss: 0.0006700698677579769
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 79
  node_ip: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,79,2003.95,79000,-0.164557,0,-6,997


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-17_12-05-53
  done: false
  episode_len_mean: 997.0375
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1625
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.7261841694513955
          entropy_coeff: 0.009999999999999998
          kl: 0.007416652737251105
          policy_loss: 0.13044191292590565
          total_loss: 0.10491255840493573
          vf_explained_var: -0.4378131031990051
          vf_loss: 0.00032448224834903764
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore: 80
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,80,2025.79,80000,-0.1625,0,-6,997.038


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-17_12-06-14
  done: false
  episode_len_mean: 997.074074074074
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16049382716049382
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4189632336298623
          entropy_coeff: 0.009999999999999998
          kl: 0.01113431852729434
          policy_loss: 0.07782759360141224
          total_loss: 0.0564344491602646
          vf_explained_var: -0.07267534732818604
          vf_loss: 0.0006827062788134855
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_since_restore: 81
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,81,2046.75,81000,-0.160494,0,-6,997.074


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-17_12-06-37
  done: false
  episode_len_mean: 997.109756097561
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15853658536585366
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.599143491850959
          entropy_coeff: 0.009999999999999998
          kl: 0.010127068659953355
          policy_loss: -0.010222329820195834
          total_loss: -0.032569019724097516
          vf_explained_var: -0.4360887408256531
          vf_loss: 0.0017221841505185391
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restore: 82
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,82,2069.36,82000,-0.158537,0,-6,997.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-17_12-06-58
  done: false
  episode_len_mean: 997.144578313253
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1566265060240964
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5844804207483927
          entropy_coeff: 0.009999999999999998
          kl: 0.007499080314881487
          policy_loss: 0.033319500150779884
          total_loss: 0.0093902756460011
          vf_explained_var: -0.8462010025978088
          vf_loss: 0.0004919250564449208
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore: 83
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,83,2090.61,83000,-0.156627,0,-6,997.145


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-17_12-07-20
  done: false
  episode_len_mean: 997.1785714285714
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15476190476190477
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5901665528615316
          entropy_coeff: 0.009999999999999998
          kl: 0.011607688712689829
          policy_loss: -0.062297636167042786
          total_loss: -0.08402547396512496
          vf_explained_var: -0.6153408288955688
          vf_loss: 0.0019701822732006097
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore: 84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,84,2111.94,84000,-0.154762,0,-6,997.179


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-17_12-07-43
  done: false
  episode_len_mean: 997.2117647058824
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15294117647058825
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5535075558556453
          entropy_coeff: 0.009999999999999998
          kl: 0.007302621317655028
          policy_loss: -0.05762269020908409
          total_loss: -0.08020128450459904
          vf_explained_var: -0.6017072796821594
          vf_loss: 0.0015701238638333355
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore: 85
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,85,2135.17,85000,-0.152941,0,-6,997.212


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-17_12-08-04
  done: false
  episode_len_mean: 997.2441860465116
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16279069767441862
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5879243930180869
          entropy_coeff: 0.009999999999999998
          kl: 0.014863628071148858
          policy_loss: 0.004015587601396773
          total_loss: 0.02721075142423312
          vf_explained_var: 0.5836988091468811
          vf_loss: 0.03625263834579123
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 86
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,86,2155.86,86000,-0.162791,0,-6,997.244


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-17_12-08-26
  done: false
  episode_len_mean: 997.2758620689655
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.16091954022988506
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 87
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.4648949517144096
          entropy_coeff: 0.009999999999999998
          kl: 0.013747318157107951
          policy_loss: 0.036554096080362794
          total_loss: 0.016068021787537468
          vf_explained_var: -0.20082828402519226
          vf_loss: 0.0015530292733779384
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_restore: 87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,87,2178.26,87000,-0.16092,0,-6,997.276


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-17_12-08-49
  done: false
  episode_len_mean: 997.3068181818181
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1590909090909091
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 88
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1898437500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.551800553003947
          entropy_coeff: 0.009999999999999998
          kl: 0.004263292353270545
          policy_loss: 0.0022775776477323637
          total_loss: -0.020095433036072387
          vf_explained_var: -0.5439303517341614
          vf_loss: 0.002335634621946762
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,88,2201.31,88000,-0.159091,0,-6,997.307


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-17_12-09-11
  done: false
  episode_len_mean: 997.3370786516854
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15730337078651685
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09492187500000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.862240109178755
          entropy_coeff: 0.009999999999999998
          kl: 0.02807579839754687
          policy_loss: -0.1246907057861487
          total_loss: -0.126204647620519
          vf_explained_var: 0.2639685571193695
          vf_loss: 0.014443451140282882
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore: 89
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,89,2222.71,89000,-0.157303,0,-6,997.337




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-17_12-09-59
  done: false
  episode_len_mean: 996.0333333333333
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15555555555555556
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.512342291408115
          entropy_coeff: 0.009999999999999998
          kl: 0.01054850365601805
          policy_loss: -0.044465203852289255
          total_loss: -0.06561124492436647
          vf_explained_var: -0.3280496299266815
          vf_loss: 0.0024754543284264702
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore: 90
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,90,2270.61,90000,-0.155556,0,-6,996.033


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-17_12-10-22
  done: false
  episode_len_mean: 996.0769230769231
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15384615384615385
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3378765079710218
          entropy_coeff: 0.009999999999999998
          kl: 0.013296604901088829
          policy_loss: -0.03418432409978575
          total_loss: -0.05355495251715183
          vf_explained_var: -0.5038247108459473
          vf_loss: 0.0021149283675994312
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_restore: 91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,91,2293.42,91000,-0.153846,0,-6,996.077


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-17_12-10-41
  done: false
  episode_len_mean: 996.1195652173913
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15217391304347827
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6115903509987723
          entropy_coeff: 0.009999999999999998
          kl: 0.012788253404188212
          policy_loss: -0.016317624412477017
          total_loss: -0.03992792123721706
          vf_explained_var: -1.0
          vf_loss: 0.000684778971481137
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_restore: 92
  node_ip: 10.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,92,2312.98,92000,-0.152174,0,-6,996.12


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-17_12-11-04
  done: false
  episode_len_mean: 996.1612903225806
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.15053763440860216
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6231721772087946
          entropy_coeff: 0.009999999999999998
          kl: 0.009976965842619891
          policy_loss: -0.01748005830579334
          total_loss: -0.04120165660149521
          vf_explained_var: -0.5270453691482544
          vf_loss: 0.001089575985517715
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 93
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,93,2335.66,93000,-0.150538,0,-6,996.161


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-17_12-11-25
  done: false
  episode_len_mean: 996.2021276595744
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14893617021276595
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.529301243358188
          entropy_coeff: 0.009999999999999998
          kl: 0.011303815605560569
          policy_loss: -0.0864819327990214
          total_loss: -0.10834221740563711
          vf_explained_var: -0.6209126710891724
          vf_loss: 0.00182325813996916
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,94,2357.21,94000,-0.148936,0,-6,996.202


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-17_12-11-47
  done: false
  episode_len_mean: 996.2421052631579
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14736842105263157
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7282590044869317
          entropy_coeff: 0.009999999999999998
          kl: 0.01093716756770162
          policy_loss: -0.08470574178629452
          total_loss: -0.1101939257648256
          vf_explained_var: -0.9687694907188416
          vf_loss: 0.00023713670088909566
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,95,2379.17,95000,-0.147368,0,-6,996.242


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-17_12-12-08
  done: false
  episode_len_mean: 996.28125
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14583333333333334
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 96
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.597190899319119
          entropy_coeff: 0.009999999999999998
          kl: 0.010360204543609234
          policy_loss: -0.0689450555998418
          total_loss: -0.09245996433827612
          vf_explained_var: -0.9270338416099548
          vf_loss: 0.0009818850685809999
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,96,2400.02,96000,-0.145833,0,-6,996.281


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-17_12-12-30
  done: false
  episode_len_mean: 996.319587628866
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14432989690721648
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 97
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5748609251446193
          entropy_coeff: 0.009999999999999998
          kl: 0.009554114888837834
          policy_loss: -0.04073672856514653
          total_loss: -0.064096922479156
          vf_explained_var: -0.5242765545845032
          vf_loss: 0.0010280759392319143
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,97,2421.51,97000,-0.14433,0,-6,996.32


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-17_12-12-51
  done: false
  episode_len_mean: 996.3571428571429
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14285714285714285
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6256618552737767
          entropy_coeff: 0.009999999999999998
          kl: 0.01371157633164195
          policy_loss: 0.043156652980380586
          total_loss: 0.019874976244237688
          vf_explained_var: -0.6642675995826721
          vf_loss: 0.001022648128047068
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,98,2442.93,98000,-0.142857,0,-6,996.357


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-17_12-13-14
  done: false
  episode_len_mean: 996.3939393939394
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.1414141414141414
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.443727069430881
          entropy_coeff: 0.009999999999999998
          kl: 0.015180123717132473
          policy_loss: -0.004589583145247565
          total_loss: -0.02541354956726233
          vf_explained_var: -0.7978007197380066
          vf_loss: 0.0014519160561677483
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,99,2466,99000,-0.141414,0,-6,996.394


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-17_12-13-37
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3950306627485487
          entropy_coeff: 0.009999999999999998
          kl: 0.011573837640274736
          policy_loss: -0.043028517233000864
          total_loss: -0.06447875483168496
          vf_explained_var: -0.6765676140785217
          vf_loss: 0.0008521546250752483
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,100,2488.32,100000,-0.14,0,-6,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-17_12-13-59
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.14
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.437473177909851
          entropy_coeff: 0.009999999999999998
          kl: 0.014073381750692802
          policy_loss: -0.01898759798043304
          total_loss: -0.04005986009207037
          vf_explained_var: -0.9911442399024963
          vf_loss: 0.001298660193828659
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,101,2510.86,101000,-0.14,0,-6,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-17_12-14-23
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.11
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.400231358740065
          entropy_coeff: 0.009999999999999998
          kl: 0.011387287457032554
          policy_loss: -0.05097810071375635
          total_loss: -0.07185932066705492
          vf_explained_var: -1.0
          vf_loss: 0.0014997381173695128
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,102,2534.02,102000,-0.11,0,-6,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-17_12-14-46
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.449418558014764
          entropy_coeff: 0.009999999999999998
          kl: 0.010848966769631355
          policy_loss: -0.010474602174427774
          total_loss: -0.03251426236497031
          vf_explained_var: -0.4952675998210907
          vf_loss: 0.0009098165733222333
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,103,2557.55,103000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-17_12-15-11
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 104
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3971771028306748
          entropy_coeff: 0.009999999999999998
          kl: 0.010057135934312199
          policy_loss: -0.0492396314524942
          total_loss: -0.07026630114350053
          vf_explained_var: -1.0
          vf_loss: 0.0015131378067760831
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,104,2582.36,104000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-17_12-15-35
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 105
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.445719567934672
          entropy_coeff: 0.009999999999999998
          kl: 0.01062537140738427
          policy_loss: -0.02658760998811987
          total_loss: -0.04884598371055391
          vf_explained_var: -1.0
          vf_loss: 0.000685951970000234
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,105,2606.63,105000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-17_12-15-59
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3610691600375704
          entropy_coeff: 0.009999999999999998
          kl: 0.013947366054081797
          policy_loss: -0.07662792404492696
          total_loss: -0.09692705762055186
          vf_explained_var: -0.4752286374568939
          vf_loss: 0.0013256908633694467
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,106,2630.06,106000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-17_12-16-20
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4248368289735582
          entropy_coeff: 0.009999999999999998
          kl: 0.009437246918515404
          policy_loss: -0.10654901617930995
          total_loss: -0.12868357089658577
          vf_explained_var: -1.0
          vf_loss: 0.0007701107578921235
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 10.55.229.87
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,107,2651.45,107000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-17_12-16-43
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 108
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0410037928157383
          entropy_coeff: 0.009999999999999998
          kl: 0.02125752388742986
          policy_loss: -0.08330033847855198
          total_loss: -0.06261907857325343
          vf_explained_var: 0.4351070523262024
          vf_loss: 0.038064589548028174
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,108,2674.4,108000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-17_12-17-05
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 109
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.445371145672268
          entropy_coeff: 0.009999999999999998
          kl: 0.009782441690906139
          policy_loss: -0.009773344463772244
          total_loss: -0.030494355327553218
          vf_explained_var: -0.8929105401039124
          vf_loss: 0.0016434229186011685
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,109,2695.8,109000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-17_12-17-26
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.5558599578009713
          entropy_coeff: 0.009999999999999998
          kl: 0.011409046787479976
          policy_loss: 0.020525161052743594
          total_loss: -0.0017047274029917186
          vf_explained_var: -1.0
          vf_loss: 0.0008920330994038119
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,110,2716.89,110000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-17_12-17-47
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.5189634482065837
          entropy_coeff: 0.009999999999999998
          kl: 0.012090204302106767
          policy_loss: -0.048534168303012847
          total_loss: -0.07024448501567046
          vf_explained_var: -1.0
          vf_loss: 0.0008971612046783169
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,111,2738.41,111000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-17_12-18-10
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.4473704126146103
          entropy_coeff: 0.009999999999999998
          kl: 0.010531518075151503
          policy_loss: 0.00040603743659125435
          total_loss: -0.020472222090595297
          vf_explained_var: -1.0
          vf_loss: 0.0013461831058116836
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,112,2760.82,112000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-17_12-18-32
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 113
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.3859406338797675
          entropy_coeff: 0.009999999999999998
          kl: 0.00834848841114323
          policy_loss: -0.07417795504960749
          total_loss: -0.09495690365632375
          vf_explained_var: -0.8835589289665222
          vf_loss: 0.001297433768114489
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,113,2782.74,113000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-17_12-18-52
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 114
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.3800995349884033
          entropy_coeff: 0.009999999999999998
          kl: 0.01146774601683589
          policy_loss: -0.09023776488999526
          total_loss: -0.1103533337927527
          vf_explained_var: -0.834629237651825
          vf_loss: 0.0012362120039243665
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,114,2803.4,114000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-17_12-19-16
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.3574768013424343
          entropy_coeff: 0.009999999999999998
          kl: 0.011144591132150774
          policy_loss: -0.09405082940227455
          total_loss: -0.11400963150792652
          vf_explained_var: -1.0
          vf_loss: 0.0012357685908985635
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,115,2826.51,115000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-17_12-19-37
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 1.8898015247450934
          entropy_coeff: 0.009999999999999998
          kl: 0.013397774594389547
          policy_loss: -0.11173594792683919
          total_loss: -0.11933159687452846
          vf_explained_var: 0.3663696348667145
          vf_loss: 0.008440946435762776
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,116,2847.95,116000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-17_12-19-59
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.469111699528164
          entropy_coeff: 0.009999999999999998
          kl: 0.0121672635629873
          policy_loss: -0.07043854304485851
          total_loss: -0.0909836760825581
          vf_explained_var: -0.5555711984634399
          vf_loss: 0.0015473710761095087
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 10.55.229.87
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,117,2869.64,117000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-17_12-20-20
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.3818835894266766
          entropy_coeff: 0.009999999999999998
          kl: 0.012859240416590565
          policy_loss: -0.006342281483941608
          total_loss: -0.025570384671704638
          vf_explained_var: -0.9837234616279602
          vf_loss: 0.0018443290299425521
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,118,2890.37,118000,-0.05,0,-3,996.43


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-17_12-20-40
  done: false
  episode_len_mean: 996.43
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 2.2835214243994817
          entropy_coeff: 0.009999999999999998
          kl: 0.013155861325511055
          policy_loss: -0.12420820337202813
          total_loss: -0.14127405385176342
          vf_explained_var: -0.5896868109703064
          vf_loss: 0.002959610789548606
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,119,2911.2,119000,-0.05,0,-3,996.43




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-17_12-21-22
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21357421875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7736882978015476
          entropy_coeff: 0.009999999999999998
          kl: 0.033166344244157625
          policy_loss: -0.0555082396707601
          total_loss: -0.045292250832749736
          vf_explained_var: -0.1948496699333191
          vf_loss: 0.020869393759251884
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,120,2952.5,120000,-0.05,0,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-17_12-21-45
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.3374123202429877
          entropy_coeff: 0.009999999999999998
          kl: 0.013050459843795131
          policy_loss: -0.02246841471642256
          total_loss: -0.037462924503617816
          vf_explained_var: -0.5238310694694519
          vf_loss: 0.004198753391392529
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,121,2976.04,121000,-0.05,0,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-17_12-22-09
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.0404961625734965
          entropy_coeff: 0.009999999999999998
          kl: 0.00928683659396055
          policy_loss: -0.10122576053771708
          total_loss: -0.09320877840121587
          vf_explained_var: -0.057225652039051056
          vf_loss: 0.025446799343141417
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,122,2999.63,122000,-0.05,0,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-17_12-22-28
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9267613887786865
          entropy_coeff: 0.009999999999999998
          kl: 0.019908543532838822
          policy_loss: -0.07351846396923065
          total_loss: -0.07842791279157003
          vf_explained_var: 0.5549955368041992
          vf_loss: 0.007980233860305615
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,123,3019.05,123000,-0.06,0,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-17_12-22-51
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: -0.06
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9405799720022414
          entropy_coeff: 0.009999999999999998
          kl: 0.009753102485843712
          policy_loss: 0.0282860999306043
          total_loss: 0.019036925294333033
          vf_explained_var: -0.16663862764835358
          vf_loss: 0.007032110261368669
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,124,3041.52,124000,-0.06,0,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-17_12-23-11
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.944806350602044
          entropy_coeff: 0.009999999999999998
          kl: 0.012197409976933561
          policy_loss: -0.09054802656173706
          total_loss: -0.0783381309774187
          vf_explained_var: 0.04035874083638191
          vf_loss: 0.027750381015034187
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,125,3061.66,125000,-0.05,1,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-17_12-23-32
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9929669936498007
          entropy_coeff: 0.009999999999999998
          kl: 0.014712239395109804
          policy_loss: -0.027322612785630754
          total_loss: -0.037600276950332856
          vf_explained_var: 0.15046650171279907
          vf_loss: 0.0049387739428008596
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,126,3082.41,126000,-0.05,1,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-17_12-23-53
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9122215774324205
          entropy_coeff: 0.009999999999999998
          kl: 0.013576780514786594
          policy_loss: -0.07478177535037199
          total_loss: -0.0850123543292284
          vf_explained_var: -0.1157212182879448
          vf_loss: 0.004542158657891883
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,127,3103.26,127000,-0.05,1,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-17_12-24-16
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.316945383283827
          entropy_coeff: 0.009999999999999998
          kl: 0.010088099887949677
          policy_loss: -0.05141940421114365
          total_loss: -0.06917586074107224
          vf_explained_var: -0.8578599691390991
          vf_loss: 0.002181160177052435
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,128,3125.87,128000,-0.05,1,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-17_12-24-40
  done: false
  episode_len_mean: 994.87
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.0479537977112665
          entropy_coeff: 0.009999999999999998
          kl: 0.010306951849753516
          policy_loss: -0.06498321096102397
          total_loss: -0.07578105040722423
          vf_explained_var: 0.19519323110580444
          vf_loss: 0.006379751399314652
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,129,3150.34,129000,-0.05,1,-3,994.87


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-17_12-25-01
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.420143307579888
          entropy_coeff: 0.009999999999999998
          kl: 0.012572116114495647
          policy_loss: -0.04213267929024166
          total_loss: -0.06050246088869042
          vf_explained_var: -0.4564734101295471
          vf_loss: 0.0018040305915443847
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,130,3170.94,130000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-17_12-25-21
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.458908012178209
          entropy_coeff: 0.009999999999999998
          kl: 0.010099060792395198
          policy_loss: -0.06306235481881434
          total_loss: -0.08367393580265343
          vf_explained_var: -0.32355350255966187
          vf_loss: 0.0007421532427542843
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,131,3191.21,131000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-17_12-25-42
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.4483855379952324
          entropy_coeff: 0.009999999999999998
          kl: 0.012007816454606492
          policy_loss: 0.027592906086809105
          total_loss: 0.008000983810052275
          vf_explained_var: -0.9837244749069214
          vf_loss: 0.0010450927570572175
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,132,3212.34,132000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-17_12-26-02
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.491351482603285
          entropy_coeff: 0.009999999999999998
          kl: 0.008600589458638492
          policy_loss: -0.04028805270791054
          total_loss: -0.06205409901837508
          vf_explained_var: -1.0
          vf_loss: 0.00039217389979360936
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip: 10.55.229.87
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,133,3232.12,133000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-17_12-26-22
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.529958481258816
          entropy_coeff: 0.009999999999999998
          kl: 0.011932382636034017
          policy_loss: -0.02629579363597764
          total_loss: -0.04720821003946993
          vf_explained_var: -0.7050359845161438
          vf_loss: 0.0005644907673639762
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,134,3252.01,134000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-17_12-26-43
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 135
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.554200953907437
          entropy_coeff: 0.009999999999999998
          kl: 0.011163853777264043
          policy_loss: -0.02106411928931872
          total_loss: -0.04284183431623711
          vf_explained_var: -0.9647737145423889
          vf_loss: 0.00018782899100592152
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,135,3273.19,135000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-17_12-27-03
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 136
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.5076779656940036
          entropy_coeff: 0.009999999999999998
          kl: 0.008498940811222096
          policy_loss: -0.05248340935342842
          total_loss: -0.0739362835056252
          vf_explained_var: -1.0
          vf_loss: 0.0009011746455346131
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,136,3293.33,136000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-17_12-27-23
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.4930771695242986
          entropy_coeff: 0.009999999999999998
          kl: 0.01339896581705461
          policy_loss: -0.04156754621201091
          total_loss: -0.061105486129721005
          vf_explained_var: -0.8102798461914062
          vf_loss: 0.0011003181422387974
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,137,3313.21,137000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-17_12-27-45
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.5110334396362304
          entropy_coeff: 0.009999999999999998
          kl: 0.008643838421109202
          policy_loss: -0.0371304295439687
          total_loss: -0.05905163950390286
          vf_explained_var: -0.9766775369644165
          vf_loss: 0.0004199709575510092
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,138,3334.94,138000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-17_12-28-06
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.501179035504659
          entropy_coeff: 0.009999999999999998
          kl: 0.0052663108218657445
          policy_loss: -0.08662640932533476
          total_loss: -0.1095280236668057
          vf_explained_var: -1.0
          vf_loss: 0.00042305211035353646
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip: 10.55.229.87
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,139,3355.81,139000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-17_12-28-27
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.4377379284964666
          entropy_coeff: 0.009999999999999998
          kl: 0.009546864238779899
          policy_loss: -0.06849491078820494
          total_loss: -0.08850019880466992
          vf_explained_var: -0.8471523523330688
          vf_loss: 0.0013136437284022881
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,140,3376.67,140000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-17_12-28-50
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.450490776697795
          entropy_coeff: 0.009999999999999998
          kl: 0.010521490339220218
          policy_loss: -0.043889830489125516
          total_loss: -0.06360063722564115
          vf_explained_var: -0.8855666518211365
          vf_loss: 0.0014234219406110546
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,141,3399.67,141000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-17_12-29-12
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.4462637450959948
          entropy_coeff: 0.009999999999999998
          kl: 0.008315613971717652
          policy_loss: 0.016792931270578668
          total_loss: -0.004580179632951816
          vf_explained_var: -0.8604872822761536
          vf_loss: 0.00042552469725099705
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,142,3421.34,142000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-17_12-29-33
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.2402521279123095
          entropy_coeff: 0.009999999999999998
          kl: 0.012325041808585032
          policy_loss: -0.09700226253933376
          total_loss: -0.11311782966885302
          vf_explained_var: -0.673058032989502
          vf_loss: 0.0023384869978245763
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,143,3443.22,143000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-09-17_12-29-56
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 144
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.211416419347127
          entropy_coeff: 0.009999999999999998
          kl: 0.010504637477082272
          policy_loss: -0.06921374439779256
          total_loss: -0.08610630689395798
          vf_explained_var: -0.9461559653282166
          vf_loss: 0.0018563204115101446
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,144,3465.68,144000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-09-17_12-30-19
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8596008989546033
          entropy_coeff: 0.009999999999999998
          kl: 0.014272596840122356
          policy_loss: -0.06997817369798819
          total_loss: -0.08093187782085604
          vf_explained_var: -0.47597622871398926
          vf_loss: 0.0030699168008545205
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,145,3488.36,145000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-09-17_12-30-42
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.952524483203888
          entropy_coeff: 0.009999999999999998
          kl: 0.010818561085446217
          policy_loss: -0.04485704455938604
          total_loss: -0.058808583786918056
          vf_explained_var: -0.9600870609283447
          vf_loss: 0.00210785731065294
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,146,3511.31,146000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-09-17_12-31-03
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.237096310986413
          entropy_coeff: 0.009999999999999998
          kl: 0.009844050061072288
          policy_loss: -0.019409024549855127
          total_loss: -0.03710847823984093
          vf_explained_var: -0.7488583922386169
          vf_loss: 0.0015178567487358426
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,147,3532.71,147000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-09-17_12-31-26
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8128536012437608
          entropy_coeff: 0.009999999999999998
          kl: 0.010980895245667326
          policy_loss: -0.02079895345701112
          total_loss: -0.03331697094771597
          vf_explained_var: -1.0
          vf_loss: 0.0020926659008384577
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip: 10.55.229.87
  num_heal

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,148,3555.6,148000,-0.05,1,-3,996.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-09-17_12-31-48
  done: false
  episode_len_mean: 996.06
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8678631358676487
          entropy_coeff: 0.009999999999999998
          kl: 0.011706085978709405
          policy_loss: -0.027267458248469566
          total_loss: -0.03997246024923192
          vf_explained_var: -1.0
          vf_loss: 0.0022234510112967756
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_ip: 10.55.229.87
  num_hea

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,149,3577.85,149000,-0.05,1,-3,996.06




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-09-17_12-32-29
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.828073391649458
          entropy_coeff: 0.009999999999999998
          kl: 0.011733039304530053
          policy_loss: -0.0720957869456874
          total_loss: -0.08403067261808449
          vf_explained_var: -1.0
          vf_loss: 0.002587034929698954
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,150,3618.71,150000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-09-17_12-32-52
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 151
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.212010701497396
          entropy_coeff: 0.009999999999999998
          kl: 0.013314972859247239
          policy_loss: -0.05696420412924555
          total_loss: -0.07312988332576222
          vf_explained_var: -0.43057429790496826
          vf_loss: 0.0016888272715732456
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,151,3641.34,151000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-09-17_12-33-13
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1537937535179985
          entropy_coeff: 0.009999999999999998
          kl: 0.01648995029004368
          policy_loss: -0.010548469548424085
          total_loss: -0.02534708482109838
          vf_explained_var: -0.6186804175376892
          vf_loss: 0.0014565794340645273
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,152,3662.69,152000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-09-17_12-33-35
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 153
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9260594089825949
          entropy_coeff: 0.009999999999999998
          kl: 0.01132183843695142
          policy_loss: -0.06022522548834483
          total_loss: -0.07402798264390892
          vf_explained_var: -0.9774143695831299
          vf_loss: 0.0018307589476333104
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,153,3683.92,153000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-09-17_12-33-55
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 154
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.133923077583313
          entropy_coeff: 0.009999999999999998
          kl: 0.011304170291635558
          policy_loss: -0.02418375060790115
          total_loss: -0.04049466347528829
          vf_explained_var: -0.7788010239601135
          vf_loss: 0.001406897423213296
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,154,3704.59,154000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-09-17_12-34-15
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.3161039961708916
          entropy_coeff: 0.009999999999999998
          kl: 0.011992398676154044
          policy_loss: -0.08401796685324775
          total_loss: -0.10255945407681995
          vf_explained_var: -0.5244120955467224
          vf_loss: 0.0007776464005776992
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,155,3724.11,155000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-09-17_12-34-39
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1101482020484075
          entropy_coeff: 0.009999999999999998
          kl: 0.01273304493528949
          policy_loss: -0.0608680527864231
          total_loss: -0.07662692528424991
          vf_explained_var: -0.8579201698303223
          vf_loss: 0.0012634346281023075
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,156,3748.02,156000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-09-17_12-35-01
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8197116057078044
          entropy_coeff: 0.009999999999999998
          kl: 0.006822921506126889
          policy_loss: -0.08421174919025766
          total_loss: -0.09916790910065174
          vf_explained_var: -0.2679310739040375
          vf_loss: 0.0010551560983812023
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,157,3770,157000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-09-17_12-35-22
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.2133744253052605
          entropy_coeff: 0.009999999999999998
          kl: 0.007080793649837997
          policy_loss: -0.029157265772422156
          total_loss: -0.03811273587246736
          vf_explained_var: -0.5103279948234558
          vf_loss: 0.0009098596651003593
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,158,3790.98,158000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-09-17_12-35-42
  done: false
  episode_len_mean: 994.89
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9229062226083544
          entropy_coeff: 0.009999999999999998
          kl: 0.009496264842429901
          policy_loss: -0.06700451225042343
          total_loss: -0.08211902408964104
          vf_explained_var: -0.5830950140953064
          vf_loss: 0.001072315273065922
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,159,3811.62,159000,-0.05,1,-3,994.89


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-09-17_12-36-05
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 160
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.5375478982925415
          entropy_coeff: 0.009999999999999998
          kl: 0.012381180438810554
          policy_loss: -0.05140774490104781
          total_loss: -0.060831688758399755
          vf_explained_var: 0.060454294085502625
          vf_loss: 0.0019850851256503826
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,160,3834.06,160000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-09-17_12-36-26
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 161
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9031488332483504
          entropy_coeff: 0.009999999999999998
          kl: 0.011671738904660639
          policy_loss: -0.0023719746619462966
          total_loss: -0.016241512530379826
          vf_explained_var: -0.07053528726100922
          vf_loss: 0.0014227746879138673
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip: 10.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,161,3855.23,161000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-09-17_12-36-47
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.9414480553732978
          entropy_coeff: 0.009999999999999998
          kl: 0.013559135910729225
          policy_loss: -0.04931067584289445
          total_loss: -0.062319658531083004
          vf_explained_var: -0.4272163510322571
          vf_loss: 0.0020616766880266367
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,162,3876.23,162000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-09-17_12-37-09
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 2.052362717522515
          entropy_coeff: 0.009999999999999998
          kl: 0.01045439754165792
          policy_loss: -0.06539859386781852
          total_loss: -0.08117583679656187
          vf_explained_var: -0.6981022953987122
          vf_loss: 0.001397198118502274
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,163,3898.04,163000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-09-17_12-37-31
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8629295614030625
          entropy_coeff: 0.009999999999999998
          kl: 0.010411088165436391
          policy_loss: 0.004538694272438685
          total_loss: -0.009293776295251317
          vf_explained_var: -0.6668480634689331
          vf_loss: 0.0014615147183778593
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,164,3920.01,164000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-09-17_12-37-52
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.3019887030124664
          entropy_coeff: 0.009999999999999998
          kl: 0.012157731157288826
          policy_loss: -0.06137906619244152
          total_loss: -0.06914592625366317
          vf_explained_var: -0.9362831115722656
          vf_loss: 0.001358162499188135
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,165,3941.3,165000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-09-17_12-38-13
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 166
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.8888964268896316
          entropy_coeff: 0.009999999999999998
          kl: 0.01088152402166575
          policy_loss: 0.01325711500313547
          total_loss: -0.0007654618471860886
          vf_explained_var: -0.5768797993659973
          vf_loss: 0.0013803680252749474
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,166,3961.81,166000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-09-17_12-38-34
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 167
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.2537896593411764
          entropy_coeff: 0.009999999999999998
          kl: 0.023187873543705568
          policy_loss: -0.04290616363286972
          total_loss: -0.04662536842127641
          vf_explained_var: -0.14490056037902832
          vf_loss: 0.0013901956948555178
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,167,3983.27,167000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-09-17_12-38-54
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 168
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9731562415758768
          entropy_coeff: 0.009999999999999998
          kl: 0.010086247661471386
          policy_loss: -0.04666903391480446
          total_loss: -0.06050267008443674
          vf_explained_var: -0.2763653099536896
          vf_loss: 0.0010510609514312818
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,168,4003.19,168000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-09-17_12-39-16
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.05
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.808015563752916
          entropy_coeff: 0.009999999999999998
          kl: 0.00807083169733649
          policy_loss: -0.06510322985963689
          total_loss: -0.07802431504759523
          vf_explained_var: -0.7681087851524353
          vf_loss: 0.0012806937845501428
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,169,4024.45,169000,-0.05,1,-3,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-09-17_12-39-37
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 170
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8732129732767742
          entropy_coeff: 0.009999999999999998
          kl: 0.01494741758274642
          policy_loss: -0.06217225169142087
          total_loss: -0.07261846156583893
          vf_explained_var: -0.12025958299636841
          vf_loss: 0.0011030560044067292
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,170,4046.03,170000,-0.02,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-09-17_12-39-58
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.02
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 171
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8517875777350532
          entropy_coeff: 0.009999999999999998
          kl: 0.0075463426812566875
          policy_loss: -0.06459145405226284
          total_loss: -0.07882903392116229
          vf_explained_var: -0.514999270439148
          vf_loss: 0.0006539617911686138
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,171,4066.47,171000,-0.02,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-09-17_12-40-20
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 172
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8555278327729967
          entropy_coeff: 0.009999999999999998
          kl: 0.008513789398525477
          policy_loss: -0.10576918911602762
          total_loss: -0.11930989631348186
          vf_explained_var: -0.6051615476608276
          vf_loss: 0.0009233383022041785
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,172,4088.23,172000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-09-17_12-40-41
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.908419312371148
          entropy_coeff: 0.009999999999999998
          kl: 0.012835154938535136
          policy_loss: -0.056669689135419
          total_loss: -0.06728586935334735
          vf_explained_var: -0.5707488656044006
          vf_loss: 0.002300182845081306
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,173,4109.48,173000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-09-17_12-41-02
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8957646833525763
          entropy_coeff: 0.009999999999999998
          kl: 0.00956577505889585
          policy_loss: -0.018407573882076474
          total_loss: -0.03147956869668431
          vf_explained_var: -0.835473895072937
          vf_loss: 0.0012888919707620516
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,174,4130.67,174000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-09-17_12-41-23
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 175
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.181014213297102
          entropy_coeff: 0.009999999999999998
          kl: 0.00871430510691863
          policy_loss: -0.014583014986581273
          total_loss: -0.03146728517280684
          vf_explained_var: -0.44637179374694824
          vf_loss: 0.0007382844631340251
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,175,4151.64,175000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-09-17_12-41-44
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 176
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9866928180058798
          entropy_coeff: 0.009999999999999998
          kl: 0.010989709013833861
          policy_loss: -0.05865188104410966
          total_loss: -0.07190047740522358
          vf_explained_var: -0.9817169904708862
          vf_loss: 0.0013373152521024975
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,176,4172.35,176000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-09-17_12-42-05
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 177
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9152248991860283
          entropy_coeff: 0.009999999999999998
          kl: 0.012769913084982716
          policy_loss: -0.07508023654421171
          total_loss: -0.08684204684363471
          vf_explained_var: -1.0
          vf_loss: 0.0012539586239856564
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,177,4193.48,177000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-09-17_12-42-29
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 178
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9787060446209377
          entropy_coeff: 0.009999999999999998
          kl: 0.009866772151086502
          policy_loss: 0.027354151838355593
          total_loss: 0.013159998754660288
          vf_explained_var: -0.5583335757255554
          vf_loss: 0.0008515084919054061
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,178,4216.98,178000,-0.01,1,-1,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-09-17_12-42-49
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9158910459942289
          entropy_coeff: 0.009999999999999998
          kl: 0.01372321819357675
          policy_loss: -0.00395935003956159
          total_loss: -0.014950409200456408
          vf_explained_var: -0.5404573678970337
          vf_loss: 0.0015732685453258455
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,179,4237.55,179000,-0.01,1,-1,996.07




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-09-17_12-43-34
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.910135226779514
          entropy_coeff: 0.009999999999999998
          kl: 0.009820711336038339
          policy_loss: -0.014753762694696585
          total_loss: -0.027690749636126888
          vf_explained_var: -0.5704227685928345
          vf_loss: 0.001445098905565424
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,180,4281.91,180000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-09-17_12-43-54
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 181
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.795401806301541
          entropy_coeff: 0.009999999999999998
          kl: 0.008676564348699988
          policy_loss: -0.07895863975087801
          total_loss: -0.09121870882809162
          vf_explained_var: -0.6625661849975586
          vf_loss: 0.0015244932735287067
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,181,4302.38,181000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-09-17_12-44-14
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 182
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.803948590490553
          entropy_coeff: 0.009999999999999998
          kl: 0.010481161689088065
          policy_loss: -0.11352839446109202
          total_loss: -0.12500043412567013
          vf_explained_var: -0.7762566208839417
          vf_loss: 0.0015308059519156814
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,182,4322.26,182000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-09-17_12-44-35
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 183
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9157222933239406
          entropy_coeff: 0.009999999999999998
          kl: 0.011354019971735832
          policy_loss: -0.04537493917677138
          total_loss: -0.05807631876733568
          vf_explained_var: -0.8247475624084473
          vf_loss: 0.0009997572483068022
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,183,4342.98,183000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-09-17_12-44-56
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 184
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7450766152805752
          entropy_coeff: 0.009999999999999998
          kl: 0.011487178622993996
          policy_loss: -0.043839367230733235
          total_loss: -0.05462666046288278
          vf_explained_var: -0.4535786509513855
          vf_loss: 0.001143397972919047
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,184,4364.56,184000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-09-17_12-45-18
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: -0.01
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 185
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7458354817496406
          entropy_coeff: 0.009999999999999998
          kl: 0.010862149467837057
          policy_loss: -0.10133874672982428
          total_loss: -0.11190385483205319
          vf_explained_var: 0.25032415986061096
          vf_loss: 0.0016735253454599943
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,185,4386.18,185000,-0.01,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-09-17_12-45-41
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 186
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.58891415198644
          entropy_coeff: 0.009999999999999998
          kl: 0.008099312289120114
          policy_loss: -0.05437823683023453
          total_loss: -0.06517029090060128
          vf_explained_var: -0.797540545463562
          vf_loss: 0.0012050279257689706
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,186,4408.98,186000,0,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-09-17_12-46-02
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 187
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9042450057135687
          entropy_coeff: 0.009999999999999998
          kl: 0.014524281695022416
          policy_loss: -0.035537341982126235
          total_loss: -0.04676181260082457
          vf_explained_var: -0.7296751141548157
          vf_loss: 0.0008384496562131163
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,187,4429.77,187000,0,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-09-17_12-46-23
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 188
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6492479681968688
          entropy_coeff: 0.009999999999999998
          kl: 0.014711113687248527
          policy_loss: -0.04128913324740198
          total_loss: -0.04906219123966164
          vf_explained_var: -0.41207683086395264
          vf_loss: 0.0016501133278426197
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,188,4451.07,188000,0,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-09-17_12-46-44
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.664021733072069
          entropy_coeff: 0.009999999999999998
          kl: 0.008349495163351022
          policy_loss: -0.013494716460506121
          total_loss: -0.02474330889268054
          vf_explained_var: -1.0
          vf_loss: 0.0013793414885488648
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,189,4472.29,189000,0,1,-1,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-09-17_12-47-06
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6111059175597298
          entropy_coeff: 0.009999999999999998
          kl: 0.008772848596345363
          policy_loss: -0.10891874374614821
          total_loss: -0.11977002268864048
          vf_explained_var: -0.809903621673584
          vf_loss: 0.0010440596275859409
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,190,4493.82,190000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-09-17_12-47-27
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 191
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6813222487767538
          entropy_coeff: 0.009999999999999998
          kl: 0.009958217426012736
          policy_loss: -0.020095432032313613
          total_loss: -0.03132448929051558
          vf_explained_var: -0.7511095404624939
          vf_loss: 0.0007988218176049283
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,191,4515.01,191000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-09-17_12-47-49
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 192
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7498871445655824
          entropy_coeff: 0.009999999999999998
          kl: 0.0148068818839772
          policy_loss: -0.06780821060140928
          total_loss: -0.07628695741295814
          vf_explained_var: -0.29771891236305237
          vf_loss: 0.0019047951678253917
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,192,4536.55,192000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-09-17_12-48-12
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 193
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7184889965587191
          entropy_coeff: 0.009999999999999998
          kl: 0.010668925517259157
          policy_loss: -0.02153450660407543
          total_loss: -0.03300156949294938
          vf_explained_var: -1.0
          vf_loss: 0.0005909589782176125
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,193,4559.44,193000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-09-17_12-48-33
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 194
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7440305034319559
          entropy_coeff: 0.009999999999999998
          kl: 0.0072871291910290535
          policy_loss: 0.08529128084580104
          total_loss: 0.07199059832427236
          vf_explained_var: -0.990918755531311
          vf_loss: 0.0006378488870622176
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,194,4580.59,194000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-09-17_12-48-53
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 195
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.919088606039683
          entropy_coeff: 0.009999999999999998
          kl: 0.012559574235208825
          policy_loss: -0.03851765270034472
          total_loss: -0.048753797966572976
          vf_explained_var: -0.4083922207355499
          vf_loss: 0.002919334766920656
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,195,4600.95,195000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-09-17_12-49-15
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 196
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9819213469823203
          entropy_coeff: 0.009999999999999998
          kl: 0.012313274533789257
          policy_loss: -0.036238116895159087
          total_loss: -0.04866375244326061
          vf_explained_var: -1.0
          vf_loss: 0.0014765292090790658
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,196,4622.22,196000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-09-17_12-49-39
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 197
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7812488476435344
          entropy_coeff: 0.009999999999999998
          kl: 0.010176056760228083
          policy_loss: -0.032043586547176046
          total_loss: -0.04407579863650931
          vf_explained_var: -1.0
          vf_loss: 0.000890255652484484
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,197,4646.26,197000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-09-17_12-50-01
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 198
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8324817167388068
          entropy_coeff: 0.009999999999999998
          kl: 0.013513632676657197
          policy_loss: -0.022153770758046046
          total_loss: -0.0325771763920784
          vf_explained_var: -0.2777040898799896
          vf_loss: 0.0014075398904323164
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,198,4668.15,198000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-09-17_12-50-22
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 199
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.800882871945699
          entropy_coeff: 0.009999999999999998
          kl: 0.010523390834674965
          policy_loss: -0.042553014390998414
          total_loss: -0.05471200665665998
          vf_explained_var: -0.9289901852607727
          vf_loss: 0.0007929032752549068
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,199,4689.12,199000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-09-17_12-50-43
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 200
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8304377476374307
          entropy_coeff: 0.009999999999999998
          kl: 0.010281148994502803
          policy_loss: -0.04973201841736833
          total_loss: -0.06157998217062818
          vf_explained_var: -0.6602421998977661
          vf_loss: 0.00151589323197388
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,200,4710.9,200000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-09-17_12-51-05
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 201
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9192161917686463
          entropy_coeff: 0.009999999999999998
          kl: 0.010069269177125787
          policy_loss: -0.025942989997565745
          total_loss: -0.039240504511528544
          vf_explained_var: -0.8082243204116821
          vf_loss: 0.0010559389574660194
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,201,4732.71,201000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-09-17_12-51-27
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 202
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8210470212830439
          entropy_coeff: 0.009999999999999998
          kl: 0.012264260502264958
          policy_loss: -0.039335542172193524
          total_loss: -0.05038734732402696
          vf_explained_var: -0.6723785996437073
          vf_loss: 0.001265173601374651
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,202,4754.54,202000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-09-17_12-51-49
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 203
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8247246901194254
          entropy_coeff: 0.009999999999999998
          kl: 0.009390285385000634
          policy_loss: -0.04217991601261828
          total_loss: -0.05516249918275409
          vf_explained_var: -0.872680127620697
          vf_loss: 0.0007522372554780709
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,203,4776,203000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-09-17_12-52-11
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 204
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8415960192680358
          entropy_coeff: 0.009999999999999998
          kl: 0.009413781317034632
          policy_loss: -0.023581356472439235
          total_loss: -0.03662644773721695
          vf_explained_var: -1.0
          vf_loss: 0.0008471510851652258
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,204,4798.1,204000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-09-17_12-52-32
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 205
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8630988041559855
          entropy_coeff: 0.009999999999999998
          kl: 0.015687619843919594
          policy_loss: -0.05484868082114392
          total_loss: -0.0647807346449958
          vf_explained_var: -0.741744339466095
          vf_loss: 0.0011603720415021396
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,205,4819.69,205000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-09-17_12-52-56
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 206
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8110047234429254
          entropy_coeff: 0.009999999999999998
          kl: 0.013717515309154245
          policy_loss: -0.061805362357861465
          total_loss: -0.0676212588014702
          vf_explained_var: -0.21270030736923218
          vf_loss: 0.005702308095189639
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,206,4843.66,206000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-09-17_12-53-19
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 207
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.726768562528822
          entropy_coeff: 0.009999999999999998
          kl: 0.016913280265167075
          policy_loss: -0.08474570529328453
          total_loss: -0.08905459766586622
          vf_explained_var: -0.05947989225387573
          vf_loss: 0.004831251477460481
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,207,4866.45,207000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-09-17_12-53-41
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 208
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8419441368844773
          entropy_coeff: 0.009999999999999998
          kl: 0.01246316005813286
          policy_loss: -0.11542360813667377
          total_loss: -0.12631067455642753
          vf_explained_var: -0.7849888205528259
          vf_loss: 0.0015433044554406983
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,208,4888.05,208000,0,1,-1,996.1


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-09-17_12-54-02
  done: false
  episode_len_mean: 996.1
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 209
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.785466229915619
          entropy_coeff: 0.009999999999999998
          kl: 0.009874135860669203
          policy_loss: -0.06448472989723086
          total_loss: -0.07631923022369544
          vf_explained_var: -0.780852198600769
          vf_loss: 0.0012752251518476341
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,209,4909.5,209000,0,1,-1,996.1




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-09-17_12-54-48
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 210
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8835110068321228
          entropy_coeff: 0.009999999999999998
          kl: 0.010695370312755435
          policy_loss: 0.016877177812986904
          total_loss: 0.004887766080598036
          vf_explained_var: -0.10867059230804443
          vf_loss: 0.0017061243396407613
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,210,4955.47,210000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-09-17_12-55-09
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 211
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.069602886835734
          entropy_coeff: 0.009999999999999998
          kl: 0.013167202549691403
          policy_loss: -0.05420509084231324
          total_loss: -0.06682654039727318
          vf_explained_var: -0.4852379560470581
          vf_loss: 0.0017471854018771813
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,211,4976.45,211000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-09-17_12-55-30
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 212
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.196355687247382
          entropy_coeff: 0.009999999999999998
          kl: 0.013378352572863367
          policy_loss: -0.021953478683200148
          total_loss: -0.03654515182392465
          vf_explained_var: -0.7393118143081665
          vf_loss: 0.0009430235775653272
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,212,4996.58,212000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-09-17_12-55-50
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 213
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0415684633784825
          entropy_coeff: 0.009999999999999998
          kl: 0.012957344788403309
          policy_loss: -0.10081625088221496
          total_loss: -0.11417060539954238
          vf_explained_var: -0.2644387483596802
          vf_loss: 0.0008347808102068181
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,213,5017.22,213000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-09-17_12-56-13
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 214
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1427780707677204
          entropy_coeff: 0.009999999999999998
          kl: 0.009424977445997958
          policy_loss: -0.07874864826185836
          total_loss: -0.0948236836741368
          vf_explained_var: -0.044945694506168365
          vf_loss: 0.0008236479030327043
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,214,5039.47,214000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-09-17_12-56-33
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 215
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0050293617778356
          entropy_coeff: 0.009999999999999998
          kl: 0.013246689226210289
          policy_loss: -0.06719138386348883
          total_loss: -0.07974726342492633
          vf_explained_var: -0.618224561214447
          vf_loss: 0.001128822560228097
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,215,5059.85,215000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-09-17_12-56-54
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 216
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9327411360210842
          entropy_coeff: 0.009999999999999998
          kl: 0.012801307043459116
          policy_loss: -0.09322091941204336
          total_loss: -0.10426265303459432
          vf_explained_var: -0.24551036953926086
          vf_loss: 0.0021341110974188067
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,216,5080.49,216000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-09-17_12-57-13
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 217
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.367240701781379
          entropy_coeff: 0.009999999999999998
          kl: 0.009859587919861587
          policy_loss: -0.10452458615311318
          total_loss: -0.12291349884536532
          vf_explained_var: -0.8213814496994019
          vf_loss: 0.0005455480740541437
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,217,5099.59,217000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-09-17_12-57-35
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 218
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8402519742647807
          entropy_coeff: 0.009999999999999998
          kl: 0.014111878109713653
          policy_loss: -0.08514437810000446
          total_loss: -0.09209464989188644
          vf_explained_var: -0.43059518933296204
          vf_loss: 0.004670893127978262
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,218,5121.77,218000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-09-17_12-57-55
  done: false
  episode_len_mean: 994.55
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 219
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0937465800179376
          entropy_coeff: 0.009999999999999998
          kl: 0.008736657669738534
          policy_loss: -0.08168211297856437
          total_loss: -0.09720296478933758
          vf_explained_var: -0.8143491744995117
          vf_loss: 0.001218285424773866
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,219,5141.44,219000,0,1,-1,994.55


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-09-17_12-58-16
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 220
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.968561413553026
          entropy_coeff: 0.009999999999999998
          kl: 0.011473198705135306
          policy_loss: -0.024857106028745572
          total_loss: -0.03776720892637968
          vf_explained_var: -0.21506379544734955
          vf_loss: 0.001262158933807061
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,220,5163.11,220000,0,1,-1,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-09-17_12-58-37
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 221
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9733978470166524
          entropy_coeff: 0.009999999999999998
          kl: 0.01598886112817439
          policy_loss: -0.05756007917225361
          total_loss: -0.06784268048488432
          vf_explained_var: -0.2295282930135727
          vf_loss: 0.0017680523477287757
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,221,5183.3,221000,0,1,-1,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-09-17_12-58-56
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.0
  episode_reward_min: -1.0
  episodes_this_iter: 1
  episodes_total: 222
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2903773466746014
          entropy_coeff: 0.009999999999999998
          kl: 0.010352948184403575
          policy_loss: -0.029747795768909985
          total_loss: -0.047199611986676854
          vf_explained_var: -0.6179518103599548
          vf_loss: 0.0004769305760116064
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,222,5202.52,222000,0,1,-1,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-09-17_12-59-17
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 223
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2018638836012947
          entropy_coeff: 0.009999999999999998
          kl: 0.009554359965701585
          policy_loss: -0.057626703986898065
          total_loss: -0.0742123673359553
          vf_explained_var: -0.141108900308609
          vf_loss: 0.0008417026238021208
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,223,5223.09,223000,0.01,1,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-09-17_12-59-38
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 1.0
  episode_reward_mean: 0.01
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 224
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8922201050652399
          entropy_coeff: 0.009999999999999998
          kl: 0.011891495723562724
          policy_loss: -0.07822657111618254
          total_loss: -0.08957069135374493
          vf_explained_var: -0.6731227040290833
          vf_loss: 0.0018637216093743013
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,224,5244.19,224000,0.01,1,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-09-17_13-00-01
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 225
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.325787483321296
          entropy_coeff: 0.009999999999999998
          kl: 0.009563224092002356
          policy_loss: -0.007777372416522768
          total_loss: -0.025763917879925834
          vf_explained_var: -0.87129145860672
          vf_loss: 0.0006757955251183982
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,225,5267.97,225000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-09-17_13-00-23
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 226
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.88915982776218
          entropy_coeff: 0.009999999999999998
          kl: 0.01358278661515812
          policy_loss: -0.06094362164537112
          total_loss: -0.07158148280448384
          vf_explained_var: -0.5859333276748657
          vf_loss: 0.00172663720505726
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,226,5289.14,226000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-09-17_13-00-43
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 227
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5116786479949953
          entropy_coeff: 0.009999999999999998
          kl: 0.010258098989230193
          policy_loss: -0.030051898087064426
          total_loss: -0.04987878517972098
          vf_explained_var: -1.0
          vf_loss: 0.00036045141039519674
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,227,5309.11,227000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-09-17_13-01-05
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 228
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.401009111934238
          entropy_coeff: 0.009999999999999998
          kl: 0.00972322257254182
          policy_loss: -0.05260191737777657
          total_loss: -0.07146546548853318
          vf_explained_var: -1.0
          vf_loss: 0.00047412764745078875
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,228,5331.73,228000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-09-17_13-01-28
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 229
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9264569030867682
          entropy_coeff: 0.009999999999999998
          kl: 0.011252371818146025
          policy_loss: -0.0819086945719189
          total_loss: -0.09393675873676936
          vf_explained_var: -0.024482425302267075
          vf_loss: 0.0018292679076289966
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,229,5354.12,229000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-09-17_13-01-49
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 230
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4067123254140217
          entropy_coeff: 0.009999999999999998
          kl: 0.0159808436475539
          policy_loss: -0.05798084607554806
          total_loss: -0.07368578298224343
          vf_explained_var: -0.5717572569847107
          vf_loss: 0.000682720208230118
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations_since_restore: 230
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,230,5375.01,230000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-09-17_13-02-09
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 231
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.375819969177246
          entropy_coeff: 0.009999999999999998
          kl: 0.016918729714040737
          policy_loss: -0.04716336644358105
          total_loss: -0.06241649819744958
          vf_explained_var: -0.5044807195663452
          vf_loss: 0.0003749067761721866
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations_since_restore: 231
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,231,5394.93,231000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-09-17_13-02-32
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 232
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6927730268902248
          entropy_coeff: 0.009999999999999998
          kl: 0.011777262272475436
          policy_loss: -0.20777333395348654
          total_loss: -0.21773468322224088
          vf_explained_var: 0.18922178447246552
          vf_loss: 0.0013069160253508017
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations_since_restore: 232
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,232,5418.2,232000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-09-17_13-02-52
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 233
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1705028494199117
          entropy_coeff: 0.009999999999999998
          kl: 0.008380091982388526
          policy_loss: -0.09298102768758933
          total_loss: -0.10974223961432775
          vf_explained_var: -0.24728107452392578
          vf_loss: 0.0009168300077564911
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_since_restore: 233
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,233,5438.08,233000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-09-17_13-03-11
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 234
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.4491851541731093
          entropy_coeff: 0.009999999999999998
          kl: 0.009736742219176501
          policy_loss: -0.04025728586647245
          total_loss: -0.059479636864529714
          vf_explained_var: -0.5977715253829956
          vf_loss: 0.0005905855042025603
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_since_restore: 234
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,234,5456.88,234000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-09-17_13-03-30
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 235
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3519351694318984
          entropy_coeff: 0.009999999999999998
          kl: 0.008716062051584277
          policy_loss: -0.04767108221227924
          total_loss: -0.06615736271358198
          vf_explained_var: -0.5999708771705627
          vf_loss: 0.00084463380305048
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
  iterations_since_restore: 235
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,235,5476.64,235000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-09-17_13-03-51
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 236
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4805419921874998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5584807634353637
          entropy_coeff: 0.009999999999999998
          kl: 0.0038307869018370786
          policy_loss: -0.03296334677272373
          total_loss: -0.05634311352752977
          vf_explained_var: -0.8542150855064392
          vf_loss: 0.00036419278191412253
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations_since_restore: 236
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,236,5497.11,236000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-09-17_13-04-12
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 237
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.2920024342007106
          entropy_coeff: 0.009999999999999998
          kl: 0.012525019140303408
          policy_loss: -0.08468877714541223
          total_loss: -0.10317967029081451
          vf_explained_var: -0.8052874803543091
          vf_loss: 0.0014197276471046886
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_since_restore: 237
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,237,5518.25,237000,0,0,0,996.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-09-17_13-04-31
  done: false
  episode_len_mean: 996.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 238
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.4051309400134615
          entropy_coeff: 0.009999999999999998
          kl: 0.009668669065648396
          policy_loss: -0.04426674925618702
          total_loss: -0.06556955153743425
          vf_explained_var: -0.5549781918525696
          vf_loss: 0.00042540446199078964
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_since_restore: 238
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,238,5537.14,238000,0,0,0,996.11




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-09-17_13-05-10
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 240
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.5302413596047295
          entropy_coeff: 0.009999999999999998
          kl: 0.008992739588868166
          policy_loss: -0.06487356589900123
          total_loss: -0.07560779410931799
          vf_explained_var: -0.08387229591608047
          vf_loss: 0.002407490720765458
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations_since_restore: 239
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,239,5576.44,239000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-09-17_13-05-32
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 241
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.5727473550372655
          entropy_coeff: 0.009999999999999998
          kl: 0.012811994657241253
          policy_loss: -0.05757978981774714
          total_loss: -0.08006197597003645
          vf_explained_var: -0.681026816368103
          vf_loss: 0.00016693572882407656
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_since_restore: 240
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,240,5597.62,240000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-09-17_13-05-53
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 242
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 2.5172870026694403
          entropy_coeff: 0.009999999999999998
          kl: 0.012729663930558487
          policy_loss: -0.014022427631749048
          total_loss: -0.03557681787448625
          vf_explained_var: -0.8781890869140625
          vf_loss: 0.0005599146766851643
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations_since_restore: 241
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,241,5619.36,241000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-09-17_13-06-16
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 243
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.8226065384017096
          entropy_coeff: 0.009999999999999998
          kl: 0.021640508631334102
          policy_loss: -0.022951164096593858
          total_loss: -0.03153983900944392
          vf_explained_var: 0.27084046602249146
          vf_loss: 0.004437802041259905
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iterations_since_restore: 242
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,242,5641.8,242000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-09-17_13-06-35
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 244
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.2193796462482878
          entropy_coeff: 0.009999999999999998
          kl: 0.015399175165676512
          policy_loss: -0.07748763422585196
          total_loss: -0.09247404902966486
          vf_explained_var: -0.5581624507904053
          vf_loss: 0.0016574208833036311
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_since_restore: 243
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,243,5661.27,243000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-09-17_13-06-55
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 245
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.1183500872717964
          entropy_coeff: 0.009999999999999998
          kl: 0.011411988366341008
          policy_loss: -0.00787665301726924
          total_loss: -0.023327657083670298
          vf_explained_var: -0.1909130960702896
          vf_loss: 0.0016195433744643298
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_since_restore: 244
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,244,5680.77,244000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-09-17_13-07-17
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 246
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.6699415524800618
          entropy_coeff: 0.009999999999999998
          kl: 0.007410018533563011
          policy_loss: -0.014593663232194052
          total_loss: -0.02762328452534146
          vf_explained_var: 0.06282100826501846
          vf_loss: 0.0009991751414620215
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations_since_restore: 245
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,245,5703.22,245000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-09-17_13-07-36
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 247
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.6288456016116672
          entropy_coeff: 0.009999999999999998
          kl: 0.00848820469784819
          policy_loss: -0.07270787155462635
          total_loss: -0.09552950834234555
          vf_explained_var: -0.5303865075111389
          vf_loss: 0.0004076127384097264
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_since_restore: 246
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,246,5721.42,246000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-09-17_13-07-53
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 248
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.6399842474195694
          entropy_coeff: 0.009999999999999998
          kl: 0.007505974543899422
          policy_loss: -0.028431431328256925
          total_loss: -0.051923176998065576
          vf_explained_var: -0.8157765865325928
          vf_loss: 0.00020289487120963814
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations_since_restore: 247
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,247,5738.54,247000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-09-17_13-08-12
  done: false
  episode_len_mean: 994.9
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 249
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.4170959181255762
          entropy_coeff: 0.009999999999999998
          kl: 0.010257711211463634
          policy_loss: -0.008020046022203233
          total_loss: -0.02808950493733088
          vf_explained_var: -0.3131256401538849
          vf_loss: 0.000404552183479407
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  iterations_since_restore: 248
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,248,5757.27,248000,0,0,0,994.9


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-09-17_13-08-30
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 250
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.60210837788052
          entropy_coeff: 0.009999999999999998
          kl: 0.004698857526665121
          policy_loss: -0.041860949703388745
          total_loss: -0.06606315546151664
          vf_explained_var: -0.14511190354824066
          vf_loss: 0.00012537952568689233
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_since_restore: 249
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,249,5775.7,249000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-09-17_13-08-51
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 251
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.1764312744140626
          entropy_coeff: 0.009999999999999998
          kl: 0.00884762589463038
          policy_loss: -0.046544923674729136
          total_loss: -0.06564622355831994
          vf_explained_var: 0.05611061677336693
          vf_loss: 0.0010686416013009471
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations_since_restore: 250
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,250,5796.51,250000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-09-17_13-09-11
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 252
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.598739269044664
          entropy_coeff: 0.009999999999999998
          kl: 0.0100361169889656
          policy_loss: -0.02394912954316371
          total_loss: -0.04791394385198752
          vf_explained_var: -0.35088589787483215
          vf_loss: 0.00021403972202986348
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iterations_since_restore: 251
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,251,5816.33,251000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-09-17_13-09-29
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 253
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.404777634143829
          entropy_coeff: 0.009999999999999998
          kl: 0.011342534185933692
          policy_loss: -0.00515492997235722
          total_loss: -0.026935775991943148
          vf_explained_var: -0.6658045053482056
          vf_loss: 0.00022296775976226652
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  iterations_since_restore: 252
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,252,5834.8,252000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-09-17_13-09-49
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 254
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.464255279964871
          entropy_coeff: 0.009999999999999998
          kl: 0.00703289825658509
          policy_loss: 0.011814820766448974
          total_loss: -0.01139675122168329
          vf_explained_var: -0.3809989392757416
          vf_loss: 0.00016362950208430346
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  iterations_since_restore: 253
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,253,5854.65,253000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-09-17_13-10-10
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 255
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 1.172269121143553
          entropy_coeff: 0.009999999999999998
          kl: 0.008866148927023568
          policy_loss: -0.002254860517051485
          total_loss: -0.01016233538587888
          vf_explained_var: -0.08020985871553421
          vf_loss: 0.0022175042940135526
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  iterations_since_restore: 254
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,254,5875.58,254000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-09-17_13-10-31
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 256
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18020324707031254
          cur_lr: 5.000000000000001e-05
          entropy: 2.3873217066129047
          entropy_coeff: 0.009999999999999998
          kl: 0.02298204412364342
          policy_loss: 0.030240821093320845
          total_loss: 0.011448577418923378
          vf_explained_var: -0.547936737537384
          vf_loss: 0.0009395352946537767
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  iterations_since_restore: 255
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,255,5896.7,255000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-09-17_13-10-52
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 257
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.0639383316040039
          entropy_coeff: 0.009999999999999998
          kl: 0.013952982865125405
          policy_loss: 0.05028067599568102
          total_loss: 0.044030386871761744
          vf_explained_var: -0.03897345811128616
          vf_loss: 0.0006175309279771884
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  iterations_since_restore: 256
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,256,5917.85,256000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-09-17_13-11-11
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 258
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.089183260334863
          entropy_coeff: 0.009999999999999998
          kl: 0.011181906765771361
          policy_loss: 0.04003968040148417
          total_loss: 0.022593638445768092
          vf_explained_var: -0.5010351538658142
          vf_loss: 0.00042326368867280607
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  iterations_since_restore: 257
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,257,5936.7,257000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-09-17_13-11-36
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 259
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 1.5433738811148539
          entropy_coeff: 0.009999999999999998
          kl: 0.010662395507604651
          policy_loss: 0.01581247817311022
          total_loss: 0.00558922580546803
          vf_explained_var: -0.5498093962669373
          vf_loss: 0.0023283872692445584
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  iterations_since_restore: 258
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,258,5961.3,258000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-09-17_13-11-57
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 260
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.338711112075382
          entropy_coeff: 0.009999999999999998
          kl: 0.012111252115523819
          policy_loss: 0.03643445461574528
          total_loss: 0.01714893157283465
          vf_explained_var: 0.12745891511440277
          vf_loss: 0.0008278600753884449
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  iterations_since_restore: 259
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,259,5982.55,259000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-09-17_13-12-16
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 261
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.477712015310923
          entropy_coeff: 0.009999999999999998
          kl: 0.007478591137112368
          policy_loss: -0.051330424265729056
          total_loss: -0.07359165789352523
          vf_explained_var: -0.02806364931166172
          vf_loss: 0.0004943814887155895
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
  iterations_since_restore: 260
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,260,6001.05,260000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-09-17_13-12-34
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 262
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.4529397792286343
          entropy_coeff: 0.009999999999999998
          kl: 0.01885646320976784
          policy_loss: 0.017440967427359687
          total_loss: -0.0015133948789702522
          vf_explained_var: -0.006633467972278595
          vf_loss: 0.0004780429225623569
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  iterations_since_restore: 261
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,261,6019.24,261000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-09-17_13-12-56
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 263
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.5738446897930567
          entropy_coeff: 0.009999999999999998
          kl: 0.007366168234181379
          policy_loss: -0.010512118879705667
          total_loss: -0.033688547027607756
          vf_explained_var: 0.009148666635155678
          vf_loss: 0.0005709096788551606
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  iterations_since_restore: 262
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,262,6040.87,262000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-09-17_13-13-15
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 264
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.5150874071651037
          entropy_coeff: 0.009999999999999998
          kl: 0.009870686889307064
          policy_loss: -0.08928388361301687
          total_loss: -0.1113191194832325
          vf_explained_var: -0.2960982322692871
          vf_loss: 0.0004475411179555522
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  iterations_since_restore: 263
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,263,6060.44,263000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-09-17_13-13-35
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 265
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.4838998529646132
          entropy_coeff: 0.009999999999999998
          kl: 0.006964994946142748
          policy_loss: -0.05879223350849416
          total_loss: -0.08042348954930073
          vf_explained_var: -0.34815871715545654
          vf_loss: 0.001325071110901869
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  iterations_since_restore: 264
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,264,6080.58,264000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-09-17_13-13-54
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.6568623595767553
          entropy_coeff: 0.009999999999999998
          kl: 0.0067920278052539295
          policy_loss: -0.05886666588485241
          total_loss: -0.08298657859882547
          vf_explained_var: -0.5992338061332703
          vf_loss: 0.0006127939402277762
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  iterations_since_restore: 265
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,265,6098.75,265000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-09-17_13-14-13
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 267
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.495101903544532
          entropy_coeff: 0.009999999999999998
          kl: 0.008789616749868657
          policy_loss: -0.09074656379719576
          total_loss: -0.11272072237398889
          vf_explained_var: -0.8771919012069702
          vf_loss: 0.0006009842327330261
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  iterations_since_restore: 266
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,266,6117.77,266000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-09-17_13-14-35
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 268
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.605266242557102
          entropy_coeff: 0.009999999999999998
          kl: 0.008884117093942124
          policy_loss: -0.02031387612223625
          total_loss: -0.0435525575445758
          vf_explained_var: -0.6254823207855225
          vf_loss: 0.0004125600045881583
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  iterations_since_restore: 267
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,267,6139.69,267000,0,0,0,996.07


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-09-17_13-14-53
  done: false
  episode_len_mean: 996.07
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 269
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.065948888990614
          entropy_coeff: 0.009999999999999998
          kl: 0.008675385659342262
          policy_loss: -0.012590409484174517
          total_loss: -0.030268286830849117
          vf_explained_var: -0.9930423498153687
          vf_loss: 0.0006366129956101456
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  iterations_since_restore: 268
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,268,6158.13,268000,0,0,0,996.07




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-09-17_13-15-34
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 270
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.4288941701253255
          entropy_coeff: 0.009999999999999998
          kl: 0.0185153060813357
          policy_loss: -0.001324428700738483
          total_loss: -0.020217958009905286
          vf_explained_var: -0.36393865942955017
          vf_loss: 0.00039063461734056344
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  iterations_since_restore: 269
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,269,6198.48,269000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-09-17_13-15-53
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 271
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2703048706054688
          cur_lr: 5.000000000000001e-05
          entropy: 2.7947929753197562
          entropy_coeff: 0.009999999999999998
          kl: 0.003679657428587014
          policy_loss: -0.06740568661027485
          total_loss: -0.09431537257300483
          vf_explained_var: -1.0
          vf_loss: 4.3615165163323077e-05
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  iterations_since_restore: 270
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,270,6217.59,270000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-09-17_13-16-12
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 272
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 2.2668595472971598
          entropy_coeff: 0.009999999999999998
          kl: 0.012661088515012015
          policy_loss: -0.0035423156287935046
          total_loss: -0.024239884979195065
          vf_explained_var: -0.21099282801151276
          vf_loss: 0.0002598455196372621
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  iterations_since_restore: 271
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,271,6236.6,271000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-09-17_13-16-33
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 273
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1351524353027344
          cur_lr: 5.000000000000001e-05
          entropy: 2.4588063054614597
          entropy_coeff: 0.009999999999999998
          kl: 0.027354001990714177
          policy_loss: 0.01135951206088066
          total_loss: -0.008425585242609184
          vf_explained_var: 0.05565381050109863
          vf_loss: 0.0011060083175026293
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  iterations_since_restore: 272
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,272,6257.4,272000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-09-17_13-16-57
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 274
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 2.7228121439615887
          entropy_coeff: 0.009999999999999998
          kl: 0.006555908932268236
          policy_loss: -0.056072154579063255
          total_loss: -0.0818501996083392
          vf_explained_var: -0.9795089960098267
          vf_loss: 0.0001210060645664473
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  iterations_since_restore: 273
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,273,6281.85,273000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-09-17_13-17-16
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 275
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20272865295410147
          cur_lr: 5.000000000000001e-05
          entropy: 2.796656839052836
          entropy_coeff: 0.009999999999999998
          kl: 0.002929389044971289
          policy_loss: -0.05718997925933864
          total_loss: -0.0845182174195846
          vf_explained_var: -0.8385451436042786
          vf_loss: 4.4457897537439646e-05
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  iterations_since_restore: 274
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,274,6301.16,274000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-09-17_13-17-36
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 276
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 2.743808952967326
          entropy_coeff: 0.009999999999999998
          kl: 0.005968940498635611
          policy_loss: -0.14591676187184122
          total_loss: -0.17268128825558557
          vf_explained_var: -0.6891257762908936
          vf_loss: 6.852521623336037e-05
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  iterations_since_restore: 275
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,275,6320.46,275000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-09-17_13-17-56
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 277
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 2.456915643480089
          entropy_coeff: 0.009999999999999998
          kl: 0.008951282872195055
          policy_loss: -0.025808141064933603
          total_loss: -0.049170313361618256
          vf_explained_var: -0.5455197095870972
          vf_loss: 0.00029964175125011633
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  iterations_since_restore: 276
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,276,6341.04,276000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-09-17_13-18-17
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 278
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 1.9432082699404822
          entropy_coeff: 0.009999999999999998
          kl: 0.01930281784279969
          policy_loss: -0.06910237587160534
          total_loss: -0.08511004828744465
          vf_explained_var: 0.20948821306228638
          vf_loss: 0.0014677918149472032
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  iterations_since_restore: 277
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,277,6361.36,277000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-09-17_13-18-39
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 279
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10136432647705074
          cur_lr: 5.000000000000001e-05
          entropy: 2.151294015513526
          entropy_coeff: 0.009999999999999998
          kl: 0.026719063651283213
          policy_loss: -0.007707224579321013
          total_loss: -0.025066928565502168
          vf_explained_var: 0.4342817962169647
          vf_loss: 0.0014448768426922874
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  iterations_since_restore: 278
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,278,6383.32,278000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-09-17_13-18-59
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 280
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.590017549196879
          entropy_coeff: 0.009999999999999998
          kl: 0.006035456381887233
          policy_loss: 0.0939078358726369
          total_loss: 0.06912598158750269
          vf_explained_var: -1.0
          vf_loss: 0.00020065097810907496
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iterations_since_restore: 279
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,279,6403.46,279000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-09-17_13-19-19
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 281
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.514970670806037
          entropy_coeff: 0.009999999999999998
          kl: 0.01295887925274677
          policy_loss: 0.07565965863565603
          total_loss: 0.05288413324289852
          vf_explained_var: -0.20167717337608337
          vf_loss: 0.00040382903986634723
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  iterations_since_restore: 280
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,280,6423.86,280000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-09-17_13-19-43
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 282
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.653016376495361
          entropy_coeff: 0.009999999999999998
          kl: 0.005141748840291803
          policy_loss: 0.04774459033376641
          total_loss: 0.02204261819521586
          vf_explained_var: -0.907717227935791
          vf_loss: 4.640608339185645e-05
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  iterations_since_restore: 281
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,281,6446.92,281000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-09-17_13-20-05
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 283
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.628738843070136
          entropy_coeff: 0.009999999999999998
          kl: 0.005978218634318347
          policy_loss: 0.02432834202837613
          total_loss: -0.0009620924376779132
          vf_explained_var: -0.95623779296875
          vf_loss: 8.798712709297736e-05
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iterations_since_restore: 282
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,282,6469.09,282000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-09-17_13-20-25
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 284
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.6835966454611886
          entropy_coeff: 0.009999999999999998
          kl: 0.00681183663113365
          policy_loss: 0.15913677844736313
          total_loss: 0.1333863417307536
          vf_explained_var: -1.0
          vf_loss: 4.9813299119705336e-05
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  iterations_since_restore: 283
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,283,6489.19,283000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-09-17_13-20-45
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 285
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.6861200041241116
          entropy_coeff: 0.009999999999999998
          kl: 0.008850301769182866
          policy_loss: 0.12586577178703415
          total_loss: 0.1003912650876575
          vf_explained_var: -1.0
          vf_loss: 4.1035588846069814e-05
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  iterations_since_restore: 284
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,284,6508.99,284000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-09-17_13-21-09
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 286
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.594762987560696
          entropy_coeff: 0.009999999999999998
          kl: 0.012465199660029657
          policy_loss: 0.0809033066034317
          total_loss: 0.05719075567192501
          vf_explained_var: -0.458179771900177
          vf_loss: 0.00033978787113382066
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  iterations_since_restore: 285
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,285,6532.82,285000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-09-17_13-21-31
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 287
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.6342938449647693
          entropy_coeff: 0.009999999999999998
          kl: 0.00742433108662074
          policy_loss: 0.097702813314067
          total_loss: 0.07259404645818802
          vf_explained_var: -0.9233048558235168
          vf_loss: 0.00010532662353297281
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  iterations_since_restore: 286
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,286,6554.99,286000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-09-17_13-21-52
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 288
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.6728565163082547
          entropy_coeff: 0.009999999999999998
          kl: 0.008199312571388628
          policy_loss: 0.07098161060776975
          total_loss: 0.04563160137169891
          vf_explained_var: -1.0
          vf_loss: 0.00013187835401266864
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  iterations_since_restore: 287
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,287,6576.02,287000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-09-17_13-22-12
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 289
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.71205964618259
          entropy_coeff: 0.009999999999999998
          kl: 0.006346682099491316
          policy_loss: 0.1056672726240423
          total_loss: 0.07954907003376219
          vf_explained_var: -0.9774124026298523
          vf_loss: 3.74023309228101e-05
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  iterations_since_restore: 288
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,288,6596.48,288000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-09-17_13-22-33
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 290
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15204648971557622
          cur_lr: 5.000000000000001e-05
          entropy: 2.7392754289839
          entropy_coeff: 0.009999999999999998
          kl: 0.002583901194258968
          policy_loss: -0.0015251671093412572
          total_loss: -0.028494014114969306
          vf_explained_var: -1.0
          vf_loss: 3.1033457606907986e-05
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
  iterations_since_restore: 289
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,289,6616.62,289000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-09-17_13-22-54
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 291
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 2.5808987591001724
          entropy_coeff: 0.009999999999999998
          kl: 0.013379837952902603
          policy_loss: 0.08939649338523546
          total_loss: 0.06504130644930733
          vf_explained_var: -0.9134762287139893
          vf_loss: 0.00043662014407649014
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000
  iterations_since_restore: 290
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,290,6637.8,290000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-09-17_13-23-14
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 292
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07602324485778811
          cur_lr: 5.000000000000001e-05
          entropy: 2.0300462113486395
          entropy_coeff: 0.009999999999999998
          kl: 0.021526053129853572
          policy_loss: -0.051824366839395626
          total_loss: -0.06783039818207423
          vf_explained_var: -0.36844465136528015
          vf_loss: 0.002657949284184724
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  iterations_since_restore: 291
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,291,6658.23,291000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-09-17_13-23-36
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 293
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 2.1903636693954467
          entropy_coeff: 0.009999999999999998
          kl: 0.01747640852145622
          policy_loss: -0.05006816662434074
          total_loss: -0.0689697943524354
          vf_explained_var: -0.5702649354934692
          vf_loss: 0.0010090894567029965
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  iterations_since_restore: 292
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,292,6679.98,292000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-09-17_13-23-58
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 294
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 2.715079519483778
          entropy_coeff: 0.009999999999999998
          kl: 0.005825567838672856
          policy_loss: 0.054059695452451706
          total_loss: 0.027669125960932836
          vf_explained_var: -0.7730311155319214
          vf_loss: 9.590674304086457e-05
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  iterations_since_restore: 293
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,293,6701.54,293000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-09-17_13-24-18
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 295
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 2.740007151497735
          entropy_coeff: 0.009999999999999998
          kl: 0.0059119788618384045
          policy_loss: 0.04569174028519127
          total_loss: 0.01905200283250047
          vf_explained_var: -1.0
          vf_loss: 8.616146703085784e-05
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  iterations_since_restore: 294
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,294,6721.98,294000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-09-17_13-24-38
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 296
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11403486728668213
          cur_lr: 5.000000000000001e-05
          entropy: 2.7596044884787667
          entropy_coeff: 0.009999999999999998
          kl: 0.004616245810422213
          policy_loss: 0.08974302700824208
          total_loss: 0.0626988043801652
          vf_explained_var: -1.0
          vf_loss: 2.5408403391540763e-05
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  iterations_since_restore: 295
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,295,6741.84,295000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-09-17_13-24-58
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 297
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05701743364334107
          cur_lr: 5.000000000000001e-05
          entropy: 2.7227751440472074
          entropy_coeff: 0.009999999999999998
          kl: 0.006479527670285368
          policy_loss: -0.0519945389435937
          total_loss: -0.07873268184355564
          vf_explained_var: -0.713198184967041
          vf_loss: 0.0001201620956433342
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  iterations_since_restore: 296
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,296,6761.61,296000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-09-17_13-25-18
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 298
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05701743364334107
          cur_lr: 5.000000000000001e-05
          entropy: 2.449505032433404
          entropy_coeff: 0.009999999999999998
          kl: 0.025076790889276872
          policy_loss: 0.01081434248222245
          total_loss: -0.011442748664153946
          vf_explained_var: -0.025228386744856834
          vf_loss: 0.000808143576917549
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
  iterations_since_restore: 297
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,297,6781.83,297000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-09-17_13-25-38
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 299
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0855261504650116
          cur_lr: 5.000000000000001e-05
          entropy: 2.5872764481438533
          entropy_coeff: 0.009999999999999998
          kl: 0.01653148897503319
          policy_loss: 0.024194898006195825
          total_loss: -3.3324030745360584e-05
          vf_explained_var: -0.823795735836029
          vf_loss: 0.00023066772506960358
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  iterations_since_restore: 298
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,298,6801.41,298000,0,0,0,995.96




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-09-17_13-26-20
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 300
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0855261504650116
          cur_lr: 5.000000000000001e-05
          entropy: 1.943097103966607
          entropy_coeff: 0.009999999999999998
          kl: 0.02565695260900607
          policy_loss: -0.03890473345915477
          total_loss: -0.054285597883992724
          vf_explained_var: -0.21230025589466095
          vf_loss: 0.0018557662176640912
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
  iterations_since_restore: 299
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,299,6843.72,299000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-09-17_13-26-41
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 301
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.5828995757632787
          entropy_coeff: 0.009999999999999998
          kl: 0.01222180818445694
          policy_loss: -0.05976082806785901
          total_loss: -0.0830717012596627
          vf_explained_var: -0.9296801090240479
          vf_loss: 0.0009501971827679275
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  iterations_since_restore: 300
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,300,6864.96,300000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-09-17_13-27-02
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 302
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.5723404407501222
          entropy_coeff: 0.009999999999999998
          kl: 0.015406273359853352
          policy_loss: -0.014544805884361267
          total_loss: -0.03799937263958984
          vf_explained_var: -1.0
          vf_loss: 0.0002923779955987508
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  iterations_since_restore: 301
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,301,6885.48,301000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-09-17_13-27-21
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 303
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.465876163376702
          entropy_coeff: 0.009999999999999998
          kl: 0.013190686082480084
          policy_loss: -0.12211132724252012
          total_loss: -0.1448359102010727
          vf_explained_var: -0.8919652104377747
          vf_loss: 0.00024195426053160595
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  iterations_since_restore: 302
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,302,6904.72,302000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-09-17_13-27-41
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 304
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.4633743074205188
          entropy_coeff: 0.009999999999999998
          kl: 0.012553596910081764
          policy_loss: -0.021862055154310334
          total_loss: -0.0442400753705038
          vf_explained_var: -0.952746570110321
          vf_loss: 0.000645231101791271
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterations_since_restore: 303
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,303,6924.92,303000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-09-17_13-28-01
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 305
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.4431792497634888
          entropy_coeff: 0.009999999999999998
          kl: 0.015462125781565087
          policy_loss: 0.06915298985938231
          total_loss: 0.0474073226046231
          vf_explained_var: -0.6804192662239075
          vf_loss: 0.0007024998766913389
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  iterations_since_restore: 304
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,304,6944.97,304000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-09-17_13-28-22
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 306
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.5267819934421114
          entropy_coeff: 0.009999999999999998
          kl: 0.0178893573896566
          policy_loss: -0.06795270920006766
          total_loss: -0.0905139504517946
          vf_explained_var: -0.581429123878479
          vf_loss: 0.0004115661172868891
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  iterations_since_restore: 305
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,305,6965.49,305000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-09-17_13-28-43
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 307
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.505867849455939
          entropy_coeff: 0.009999999999999998
          kl: 0.017343864268813725
          policy_loss: -0.0458839846154054
          total_loss: -0.06821206888804833
          vf_explained_var: -1.0
          vf_loss: 0.0005055652472543039
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  iterations_since_restore: 306
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,306,6986.52,306000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-09-17_13-29-04
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 308
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.499177736706204
          entropy_coeff: 0.009999999999999998
          kl: 0.01354604856810224
          policy_loss: -0.024357867903179592
          total_loss: -0.04721443373709917
          vf_explained_var: -0.9994920492172241
          vf_loss: 0.000397397769201133
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iterations_since_restore: 307
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,307,7007.59,307000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-09-17_13-29-24
  done: false
  episode_len_mean: 994.41
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 309
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12828922569751744
          cur_lr: 5.000000000000001e-05
          entropy: 2.495782306459215
          entropy_coeff: 0.009999999999999998
          kl: 0.03506498531670215
          policy_loss: -0.03693333088109891
          total_loss: -0.056966881391902764
          vf_explained_var: -0.6741402745246887
          vf_loss: 0.0004258098427574926
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
  iterations_since_restore: 308
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,308,7027.51,308000,0,0,0,994.41


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-09-17_13-29-45
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 310
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 2.5108295891020034
          entropy_coeff: 0.009999999999999998
          kl: 0.015809574851891578
          policy_loss: 0.05791384068628152
          total_loss: 0.03679774660203192
          vf_explained_var: -0.4413624703884125
          vf_loss: 0.0009499044674157631
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
  iterations_since_restore: 309
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,309,7048.72,309000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-09-17_13-30-06
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 311
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 2.5390938149558173
          entropy_coeff: 0.009999999999999998
          kl: 0.012153583631710083
          policy_loss: 0.024798952539761863
          total_loss: 0.0022855339778794182
          vf_explained_var: -0.4724244475364685
          vf_loss: 0.0005387588354399971
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
  iterations_since_restore: 310
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,310,7069.37,310000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-09-17_13-30-27
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 312
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19243383854627605
          cur_lr: 5.000000000000001e-05
          entropy: 1.2847437143325806
          entropy_coeff: 0.009999999999999998
          kl: 0.04750924064854833
          policy_loss: -0.04836229748196072
          total_loss: -0.049563030733002555
          vf_explained_var: 0.08580955117940903
          vf_loss: 0.002504315571988829
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  iterations_since_restore: 311
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,311,7090.72,311000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-09-17_13-30-48
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 313
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.4916642904281616
          entropy_coeff: 0.009999999999999998
          kl: 0.005707322127863663
          policy_loss: -0.09620169376333555
          total_loss: -0.11913233912653393
          vf_explained_var: -0.5865264534950256
          vf_loss: 0.0003385753644882546
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
  iterations_since_restore: 312
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,312,7111.2,312000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-09-17_13-31-09
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 314
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.5537684784995185
          entropy_coeff: 0.009999999999999998
          kl: 0.007723044732857827
          policy_loss: -0.04003762768374549
          total_loss: -0.06311881169676781
          vf_explained_var: -0.656319797039032
          vf_loss: 0.00022723653888129372
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  iterations_since_restore: 313
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,313,7132.3,313000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-09-17_13-31-33
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 315
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.5364712105857
          entropy_coeff: 0.009999999999999998
          kl: 0.00706910255157175
          policy_loss: -0.08178039809895886
          total_loss: -0.10473292635546791
          vf_explained_var: -0.7417970299720764
          vf_loss: 0.0003716798840792358
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000
  iterations_since_restore: 314
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,314,7155.87,314000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-09-17_13-31-51
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 316
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6301744408077665
          entropy_coeff: 0.009999999999999998
          kl: 0.015018740838336776
          policy_loss: -0.0678845244149367
          total_loss: -0.08955935332924128
          vf_explained_var: 0.596545398235321
          vf_loss: 0.0002917445962035951
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  iterations_since_restore: 315
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,315,7174.48,315000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-09-17_13-32-11
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 317
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.5903414567311605
          entropy_coeff: 0.009999999999999998
          kl: 0.013666468243457332
          policy_loss: -0.03220473610692554
          total_loss: -0.053694354101187655
          vf_explained_var: -0.4365336000919342
          vf_loss: 0.0004689619866743063
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  iterations_since_restore: 316
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,316,7193.98,316000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-09-17_13-32-31
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 318
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6392770104938084
          entropy_coeff: 0.009999999999999998
          kl: 0.014180509863822325
          policy_loss: 0.04551607188251283
          total_loss: 0.023548009991645812
          vf_explained_var: 0.13509918749332428
          vf_loss: 0.00033149601658806206
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  iterations_since_restore: 317
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,317,7213.6,317000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-09-17_13-32-51
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 319
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.627832285563151
          entropy_coeff: 0.009999999999999998
          kl: 0.011714868604749708
          policy_loss: 0.01712141020430459
          total_loss: -0.005386201292276382
          vf_explained_var: -0.6045064926147461
          vf_loss: 0.0003892079620426456
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  iterations_since_restore: 318
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,318,7233.54,318000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-09-17_13-33-09
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 320
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.649402101834615
          entropy_coeff: 0.009999999999999998
          kl: 0.011255105400806344
          policy_loss: -0.01251573268738058
          total_loss: -0.03542601902607
          vf_explained_var: 0.03595118969678879
          vf_loss: 0.0003349406571487634
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  iterations_since_restore: 319
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,319,7252.32,319000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-09-17_13-33-29
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 321
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.647917111714681
          entropy_coeff: 0.009999999999999998
          kl: 0.009798290413640389
          policy_loss: 0.128355705510411
          total_loss: 0.10495235657112466
          vf_explained_var: 0.690070629119873
          vf_loss: 0.00024753660576000885
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  iterations_since_restore: 320
  node_ip: 10.55.229.87
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,320,7271.71,320000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-09-17_13-33-49
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 322
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6822238948610093
          entropy_coeff: 0.009999999999999998
          kl: 0.013639886780067122
          policy_loss: 0.014979426604178217
          total_loss: -0.007476299131910006
          vf_explained_var: -0.09404674917459488
          vf_loss: 0.00042935083380749955
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  iterations_since_restore: 321
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,321,7291.91,321000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-09-17_13-34-09
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 323
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.664602420065138
          entropy_coeff: 0.009999999999999998
          kl: 0.006705674179633863
          policy_loss: -0.04995034212867419
          total_loss: -0.07441381609274281
          vf_explained_var: -0.2908651530742645
          vf_loss: 0.0002469519293703747
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  iterations_since_restore: 322
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,322,7311.83,322000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-09-17_13-34-27
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 324
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6984516196780737
          entropy_coeff: 0.009999999999999998
          kl: 0.008099874828295286
          policy_loss: -0.054858312548862566
          total_loss: -0.07925954167213704
          vf_explained_var: -0.6145387887954712
          vf_loss: 0.0002452529770178242
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  iterations_since_restore: 323
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,323,7329.74,323000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-09-17_13-34-45
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 325
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7145500156614517
          entropy_coeff: 0.009999999999999998
          kl: 0.00916484364463014
          policy_loss: -0.0075305701181706455
          total_loss: -0.0317483983726965
          vf_explained_var: -0.6218150854110718
          vf_loss: 0.00028223175506557
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
  iterations_since_restore: 324
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,324,7348.21,324000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-09-17_13-35-04
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 326
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.670909153090583
          entropy_coeff: 0.009999999999999998
          kl: 0.0060912616393631915
          policy_loss: -0.009594168343270818
          total_loss: -0.03388900289105044
          vf_explained_var: -0.5336843132972717
          vf_loss: 0.000656010420379971
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iterations_since_restore: 325
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,325,7367.25,325000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-09-17_13-35-24
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 327
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7275604910320705
          entropy_coeff: 0.009999999999999998
          kl: 0.007098065238040855
          policy_loss: -0.08070705276396539
          total_loss: -0.10584342761172189
          vf_explained_var: -0.5284174084663391
          vf_loss: 9.036700936727963e-05
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  iterations_since_restore: 326
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,326,7386.34,326000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-09-17_13-35-41
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 328
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6853039026260377
          entropy_coeff: 0.009999999999999998
          kl: 0.010833132522216557
          policy_loss: -0.07705482666691145
          total_loss: -0.10067113836606344
          vf_explained_var: -0.36802974343299866
          vf_loss: 0.00010973540863435321
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  iterations_since_restore: 327
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,327,7403.73,327000,0,0,0,995.96


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-09-17_13-36-00
  done: false
  episode_len_mean: 995.96
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 329
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.676465378867255
          entropy_coeff: 0.009999999999999998
          kl: 0.010095163316262696
          policy_loss: -0.03114379263586468
          total_loss: -0.05480850860476494
          vf_explained_var: 0.16705097258090973
          vf_loss: 0.00018596170450564388
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iterations_since_restore: 328
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,328,7422.53,328000,0,0,0,995.96




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-09-17_13-36-36
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 330
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.721941203541226
          entropy_coeff: 0.009999999999999998
          kl: 0.0080743008743707
          policy_loss: -0.03015322627292739
          total_loss: -0.05484855638609992
          vf_explained_var: -0.126287043094635
          vf_loss: 0.0001934258487431685
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  iterations_since_restore: 329
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,329,7458.67,329000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-09-17_13-36-57
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 331
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6860768265194364
          entropy_coeff: 0.009999999999999998
          kl: 0.008128431784233668
          policy_loss: -0.05347922829290231
          total_loss: -0.07776551035543283
          vf_explained_var: -0.2141050398349762
          vf_loss: 0.00022820530513298663
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  iterations_since_restore: 330
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,330,7479.79,330000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-09-17_13-37-18
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 332
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6899021201663547
          entropy_coeff: 0.009999999999999998
          kl: 0.013832345930857097
          policy_loss: -0.0643574368622568
          total_loss: -0.08668933854334884
          vf_explained_var: -0.4183999300003052
          vf_loss: 0.0005744021594687688
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  iterations_since_restore: 331
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,331,7500.03,331000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-09-17_13-37-36
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 333
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7192030005984837
          entropy_coeff: 0.009999999999999998
          kl: 0.006572312706226195
          policy_loss: -0.026911605811781352
          total_loss: -0.05192607587410344
          vf_explained_var: -0.4737413227558136
          vf_loss: 0.00028045787253682243
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
  iterations_since_restore: 332
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,332,7518.22,332000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-09-17_13-37-54
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 334
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7130660984251236
          entropy_coeff: 0.009999999999999998
          kl: 0.008168578612684187
          policy_loss: 0.0004251049210627874
          total_loss: -0.02404476830528842
          vf_explained_var: -0.13306017220020294
          vf_loss: 0.00030292009290254404
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  iterations_since_restore: 333
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,333,7536.68,333000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-09-17_13-38-15
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 335
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7187144464916653
          entropy_coeff: 0.009999999999999998
          kl: 0.005068223096763644
          policy_loss: -0.017105157756143147
          total_loss: -0.04236310794949531
          vf_explained_var: -0.4819315969944
          vf_loss: 0.00046624623533817714
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  iterations_since_restore: 334
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,334,7557.46,334000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-09-17_13-38-33
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 336
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6532060384750364
          entropy_coeff: 0.009999999999999998
          kl: 0.00913983088617999
          policy_loss: -0.07659812370936075
          total_loss: -0.10009994871086544
          vf_explained_var: -0.42734771966934204
          vf_loss: 0.00039201513593272667
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  iterations_since_restore: 335
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,335,7575.73,335000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-09-17_13-38-54
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 337
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6525537941190933
          entropy_coeff: 0.009999999999999998
          kl: 0.007584276056918239
          policy_loss: -0.042454076806704204
          total_loss: -0.0659783790508906
          vf_explained_var: -0.7123852968215942
          vf_loss: 0.0008120273328434753
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  iterations_since_restore: 336
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,336,7596.72,336000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-09-17_13-39-13
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 338
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.74482438299391
          entropy_coeff: 0.009999999999999998
          kl: 0.006225130138213271
          policy_loss: -0.004856618607623709
          total_loss: -0.030186659263239966
          vf_explained_var: -0.552081823348999
          vf_loss: 0.00032131355809623426
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  iterations_since_restore: 337
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,337,7615.51,337000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-09-17_13-39-32
  done: false
  episode_len_mean: 994.78
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 339
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6095751418007747
          entropy_coeff: 0.009999999999999998
          kl: 0.008488776476751742
          policy_loss: -0.04109926610771153
          total_loss: -0.06429805560037494
          vf_explained_var: -0.15739214420318604
          vf_loss: 0.0004466714235907906
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  iterations_since_restore: 338
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,338,7634.74,338000,0,0,0,994.78


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-09-17_13-39-54
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 340
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6967735211054484
          entropy_coeff: 0.009999999999999998
          kl: 0.006022063673556449
          policy_loss: -0.05559521358874109
          total_loss: -0.08056984610027737
          vf_explained_var: -0.4697314500808716
          vf_loss: 0.00025483121989711637
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  iterations_since_restore: 339
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,339,7655.89,339000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-09-17_13-40-14
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 341
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.7266912698745727
          entropy_coeff: 0.009999999999999998
          kl: 0.007102061331480137
          policy_loss: -0.012686932273209096
          total_loss: -0.03760014039774736
          vf_explained_var: -0.4610620141029358
          vf_loss: 0.0003036857978966307
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  iterations_since_restore: 340
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,340,7676,340000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-09-17_13-40-32
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 342
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6579215314653184
          entropy_coeff: 0.009999999999999998
          kl: 0.008395834049997969
          policy_loss: -0.05904081579711702
          total_loss: -0.0829032953414652
          vf_explained_var: -0.48606160283088684
          vf_loss: 0.00029327121384186387
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iterations_since_restore: 341
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,341,7694.35,341000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-09-17_13-40-52
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 343
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.1969785922103457
          entropy_coeff: 0.009999999999999998
          kl: 0.0068758533798056395
          policy_loss: 0.0857872744401296
          total_loss: 0.0735799522863494
          vf_explained_var: -0.464179664850235
          vf_loss: 0.007777745729112616
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  iterations_since_restore: 342
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,342,7714.51,342000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-09-17_13-41-13
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 344
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.5027475449774
          entropy_coeff: 0.009999999999999998
          kl: 0.010281908209682555
          policy_loss: 0.1568133345908589
          total_loss: 0.1356921030415429
          vf_explained_var: -0.5464722514152527
          vf_loss: 0.0009383633782476863
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  iterations_since_restore: 343
  node_ip: 10.55.229.87
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,343,7735.47,343000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-09-17_13-41-34
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 345
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.524704894754622
          entropy_coeff: 0.009999999999999998
          kl: 0.0092665881465344
          policy_loss: -0.0686433658003807
          total_loss: -0.09087883217467202
          vf_explained_var: -0.37660691142082214
          vf_loss: 0.00033677785647038644
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  iterations_since_restore: 344
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,344,7755.8,344000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-09-17_13-41-52
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 346
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.429302243391673
          entropy_coeff: 0.009999999999999998
          kl: 0.011110326919637595
          policy_loss: -0.09010871069298851
          total_loss: -0.11078161117103365
          vf_explained_var: -0.2532065212726593
          vf_loss: 0.00041312296760022746
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
  iterations_since_restore: 345
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,345,7774.07,345000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-09-17_13-42-11
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 347
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.6006974299748737
          entropy_coeff: 0.009999999999999998
          kl: 0.006052110022783649
          policy_loss: -0.042362586905558906
          total_loss: -0.06636353203405937
          vf_explained_var: -0.8779569864273071
          vf_loss: 0.00025908021408819576
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  iterations_since_restore: 346
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,346,7793.09,346000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-09-17_13-42-31
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 348
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.5803742912080554
          entropy_coeff: 0.009999999999999998
          kl: 0.01045361353186001
          policy_loss: -0.02661841654529174
          total_loss: -0.04907266307208273
          vf_explained_var: -1.0
          vf_loss: 0.00033205220495599656
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  iterations_since_restore: 347
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,347,7813.12,347000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-09-17_13-42-50
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 349
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.424767699506548
          entropy_coeff: 0.009999999999999998
          kl: 0.007997640834481718
          policy_loss: -0.011387708245052232
          total_loss: -0.03317649604545699
          vf_explained_var: -0.34963351488113403
          vf_loss: 0.00015036388613225425
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  iterations_since_restore: 348
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,348,7831.83,348000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-09-17_13-43-09
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 350
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.409401423401303
          entropy_coeff: 0.009999999999999998
          kl: 0.006837070894781761
          policy_loss: -0.07096812008983559
          total_loss: -0.09288744141037265
          vf_explained_var: -0.31013253331184387
          vf_loss: 0.00020116848465679343
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  iterations_since_restore: 349
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,349,7850.65,349000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-09-17_13-43-28
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 351
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2886507578194141
          cur_lr: 5.000000000000001e-05
          entropy: 2.4264811555544537
          entropy_coeff: 0.009999999999999998
          kl: 0.003183575550155086
          policy_loss: -0.004329053602284855
          total_loss: -0.027491595823731688
          vf_explained_var: -0.571775496006012
          vf_loss: 0.0001833285928114492
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  iterations_since_restore: 350
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,350,7869.91,350000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-09-17_13-43-47
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 352
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 2.3928215424219768
          entropy_coeff: 0.009999999999999998
          kl: 0.01016050058314099
          policy_loss: -0.007361174250642459
          total_loss: -0.02966132230228848
          vf_explained_var: -0.12918515503406525
          vf_loss: 0.00016164853215438295
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  iterations_since_restore: 351
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,351,7888.85,351000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-09-17_13-44-09
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 353
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 2.2516479684246913
          entropy_coeff: 0.009999999999999998
          kl: 0.013637727958336576
          policy_loss: -0.1650662715236346
          total_loss: -0.18551249926288924
          vf_explained_var: -0.18262560665607452
          vf_loss: 0.00010198090511595283
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  iterations_since_restore: 352
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,352,7910.29,352000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-09-17_13-44-28
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 354
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 2.6691034343507556
          entropy_coeff: 0.009999999999999998
          kl: 0.010753496375288505
          policy_loss: -0.021820128046804003
          total_loss: -0.04679673516915904
          vf_explained_var: -0.829899787902832
          vf_loss: 0.00016242640797877863
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  iterations_since_restore: 353
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,353,7930.06,353000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-09-17_13-44-47
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 355
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14432537890970706
          cur_lr: 5.000000000000001e-05
          entropy: 2.442146784729428
          entropy_coeff: 0.009999999999999998
          kl: 0.022506357961372497
          policy_loss: -0.09260935046606594
          total_loss: -0.11360132710801231
          vf_explained_var: 0.012914324179291725
          vf_loss: 0.00018125386047813664
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  iterations_since_restore: 354
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,354,7948.82,354000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-09-17_13-45-06
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 356
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.5479406012429133
          entropy_coeff: 0.009999999999999998
          kl: 0.00820466397147028
          policy_loss: -0.033725285799139075
          total_loss: -0.05724827365742789
          vf_explained_var: -0.30727967619895935
          vf_loss: 0.00018020605675196243
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  iterations_since_restore: 355
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,355,7967.53,355000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-09-17_13-45-28
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 357
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.5904988765716555
          entropy_coeff: 0.009999999999999998
          kl: 0.00814372348868291
          policy_loss: -0.011529728439119128
          total_loss: -0.035502453106972906
          vf_explained_var: -0.1817127913236618
          vf_loss: 0.0001692431780712569
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  iterations_since_restore: 356
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,356,7989.37,356000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-09-17_13-45-46
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 358
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.6028334935506185
          entropy_coeff: 0.009999999999999998
          kl: 0.009718088618194306
          policy_loss: 0.002227604513367017
          total_loss: -0.02154703280991978
          vf_explained_var: -0.5414928197860718
          vf_loss: 0.0001498463695093556
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  iterations_since_restore: 357
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,357,8007.65,357000,0,0,0,995.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-09-17_13-46-07
  done: false
  episode_len_mean: 995.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 359
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.4946287194887797
          entropy_coeff: 0.009999999999999998
          kl: 0.006631969990679268
          policy_loss: 0.20513532848821747
          total_loss: 0.18179127929939165
          vf_explained_var: 0.24851621687412262
          vf_loss: 0.00016649667195957186
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  iterations_since_restore: 358
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,358,8028.2,358000,0,0,0,995.99




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-09-17_13-46-44
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 360
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.2289300349023606
          entropy_coeff: 0.009999999999999998
          kl: 0.012169338283106453
          policy_loss: -0.10791561073727078
          total_loss: -0.12699826773669984
          vf_explained_var: -0.006891065277159214
          vf_loss: 0.0005721295322776617
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  iterations_since_restore: 359
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,359,8065.82,359000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-09-17_13-47-05
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 361
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.479541603724162
          entropy_coeff: 0.009999999999999998
          kl: 0.012160532714472438
          policy_loss: 0.018200669023725722
          total_loss: -0.0036669424838489954
          vf_explained_var: -0.4719142019748688
          vf_loss: 0.00029519534239928665
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  iterations_since_restore: 360
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,360,8086.82,360000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-09-17_13-47-25
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 362
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2164880683645607
          cur_lr: 5.000000000000001e-05
          entropy: 2.2422058277659946
          entropy_coeff: 0.009999999999999998
          kl: 0.032095341449972475
          policy_loss: -0.0003589140044318305
          total_loss: -0.015599201122919718
          vf_explained_var: 0.29046642780303955
          vf_loss: 0.00023351311572494322
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iterations_since_restore: 361
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,361,8106.57,361000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-09-17_13-47-44
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 363
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.712560118569268
          entropy_coeff: 0.009999999999999998
          kl: 0.007307432627861113
          policy_loss: -0.014730172355969746
          total_loss: -0.03916866381963094
          vf_explained_var: -0.9600992798805237
          vf_loss: 0.0003141505580768151
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_since_restore: 362
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,362,8125.77,362000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-09-17_13-48-05
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 364
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.7046281311247085
          entropy_coeff: 0.009999999999999998
          kl: 0.009669853967214455
          policy_loss: -0.046685966854501104
          total_loss: -0.07029772903252807
          vf_explained_var: -0.07758232951164246
          vf_loss: 0.000294408303357664
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  iterations_since_restore: 363
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,363,8146.79,363000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-09-17_13-48-26
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 365
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.662181838353475
          entropy_coeff: 0.009999999999999998
          kl: 0.009337135613887406
          policy_loss: -0.042644434173901874
          total_loss: -0.06600863629331191
          vf_explained_var: -0.8159119486808777
          vf_loss: 0.00022555000286956784
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_since_restore: 364
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,364,8167.16,364000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-09-17_13-48-44
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 366
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.585230827331543
          entropy_coeff: 0.009999999999999998
          kl: 0.016188980909258098
          policy_loss: -0.0006983224716451433
          total_loss: -0.02113165503574742
          vf_explained_var: -0.8551611304283142
          vf_loss: 0.00016189483487020576
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  iterations_since_restore: 365
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,365,8185.5,365000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-09-17_13-49-03
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 367
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.705186790890164
          entropy_coeff: 0.009999999999999998
          kl: 0.005934364896149052
          policy_loss: 0.008653082119094001
          total_loss: -0.016348461227284537
          vf_explained_var: -0.3664303123950958
          vf_loss: 0.0001232445274935243
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  iterations_since_restore: 366
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,366,8204.1,366000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-09-17_13-49-22
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 368
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3247321025468408
          cur_lr: 5.000000000000001e-05
          entropy: 2.7252588987350466
          entropy_coeff: 0.009999999999999998
          kl: 0.0048783795375248375
          policy_loss: -0.011173952536450493
          total_loss: -0.036769012227240536
          vf_explained_var: -0.26505523920059204
          vf_loss: 7.336366434679399e-05
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  iterations_since_restore: 367
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,367,8222.71,367000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-09-17_13-49-42
  done: false
  episode_len_mean: 994.76
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 369
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.647808986239963
          entropy_coeff: 0.009999999999999998
          kl: 0.012044445800842614
          policy_loss: -0.03584382811354266
          total_loss: -0.06013608167154921
          vf_explained_var: -0.49362343549728394
          vf_loss: 0.00023022979780257122
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  iterations_since_restore: 368
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,368,8243.52,368000,0,0,0,994.76


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-09-17_13-50-02
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 370
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6707877582973905
          entropy_coeff: 0.009999999999999998
          kl: 0.013165805046734785
          policy_loss: -0.024240666793452367
          total_loss: -0.04871015925374296
          vf_explained_var: -0.4830853044986725
          vf_loss: 0.00010070523317659131
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  iterations_since_restore: 369
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,369,8262.68,369000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-09-17_13-50-20
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 371
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.690021006266276
          entropy_coeff: 0.009999999999999998
          kl: 0.012996325890493408
          policy_loss: -0.04223383907228708
          total_loss: -0.06691924296319485
          vf_explained_var: -0.3254469633102417
          vf_loss: 0.00010464186301659438
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  iterations_since_restore: 370
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,370,8280.61,370000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-09-17_13-50-39
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 372
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.652987922562493
          entropy_coeff: 0.009999999999999998
          kl: 0.011565029635795731
          policy_loss: -0.04825569440921148
          total_loss: -0.0722326650387711
          vf_explained_var: 0.43440574407577515
          vf_loss: 0.000675144433580499
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
  iterations_since_restore: 371
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,371,8299.77,371000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-09-17_13-50-57
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 373
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.75353172355228
          entropy_coeff: 0.009999999999999998
          kl: 0.01141613707364743
          policy_loss: -0.041853930200967525
          total_loss: -0.06743457843032148
          vf_explained_var: -0.7750107049942017
          vf_loss: 0.00010107831073279764
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iterations_since_restore: 372
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,372,8318.17,372000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-09-17_13-51-17
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 374
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.697941139009264
          entropy_coeff: 0.009999999999999998
          kl: 0.01697473513881549
          policy_loss: -0.06888369988236162
          total_loss: -0.09301718067791727
          vf_explained_var: -0.7201606035232544
          vf_loss: 8.981157428327909e-05
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  iterations_since_restore: 373
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,373,8337.45,373000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-09-17_13-51-35
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 375
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.784498315387302
          entropy_coeff: 0.009999999999999998
          kl: 0.009601020981590797
          policy_loss: 0.02966654553181595
          total_loss: 0.003451869636774063
          vf_explained_var: -0.2974758446216583
          vf_loss: 7.142631421730685e-05
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  iterations_since_restore: 374
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,374,8355.9,374000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-09-17_13-51-53
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 376
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7810833136240642
          entropy_coeff: 0.009999999999999998
          kl: 0.009270449126179702
          policy_loss: -0.007881391296784083
          total_loss: -0.034119709953665735
          vf_explained_var: -0.470388799905777
          vf_loss: 6.730874747012826e-05
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  iterations_since_restore: 375
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,375,8374.07,375000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-09-17_13-52-12
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 377
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7727836767832437
          entropy_coeff: 0.009999999999999998
          kl: 0.01086257715956774
          policy_loss: -0.014404447707864974
          total_loss: -0.04024603772494528
          vf_explained_var: -0.9079561233520508
          vf_loss: 0.00012253463002909686
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  iterations_since_restore: 376
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,376,8392.94,376000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-09-17_13-52-31
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 378
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7737542284859553
          entropy_coeff: 0.009999999999999998
          kl: 0.009789917970932807
          policy_loss: 0.04262277086575826
          total_loss: 0.01654117065999243
          vf_explained_var: -0.7528213262557983
          vf_loss: 6.639112608455535e-05
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
  iterations_since_restore: 377
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,377,8411.69,377000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-09-17_13-52-52
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 379
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.725517921977573
          entropy_coeff: 0.009999999999999998
          kl: 0.010376800391661374
          policy_loss: -0.04143803904039992
          total_loss: -0.06683690558291144
          vf_explained_var: -0.9870806932449341
          vf_loss: 0.00017147261488490687
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  iterations_since_restore: 378
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,378,8432.39,378000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-09-17_13-53-10
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 380
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.67585735850864
          entropy_coeff: 0.009999999999999998
          kl: 0.010265976067808325
          policy_loss: -0.011453885957598687
          total_loss: -0.036347503587603566
          vf_explained_var: -0.7000203728675842
          vf_loss: 0.000198111284488631
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  iterations_since_restore: 379
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,379,8450.79,379000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-09-17_13-53-30
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 381
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.5779058853785197
          entropy_coeff: 0.009999999999999998
          kl: 0.011731256070111318
          policy_loss: -0.0858241147465176
          total_loss: -0.10960051446325249
          vf_explained_var: -0.380015105009079
          vf_loss: 9.790238064953075e-05
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  iterations_since_restore: 380
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,380,8470.83,380000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-09-17_13-53-48
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 382
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6416123946507772
          entropy_coeff: 0.009999999999999998
          kl: 0.013689721995357099
          policy_loss: -0.043507836386561395
          total_loss: -0.06758540795288152
          vf_explained_var: -0.7980751991271973
          vf_loss: 0.00011580470503152659
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  iterations_since_restore: 381
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,381,8489.11,381000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-09-17_13-54-06
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 383
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6501853307088217
          entropy_coeff: 0.009999999999999998
          kl: 0.007833863500544899
          policy_loss: -0.032793698728912406
          total_loss: -0.05791006382140848
          vf_explained_var: -0.829461932182312
          vf_loss: 0.00011353441736395729
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  iterations_since_restore: 382
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,382,8507,382000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-09-17_13-54-25
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 384
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7035409132639567
          entropy_coeff: 0.009999999999999998
          kl: 0.011438571610927381
          policy_loss: -0.055441390722990036
          total_loss: -0.08052002665483289
          vf_explained_var: -1.0
          vf_loss: 9.953871093683927e-05
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  iterations_since_restore: 383
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,383,8525.77,383000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-09-17_13-54-46
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 385
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7213875267240737
          entropy_coeff: 0.009999999999999998
          kl: 0.0066227682505054335
          policy_loss: -0.011037457154856788
          total_loss: -0.03707976730333434
          vf_explained_var: -0.5579561591148376
          vf_loss: 9.625232984641721e-05
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  iterations_since_restore: 384
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,384,8546.21,384000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-09-17_13-55-05
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 386
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.719818788104587
          entropy_coeff: 0.009999999999999998
          kl: 0.010057569851299932
          policy_loss: -0.012420377994163168
          total_loss: -0.037908492195937366
          vf_explained_var: -0.9925962686538696
          vf_loss: 7.706543567312312e-05
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  iterations_since_restore: 385
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,385,8565.24,385000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-09-17_13-55-23
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 387
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.7080723312166004
          entropy_coeff: 0.009999999999999998
          kl: 0.009908345930965566
          policy_loss: -0.009525803269611464
          total_loss: -0.034934577097495395
          vf_explained_var: -0.501922070980072
          vf_loss: 6.316931588925298e-05
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  iterations_since_restore: 386
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,386,8583.34,386000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-09-17_13-55-42
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 388
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.709765746858385
          entropy_coeff: 0.009999999999999998
          kl: 0.013104676164743347
          policy_loss: -0.019436824942628543
          total_loss: -0.044302680881487
          vf_explained_var: -0.9933132529258728
          vf_loss: 0.0001040467769497708
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  iterations_since_restore: 387
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,387,8602.71,387000,0,0,0,996.04


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-09-17_13-56-02
  done: false
  episode_len_mean: 996.04
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 389
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.678366017341614
          entropy_coeff: 0.009999999999999998
          kl: 0.00995627543734214
          policy_loss: -0.05295260029120578
          total_loss: -0.07788545481550196
          vf_explained_var: -0.6591284275054932
          vf_loss: 0.00023424463718887031
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  iterations_since_restore: 388
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,388,8622.15,388000,0,0,0,996.04




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-09-17_13-56-38
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 390
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6535605404112075
          entropy_coeff: 0.009999999999999998
          kl: 0.008091704007040809
          policy_loss: -0.05256472267210484
          total_loss: -0.07755394716643625
          vf_explained_var: -0.9668577313423157
          vf_loss: 0.00023256526422604413
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  iterations_since_restore: 389
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,389,8658.75,389000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-09-17_13-56-58
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 391
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.584041889508565
          entropy_coeff: 0.009999999999999998
          kl: 0.011833135124723245
          policy_loss: -0.07449960075318814
          total_loss: -0.09818411104174124
          vf_explained_var: -0.636766791343689
          vf_loss: 0.0002346088052743451
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iterations_since_restore: 390
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,390,8678.7,390000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-09-17_13-57-18
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 392
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6792087343004014
          entropy_coeff: 0.009999999999999998
          kl: 0.008853459976117447
          policy_loss: -0.049507653361393344
          total_loss: -0.07470779204741121
          vf_explained_var: -0.394969642162323
          vf_loss: 0.0001544482614816742
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  iterations_since_restore: 391
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,391,8698.01,391000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-09-17_13-57-36
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 393
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.660081158743964
          entropy_coeff: 0.009999999999999998
          kl: 0.010442366704553762
          policy_loss: -0.03184106925295459
          total_loss: -0.05660949266619152
          vf_explained_var: -0.3474540412425995
          vf_loss: 0.00013690399711473826
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  iterations_since_restore: 392
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,392,8715.95,392000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-09-17_13-57-53
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 394
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.572627125846015
          entropy_coeff: 0.009999999999999998
          kl: 0.01254817604774967
          policy_loss: -0.0779642259909047
          total_loss: -0.10149676493472523
          vf_explained_var: -0.1312384307384491
          vf_loss: 0.00015633087622417304
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  iterations_since_restore: 393
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,393,8733.47,393000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-09-17_13-58-12
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 395
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.598134591844347
          entropy_coeff: 0.009999999999999998
          kl: 0.00680764804241893
          policy_loss: -0.04152444822506772
          total_loss: -0.06614842249287499
          vf_explained_var: -0.7298527359962463
          vf_loss: 0.00025204361646602696
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  iterations_since_restore: 394
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,394,8752.1,394000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-09-17_13-58-30
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 396
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.593301592932807
          entropy_coeff: 0.009999999999999998
          kl: 0.006699314550097175
          policy_loss: -0.03776931319799688
          total_loss: -0.06240100860595703
          vf_explained_var: -0.43097105622291565
          vf_loss: 0.0002135785672635393
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  iterations_since_restore: 395
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,395,8770.16,395000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-09-17_13-58-48
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 397
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.5471455679999457
          entropy_coeff: 0.009999999999999998
          kl: 0.014019232189958488
          policy_loss: -0.0033809554245736863
          total_loss: -0.026470103363196055
          vf_explained_var: -0.8172590732574463
          vf_loss: 0.0001060596768588261
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  iterations_since_restore: 396
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,396,8788.5,396000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-09-17_13-59-08
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 398
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.471708775891198
          entropy_coeff: 0.009999999999999998
          kl: 0.008100454469026946
          policy_loss: -0.0688040307826466
          total_loss: -0.09196700462036662
          vf_explained_var: -0.09301232546567917
          vf_loss: 0.00023887799247353946
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  iterations_since_restore: 397
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,397,8808.48,397000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-09-17_13-59-29
  done: false
  episode_len_mean: 994.79
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 399
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.4103651377889843
          entropy_coeff: 0.009999999999999998
          kl: 0.014558951652462307
          policy_loss: -0.06424767110082838
          total_loss: -0.0857564616534445
          vf_explained_var: -0.1191609799861908
          vf_loss: 0.0002309836169255656
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  iterations_since_restore: 398
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,398,8829.23,398000,0,0,0,994.79


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-09-17_13-59-49
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 400
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6052537891599865
          entropy_coeff: 0.009999999999999998
          kl: 0.009301511545731481
          policy_loss: -0.05671023776133855
          total_loss: -0.08109013732108805
          vf_explained_var: -0.2832314968109131
          vf_loss: 0.00016238796242002234
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  iterations_since_restore: 399
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,399,8848.69,399000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-09-17_14-00-08
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 401
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.6612859964370728
          entropy_coeff: 0.009999999999999998
          kl: 0.0061401988924113604
          policy_loss: -0.05334654073748324
          total_loss: -0.07886511058443123
          vf_explained_var: 0.07603204250335693
          vf_loss: 9.732741496640706e-05
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  iterations_since_restore: 400
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,400,8868.04,400000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-09-17_14-00-27
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 402
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.340549793508318
          entropy_coeff: 0.009999999999999998
          kl: 0.009155103756542597
          policy_loss: -0.051580087095499036
          total_loss: -0.07317910773886574
          vf_explained_var: -0.30738502740859985
          vf_loss: 0.0003200001724609239
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterations_since_restore: 401
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,401,8886.87,401000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-09-17_14-00-47
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 403
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.308161211013794
          entropy_coeff: 0.009999999999999998
          kl: 0.00744609671228276
          policy_loss: -0.01780758553908931
          total_loss: -0.039316436234447694
          vf_explained_var: -0.5371439456939697
          vf_loss: 0.00036377303561291937
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  iterations_since_restore: 402
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,402,8906.5,402000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-09-17_14-01-07
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 404
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.2067686120669046
          entropy_coeff: 0.009999999999999998
          kl: 0.013513848188058751
          policy_loss: -0.06531418959299723
          total_loss: -0.08406128303872215
          vf_explained_var: 0.014394700527191162
          vf_loss: 0.0011264018834380193
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterations_since_restore: 403
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,403,8926.48,403000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-09-17_14-01-26
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 405
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 1.5116917265786065
          entropy_coeff: 0.009999999999999998
          kl: 0.009768870881078603
          policy_loss: -0.0032839493619071115
          total_loss: -0.01610993891954422
          vf_explained_var: 0.34729915857315063
          vf_loss: 0.0007047960268285655
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iterations_since_restore: 404
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,404,8946.08,404000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-09-17_14-01-45
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 406
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.323415860864851
          entropy_coeff: 0.009999999999999998
          kl: 0.006546312075292823
          policy_loss: -0.02952905659460359
          total_loss: -0.05142295103934076
          vf_explained_var: -0.14986583590507507
          vf_loss: 0.0002773640908180318
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iterations_since_restore: 405
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,405,8964.92,405000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-09-17_14-02-06
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 407
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 1.8547712153858609
          entropy_coeff: 0.009999999999999998
          kl: 0.010511258935038232
          policy_loss: -0.04830743256542418
          total_loss: -0.06483452725741598
          vf_explained_var: -0.24330079555511475
          vf_loss: 0.0003139473570980853
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  iterations_since_restore: 406
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,406,8985.34,406000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-09-17_14-02-25
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 408
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.236898548073239
          entropy_coeff: 0.009999999999999998
          kl: 0.005339140282348372
          policy_loss: -0.018838611053716807
          total_loss: -0.03992495392966602
          vf_explained_var: -0.43648430705070496
          vf_loss: 0.0004157457831145924
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  iterations_since_restore: 407
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,407,9004.79,407000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-09-17_14-02-45
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 409
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.3562538438373144
          entropy_coeff: 0.009999999999999998
          kl: 0.010391130764861442
          policy_loss: -0.016813630983233453
          total_loss: -0.03837730296783977
          vf_explained_var: -0.024343809112906456
          vf_loss: 0.00031169769544147935
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  iterations_since_restore: 408
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,408,9024.71,408000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-09-17_14-03-06
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 410
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.2625971449746025
          entropy_coeff: 0.009999999999999998
          kl: 0.00995651955977561
          policy_loss: -0.010546170081943274
          total_loss: -0.03138864677813318
          vf_explained_var: -0.8370319604873657
          vf_loss: 0.00016689649739937015
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iterations_since_restore: 409
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,409,9045.35,409000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-09-17_14-03-26
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 411
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 1.3171555726064577
          entropy_coeff: 0.009999999999999998
          kl: 0.011687767496425907
          policy_loss: -0.03114007959763209
          total_loss: -0.0423307484222783
          vf_explained_var: -0.09535645693540573
          vf_loss: 8.319288196061179e-05
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  iterations_since_restore: 410
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,410,9065.13,410000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-09-17_14-03-44
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 412
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1623660512734204
          cur_lr: 5.000000000000001e-05
          entropy: 2.169094361199273
          entropy_coeff: 0.009999999999999998
          kl: 0.021160498713781693
          policy_loss: 0.0073917287919256424
          total_loss: -0.010488005313608382
          vf_explained_var: -0.03621051087975502
          vf_loss: 0.00037546501911391614
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  iterations_since_restore: 411
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,411,9083.58,411000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-09-17_14-04-03
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 413
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.629051036304898
          entropy_coeff: 0.009999999999999998
          kl: 0.00573230410499532
          policy_loss: 0.006175234499904845
          total_loss: -0.01861179553800159
          vf_explained_var: -0.19004510343074799
          vf_loss: 0.00010738369618366051
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  iterations_since_restore: 412
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,412,9102.34,412000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-09-17_14-04-22
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 414
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.698948947588603
          entropy_coeff: 0.009999999999999998
          kl: 0.009781457161016426
          policy_loss: -0.03041883541478051
          total_loss: -0.054959981754008264
          vf_explained_var: -0.7206754684448242
          vf_loss: 6.607919685974492e-05
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  iterations_since_restore: 413
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,413,9121.06,413000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-09-17_14-04-40
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 415
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4880574160152014
          entropy_coeff: 0.009999999999999998
          kl: 0.006319304414000962
          policy_loss: -0.020789297266552844
          total_loss: -0.0439835872915056
          vf_explained_var: -0.360118567943573
          vf_loss: 0.00014722232183430605
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iterations_since_restore: 414
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,414,9139.08,414000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-09-17_14-04-58
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 416
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.351855683326721
          entropy_coeff: 0.009999999999999998
          kl: 0.014094839710330771
          policy_loss: -0.043871132532755534
          total_loss: -0.06345158725873463
          vf_explained_var: -0.23679937422275543
          vf_loss: 0.0005053153441230684
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  iterations_since_restore: 415
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,415,9157.8,415000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-09-17_14-05-17
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 417
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.436581860648261
          entropy_coeff: 0.009999999999999998
          kl: 0.00884643609044365
          policy_loss: -0.020057313475343917
          total_loss: -0.041981271193880176
          vf_explained_var: -0.22541865706443787
          vf_loss: 0.0002873189154570355
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  iterations_since_restore: 416
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,416,9176.31,416000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-09-17_14-05-36
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 418
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.5723947339587743
          entropy_coeff: 0.009999999999999998
          kl: 0.015329179864761806
          policy_loss: -0.06524330758386188
          total_loss: -0.08675920094052951
          vf_explained_var: 0.5537768602371216
          vf_loss: 0.00047464694231014924
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
  iterations_since_restore: 417
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,417,9195.77,417000,0,0,0,996.34


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-09-17_14-05-55
  done: false
  episode_len_mean: 996.34
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 419
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.5825632333755495
          entropy_coeff: 0.009999999999999998
          kl: 0.006260418244428105
          policy_loss: -0.013313289359211921
          total_loss: -0.03753188124133481
          vf_explained_var: -0.654242217540741
          vf_loss: 8.232124258837657e-05
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  iterations_since_restore: 418
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,418,9214.16,418000,0,0,0,996.34




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-09-17_14-06-31
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 420
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4183794418970743
          entropy_coeff: 0.009999999999999998
          kl: 0.01022095959359108
          policy_loss: -0.04359382877333297
          total_loss: -0.06509484963284598
          vf_explained_var: -0.6650058627128601
          vf_loss: 0.00019346714153066892
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  iterations_since_restore: 419
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,419,9250.23,419000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-09-17_14-06-54
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 421
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4375331746207345
          entropy_coeff: 0.009999999999999998
          kl: 0.007601621032219659
          policy_loss: -0.04247588620831569
          total_loss: -0.06470583308902052
          vf_explained_var: -0.617312490940094
          vf_loss: 0.0002940145967575821
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  iterations_since_restore: 420
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,420,9272.79,420000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-09-17_14-07-12
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 422
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4658088551627264
          entropy_coeff: 0.009999999999999998
          kl: 0.00838473377780831
          policy_loss: -0.03192607584512896
          total_loss: -0.054142975359637704
          vf_explained_var: -0.6950350999832153
          vf_loss: 0.0003990967648936324
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
  iterations_since_restore: 421
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,421,9291.43,421000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-09-17_14-07-31
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 423
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.548492606480916
          entropy_coeff: 0.009999999999999998
          kl: 0.0147651876584016
          policy_loss: -0.033222812010596195
          total_loss: -0.054925679456856516
          vf_explained_var: -0.2710510492324829
          vf_loss: 0.0001860127738761245
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iterations_since_restore: 422
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,422,9310.3,422000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-09-17_14-07-50
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 424
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.460719754960802
          entropy_coeff: 0.009999999999999998
          kl: 0.015321531171422325
          policy_loss: -0.033168145600292416
          total_loss: -0.05372587293386459
          vf_explained_var: -0.9421509504318237
          vf_loss: 0.0003179275213268637
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  iterations_since_restore: 423
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,423,9328.7,423000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-09-17_14-08-08
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 425
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.604417575730218
          entropy_coeff: 0.009999999999999998
          kl: 0.010627427630359815
          policy_loss: -0.023573455752597915
          total_loss: -0.046912547532055114
          vf_explained_var: -0.6157037019729614
          vf_loss: 0.00011678171217934302
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  iterations_since_restore: 424
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,424,9347.11,424000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-09-17_14-08-26
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 426
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.668295163578457
          entropy_coeff: 0.009999999999999998
          kl: 0.0070645424338256334
          policy_loss: -0.04269654254118602
          total_loss: -0.06756193422608905
          vf_explained_var: -0.5838393568992615
          vf_loss: 9.69958968628109e-05
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  iterations_since_restore: 425
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,425,9365.38,425000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-09-17_14-08-46
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 427
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.612310806910197
          entropy_coeff: 0.009999999999999998
          kl: 0.005384739293910456
          policy_loss: -0.05296218459390932
          total_loss: -0.07766881183617645
          vf_explained_var: -0.6645883321762085
          vf_loss: 0.00010503139272562596
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000
  iterations_since_restore: 426
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,426,9384.74,426000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-09-17_14-09-05
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 428
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.528361678123474
          entropy_coeff: 0.009999999999999998
          kl: 0.00830214183673661
          policy_loss: -0.03972700557981928
          total_loss: -0.06289188474830654
          vf_explained_var: -0.45438483357429504
          vf_loss: 9.675657020630347e-05
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
  iterations_since_restore: 427
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,427,9403.5,427000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-09-17_14-09-23
  done: false
  episode_len_mean: 995.11
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 429
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.409044689602322
          entropy_coeff: 0.009999999999999998
          kl: 0.010679662165003812
          policy_loss: -0.02120408525483476
          total_loss: -0.04258890264771051
          vf_explained_var: -0.42985567450523376
          vf_loss: 0.00010460694789015987
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iterations_since_restore: 428
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,428,9422.1,428000,0,0,0,995.11


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-09-17_14-09-44
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 430
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4516500896877713
          entropy_coeff: 0.009999999999999998
          kl: 0.006696872704786945
          policy_loss: -0.025416988879442215
          total_loss: -0.04813739282803403
          vf_explained_var: -0.8618009686470032
          vf_loss: 0.00016507965982428787
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  iterations_since_restore: 429
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,429,9443.08,429000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-09-17_14-10-03
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 431
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.4466636419296264
          entropy_coeff: 0.009999999999999998
          kl: 0.00559351122822128
          policy_loss: -0.04614826742973593
          total_loss: -0.06885803573661381
          vf_explained_var: -0.3378874957561493
          vf_loss: 0.0003945734954287319
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  iterations_since_restore: 430
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,430,9461.71,430000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-09-17_14-10-22
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 432
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.5294286410013833
          entropy_coeff: 0.009999999999999998
          kl: 0.006717035952657508
          policy_loss: -0.05386041891243723
          total_loss: -0.07733995529512565
          vf_explained_var: -0.12416709959506989
          vf_loss: 0.00017882059686245258
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  iterations_since_restore: 431
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,431,9480.4,431000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-09-17_14-10-41
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 433
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.3469099521636965
          entropy_coeff: 0.009999999999999998
          kl: 0.011378298842912906
          policy_loss: -0.07526256897383266
          total_loss: -0.0956122811883688
          vf_explained_var: 0.050974417477846146
          vf_loss: 0.0003482157581301079
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  iterations_since_restore: 432
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,432,9499.95,432000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-09-17_14-11-00
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 434
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.5709939850701224
          entropy_coeff: 0.009999999999999998
          kl: 0.01035874243539877
          policy_loss: -0.06142423641350534
          total_loss: -0.084306979427735
          vf_explained_var: -0.6432350277900696
          vf_loss: 0.00030433207255353207
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
  iterations_since_restore: 433
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,433,9518.33,433000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-09-17_14-11-18
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 435
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 1.8977992508146497
          entropy_coeff: 0.009999999999999998
          kl: 0.009752157216930771
          policy_loss: -0.04619228865744339
          total_loss: -0.06181051391694281
          vf_explained_var: 0.5132461786270142
          vf_loss: 0.0009846363987890071
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  iterations_since_restore: 434
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,434,9537.16,434000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-09-17_14-11-39
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 436
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 1.999037414126926
          entropy_coeff: 0.009999999999999998
          kl: 0.00915373186221322
          policy_loss: -0.047739315778017044
          total_loss: -0.0640147269393007
          vf_explained_var: 0.10560929775238037
          vf_loss: 0.0014855806626858086
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  iterations_since_restore: 435
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,435,9557.81,435000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-09-17_14-11-58
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 437
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24354907691013067
          cur_lr: 5.000000000000001e-05
          entropy: 2.6312927511003283
          entropy_coeff: 0.009999999999999998
          kl: 0.003727065020496016
          policy_loss: -0.03063903757267528
          total_loss: -0.05584150882851746
          vf_explained_var: -0.38748428225517273
          vf_loss: 0.00020273458276278688
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  iterations_since_restore: 436
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,436,9576.28,436000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-09-17_14-12-19
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 438
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.529413339826796
          entropy_coeff: 0.009999999999999998
          kl: 0.010446505428593333
          policy_loss: -0.01797972485009167
          total_loss: -0.04174121926642126
          vf_explained_var: -0.3380849063396454
          vf_loss: 0.00026052093749563535
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  iterations_since_restore: 437
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,437,9597.73,437000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-09-17_14-12-39
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 439
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.581742432382372
          entropy_coeff: 0.009999999999999998
          kl: 0.01136596089841504
          policy_loss: -0.00768283245464166
          total_loss: -0.03192990066276656
          vf_explained_var: -0.8456012010574341
          vf_loss: 0.00018627341929055143
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iterations_since_restore: 438
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,438,9617.32,438000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-09-17_14-12-57
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 440
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.670542565981547
          entropy_coeff: 0.009999999999999998
          kl: 0.010353470433576244
          policy_loss: -0.025753865300470757
          total_loss: -0.05101320385519001
          vf_explained_var: -0.6775668263435364
          vf_loss: 0.00018529702534174754
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  iterations_since_restore: 439
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,439,9635.53,439000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-09-17_14-13-15
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 441
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.6189249647988215
          entropy_coeff: 0.009999999999999998
          kl: 0.010135591903527658
          policy_loss: -0.03402284965333011
          total_loss: -0.05871575638237927
          vf_explained_var: -0.7053928375244141
          vf_loss: 0.0002620820764807124
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  iterations_since_restore: 440
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,440,9653.8,440000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-09-17_14-13-35
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 442
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.623038819101122
          entropy_coeff: 0.009999999999999998
          kl: 0.007491108362922184
          policy_loss: -0.03452975373301241
          total_loss: -0.05948935478097862
          vf_explained_var: -0.6232406497001648
          vf_loss: 0.00035856102314129305
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iterations_since_restore: 441
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,441,9673.52,441000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-09-17_14-13-55
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 443
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.4896509766578676
          entropy_coeff: 0.009999999999999998
          kl: 0.019618463331451952
          policy_loss: -0.03781465498937501
          total_loss: -0.05961268800828192
          vf_explained_var: 0.26291152834892273
          vf_loss: 0.0007094477353929404
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iterations_since_restore: 442
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,442,9693.82,442000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-09-17_14-14-14
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 444
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.7032640006807114
          entropy_coeff: 0.009999999999999998
          kl: 0.008528385002155787
          policy_loss: -0.005972886499431398
          total_loss: -0.03184189461171627
          vf_explained_var: -0.36428478360176086
          vf_loss: 0.0001250890224431108
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterations_since_restore: 443
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,443,9712.48,443000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-09-17_14-14-38
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 445
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.5519860876931086
          entropy_coeff: 0.009999999999999998
          kl: 0.012584415043301276
          policy_loss: -0.05751279890537262
          total_loss: -0.08114249921507305
          vf_explained_var: -0.6914742588996887
          vf_loss: 0.0003576990498155889
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_since_restore: 444
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,444,9736.19,444000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-09-17_14-15-00
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 446
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.7126666327317557
          entropy_coeff: 0.009999999999999998
          kl: 0.015707821770307717
          policy_loss: 0.10149090356296964
          total_loss: 0.08672370066245397
          vf_explained_var: 0.21496105194091797
          vf_loss: 0.00044664785172143534
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iterations_since_restore: 445
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,445,9758.32,445000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-09-17_14-15-20
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 447
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.9807809975412156
          entropy_coeff: 0.009999999999999998
          kl: 0.016009484737873503
          policy_loss: -0.04622587660948436
          total_loss: -0.06318163091523779
          vf_explained_var: -0.2661104202270508
          vf_loss: 0.0009025027257141321
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations_since_restore: 446
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,446,9778.69,446000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-09-17_14-15-39
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 448
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.641496131155226
          entropy_coeff: 0.009999999999999998
          kl: 0.012552680992655576
          policy_loss: 0.005699213304453426
          total_loss: -0.019037325763040118
          vf_explained_var: -0.1110762357711792
          vf_loss: 0.00014982701149569443
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iterations_since_restore: 447
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,447,9796.88,447000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-09-17_14-15-59
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 449
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.453835588031345
          entropy_coeff: 0.009999999999999998
          kl: 0.0056670713586582065
          policy_loss: -0.05021398237182034
          total_loss: -0.06343801431357861
          vf_explained_var: -0.9985901117324829
          vf_loss: 0.0006242212498263042
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  iterations_since_restore: 448
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,448,9817.32,448000,0,0,0,996.29




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-09-17_14-16-40
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 450
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.6810022089216443
          entropy_coeff: 0.009999999999999998
          kl: 0.00602185953986021
          policy_loss: -0.025127790909674434
          total_loss: -0.05105205361420909
          vf_explained_var: -0.8718673586845398
          vf_loss: 0.00015245164686853563
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  iterations_since_restore: 449
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,449,9858.43,449000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-09-17_14-17-00
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 451
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.3230859683619605
          entropy_coeff: 0.009999999999999998
          kl: 0.019914145932902055
          policy_loss: -0.03422067765560415
          total_loss: -0.0433813056598107
          vf_explained_var: 0.05400201305747032
          vf_loss: 0.0016451960377809073
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  iterations_since_restore: 450
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,450,9878.38,450000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-09-17_14-17-19
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 452
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.1945432504018147
          entropy_coeff: 0.009999999999999998
          kl: 0.007256555330985432
          policy_loss: -0.02721389815625217
          total_loss: -0.047862661717873484
          vf_explained_var: -1.0
          vf_loss: 0.00041300401221633365
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_since_restore: 451
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,451,9896.65,451000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-09-17_14-17-39
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 453
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.8973057707150778
          entropy_coeff: 0.009999999999999998
          kl: 0.017014958277370776
          policy_loss: -0.03471099951614936
          total_loss: -0.05124086058802075
          vf_explained_var: -0.5929005742073059
          vf_loss: 0.00037120742286636717
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  iterations_since_restore: 452
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,452,9916.8,452000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-09-17_14-17-57
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 454
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.666109636094835
          entropy_coeff: 0.009999999999999998
          kl: 0.010957735236550864
          policy_loss: -0.04502872170673476
          total_loss: -0.07021971952377094
          vf_explained_var: -0.6955724358558655
          vf_loss: 0.00013572681752533654
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterations_since_restore: 453
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,453,9934.72,453000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-09-17_14-18-15
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 455
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 1.3672903971539603
          entropy_coeff: 0.009999999999999998
          kl: 0.008295371435336405
          policy_loss: -0.06342636404765976
          total_loss: -0.07435700086255868
          vf_explained_var: -0.5956247448921204
          vf_loss: 0.0017321014149729308
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iterations_since_restore: 454
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,454,9953.41,454000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-09-17_14-18-34
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 456
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.7377027140723333
          entropy_coeff: 0.009999999999999998
          kl: 0.011405081690274146
          policy_loss: -0.020884392534693083
          total_loss: -0.046681597497728135
          vf_explained_var: -0.6272464990615845
          vf_loss: 0.0001909725888279152
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterations_since_restore: 455
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,455,9971.87,455000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-09-17_14-18-52
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 457
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 2.578715385331048
          entropy_coeff: 0.009999999999999998
          kl: 0.010406557975682289
          policy_loss: -0.031809034074346224
          total_loss: -0.056227660158442126
          vf_explained_var: -0.627547562122345
          vf_loss: 0.00010127219134372151
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  iterations_since_restore: 456
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,456,9990.07,456000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-09-17_14-19-13
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 458
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12177453845506533
          cur_lr: 5.000000000000001e-05
          entropy: 0.3583771381320225
          entropy_coeff: 0.009999999999999998
          kl: 0.002525306581841723
          policy_loss: 0.054665571658147706
          total_loss: 0.05167645453992817
          vf_explained_var: 0.22384963929653168
          vf_loss: 0.00028713428643338074
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iterations_since_restore: 457
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,457,10010.6,457000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-09-17_14-19-32
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 459
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 2.368317405382792
          entropy_coeff: 0.009999999999999998
          kl: 0.007158488564135645
          policy_loss: -0.04748345087799761
          total_loss: -0.06976218827896648
          vf_explained_var: -0.38782835006713867
          vf_loss: 0.0009685721797268343
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  iterations_since_restore: 458
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,458,10029.4,458000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-09-17_14-19-52
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 460
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 0.9946177636583646
          entropy_coeff: 0.009999999999999998
          kl: 0.005172814076711339
          policy_loss: -0.0597515350414647
          total_loss: -0.06915121682816082
          vf_explained_var: 0.021541954949498177
          vf_loss: 0.00023153882882777705
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  iterations_since_restore: 459
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,459,10049.3,459000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-09-17_14-20-12
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 461
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 2.530983734130859
          entropy_coeff: 0.009999999999999998
          kl: 0.010089601518784224
          policy_loss: -0.07294931775993771
          total_loss: -0.09735613018274307
          vf_explained_var: -0.29255056381225586
          vf_loss: 0.0002886964658854494
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterations_since_restore: 460
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,460,10069.8,460000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-09-17_14-20-30
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 462
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 2.072739793194665
          entropy_coeff: 0.009999999999999998
          kl: 0.01276087996523611
          policy_loss: -0.09400202946530448
          total_loss: -0.11369005226426654
          vf_explained_var: 0.024511927738785744
          vf_loss: 0.000262398849306717
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iterations_since_restore: 461
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,461,10087.6,461000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-09-17_14-20-49
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 463
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 1.2190097723570135
          entropy_coeff: 0.009999999999999998
          kl: 0.010464712275196566
          policy_loss: -0.1131413336429331
          total_loss: -0.12390681041611565
          vf_explained_var: -0.3122045397758484
          vf_loss: 0.0007874499818879283
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterations_since_restore: 462
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,462,10107,462000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-09-17_14-21-09
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 464
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 1.2454647888739905
          entropy_coeff: 0.009999999999999998
          kl: 0.009535792345572346
          policy_loss: -0.059208126159177886
          total_loss: -0.0702360356433524
          vf_explained_var: -0.0027121801394969225
          vf_loss: 0.0008461283516630324
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  iterations_since_restore: 463
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,463,10126.7,463000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-09-17_14-21-31
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 465
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 1.5514810469415452
          entropy_coeff: 0.009999999999999998
          kl: 0.00885568915539433
          policy_loss: -0.05593184228572581
          total_loss: -0.07038914863434102
          vf_explained_var: -0.5174252986907959
          vf_loss: 0.0005183065123371004
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iterations_since_restore: 464
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,464,10148.7,464000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-09-17_14-21-53
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 466
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06088726922753267
          cur_lr: 5.000000000000001e-05
          entropy: 2.0655089881685047
          entropy_coeff: 0.009999999999999998
          kl: 0.030535868006721107
          policy_loss: -0.022330492734909058
          total_loss: -0.040634355942408246
          vf_explained_var: -0.06506825238466263
          vf_loss: 0.0004919813508523576
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  iterations_since_restore: 465
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,465,10170.3,465000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-09-17_14-22-15
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 467
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09133090384129905
          cur_lr: 5.000000000000001e-05
          entropy: 0.396990028106504
          entropy_coeff: 0.009999999999999998
          kl: 0.004559627034772538
          policy_loss: -0.04386145277983612
          total_loss: -0.04707411188218329
          vf_explained_var: -0.25020837783813477
          vf_loss: 0.0003408064585477809
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  iterations_since_restore: 466
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,466,10192.5,466000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-09-17_14-22-36
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 468
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045665451920649525
          cur_lr: 5.000000000000001e-05
          entropy: 2.122266868750254
          entropy_coeff: 0.009999999999999998
          kl: 0.014914114673041764
          policy_loss: -0.051011890938712494
          total_loss: -0.07135279847619434
          vf_explained_var: -0.5712050795555115
          vf_loss: 0.00020070370678695326
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  iterations_since_restore: 467
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,467,10213.3,467000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-09-17_14-22-56
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 469
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.045665451920649525
          cur_lr: 5.000000000000001e-05
          entropy: 1.802607364124722
          entropy_coeff: 0.009999999999999998
          kl: 0.020954665900566265
          policy_loss: 0.011495308950543404
          total_loss: -0.004945362814598613
          vf_explained_var: 0.31724846363067627
          vf_loss: 0.0006284968276961283
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  iterations_since_restore: 468
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,468,10233.3,468000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-09-17_14-23-14
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 470
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06849817788097423
          cur_lr: 5.000000000000001e-05
          entropy: 2.0412432312965394
          entropy_coeff: 0.009999999999999998
          kl: 0.012834691027710695
          policy_loss: -0.03435459463960595
          total_loss: -0.053108514348665876
          vf_explained_var: 0.34287601709365845
          vf_loss: 0.0007793619751080567
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  iterations_since_restore: 469
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,469,10251.8,469000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-09-17_14-23-36
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 471
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06849817788097423
          cur_lr: 5.000000000000001e-05
          entropy: 0.4461164945529567
          entropy_coeff: 0.009999999999999998
          kl: 0.002786448891222943
          policy_loss: -0.005990066710445616
          total_loss: -0.006640911805960867
          vf_explained_var: 0.5629656314849854
          vf_loss: 0.0036194526717331934
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iterations_since_restore: 470
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,470,10273.5,470000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-09-17_14-23-55
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 472
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.034249088940487116
          cur_lr: 5.000000000000001e-05
          entropy: 2.305202117231157
          entropy_coeff: 0.009999999999999998
          kl: 0.010389102108391807
          policy_loss: -0.033354941631356876
          total_loss: -0.05516926824218697
          vf_explained_var: -0.6695531010627747
          vf_loss: 0.0008818793098448117
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iterations_since_restore: 471
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,471,10292.9,471000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-09-17_14-24-17
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 473
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.034249088940487116
          cur_lr: 5.000000000000001e-05
          entropy: 0.7763199281361368
          entropy_coeff: 0.009999999999999998
          kl: 0.042346070427771054
          policy_loss: 0.11399242298470603
          total_loss: 0.10822935435507032
          vf_explained_var: 0.6536979675292969
          vf_loss: 0.0005498179815024034
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  iterations_since_restore: 472
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,472,10314.4,472000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-09-17_14-24-40
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 474
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05137363341073069
          cur_lr: 5.000000000000001e-05
          entropy: 2.540102587805854
          entropy_coeff: 0.009999999999999998
          kl: 0.027133606059722443
          policy_loss: -0.006584450933668349
          total_loss: -0.030234960714975993
          vf_explained_var: -0.12332286685705185
          vf_loss: 0.00035655863947441606
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iterations_since_restore: 473
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,473,10337,473000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-09-17_14-24-58
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 475
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07706045011609605
          cur_lr: 5.000000000000001e-05
          entropy: 2.537904821501838
          entropy_coeff: 0.009999999999999998
          kl: 0.00955412340334411
          policy_loss: 0.008886739363272984
          total_loss: -0.015604344341490004
          vf_explained_var: 0.07411803305149078
          vf_loss: 0.00015171740023510615
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterations_since_restore: 474
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,474,10355,474000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-09-17_14-25-18
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 476
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07706045011609605
          cur_lr: 5.000000000000001e-05
          entropy: 2.4117024766074286
          entropy_coeff: 0.009999999999999998
          kl: 0.006826200314286288
          policy_loss: -0.022615923939479723
          total_loss: -0.0459994295405017
          vf_explained_var: -0.4140413999557495
          vf_loss: 0.00020748829688272964
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  iterations_since_restore: 475
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,475,10374.9,475000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-09-17_14-25-36
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 477
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07706045011609605
          cur_lr: 5.000000000000001e-05
          entropy: 2.529960584640503
          entropy_coeff: 0.009999999999999998
          kl: 0.01442217966016395
          policy_loss: -0.017319711794455846
          total_loss: -0.0412733793258667
          vf_explained_var: -0.7000537514686584
          vf_loss: 0.00023455858563971156
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  iterations_since_restore: 476
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,476,10393.3,476000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-09-17_14-25-55
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 478
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07706045011609605
          cur_lr: 5.000000000000001e-05
          entropy: 2.608019550641378
          entropy_coeff: 0.009999999999999998
          kl: 0.026329895452219452
          policy_loss: -0.030989828954140344
          total_loss: -0.054838336093558204
          vf_explained_var: -0.056962378323078156
          vf_loss: 0.0002026959330186805
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  iterations_since_restore: 477
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,477,10412.2,477000,0,0,0,996.22




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-09-17_14-26-35
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 2
  episodes_total: 480
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11559067517414401
          cur_lr: 5.000000000000001e-05
          entropy: 2.6306171152326794
          entropy_coeff: 0.009999999999999998
          kl: 0.013846151509619986
          policy_loss: -0.03371066740817494
          total_loss: -0.05829522502091196
          vf_explained_var: 0.15396815538406372
          vf_loss: 0.00012112802272289022
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  iterations_since_restore: 478
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,478,10451.9,478000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-09-17_14-26-57
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 481
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11559067517414401
          cur_lr: 5.000000000000001e-05
          entropy: 2.222937375969357
          entropy_coeff: 0.009999999999999998
          kl: 0.009838743810675036
          policy_loss: 0.040407797694206236
          total_loss: 0.019806493446230887
          vf_explained_var: -1.0
          vf_loss: 0.0004908008054169638
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  iterations_since_restore: 479
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,479,10474,479000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-09-17_14-27-17
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 482
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11559067517414401
          cur_lr: 5.000000000000001e-05
          entropy: 2.7194816430409747
          entropy_coeff: 0.009999999999999998
          kl: 0.010276735528798842
          policy_loss: -0.026586795018778905
          total_loss: -0.052460826685031256
          vf_explained_var: -0.7933923602104187
          vf_loss: 0.00013289139666691578
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  iterations_since_restore: 480
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,480,10493.8,480000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-09-17_14-27-35
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 483
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11559067517414401
          cur_lr: 5.000000000000001e-05
          entropy: 2.532224522696601
          entropy_coeff: 0.009999999999999998
          kl: 0.009572884214486403
          policy_loss: 0.07335307912694083
          total_loss: 0.049358748189277116
          vf_explained_var: -0.659435510635376
          vf_loss: 0.00022137700000510248
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  iterations_since_restore: 481
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,481,10512.2,481000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-09-17_14-27-54
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 484
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11559067517414401
          cur_lr: 5.000000000000001e-05
          entropy: 2.7705679125256006
          entropy_coeff: 0.009999999999999998
          kl: 0.003458475189043379
          policy_loss: -0.11276661819881863
          total_loss: -0.14005896498759587
          vf_explained_var: -0.8868134617805481
          vf_loss: 1.3564649416113954e-05
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  iterations_since_restore: 482
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,482,10531,482000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-09-17_14-28-13
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 485
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.057795337587072004
          cur_lr: 5.000000000000001e-05
          entropy: 2.7303270472420587
          entropy_coeff: 0.009999999999999998
          kl: 0.005018119422552757
          policy_loss: -0.040701075482906565
          total_loss: -0.06766454618838098
          vf_explained_var: -0.6052854061126709
          vf_loss: 4.9775112408193915e-05
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  iterations_since_restore: 483
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,483,10550.4,483000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-09-17_14-28-33
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 486
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.057795337587072004
          cur_lr: 5.000000000000001e-05
          entropy: 2.6537790245480006
          entropy_coeff: 0.009999999999999998
          kl: 0.004838647955655478
          policy_loss: -0.12603062391281128
          total_loss: -0.1522703405883577
          vf_explained_var: -0.3105494976043701
          vf_loss: 1.8421845735853517e-05
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  iterations_since_restore: 484
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,484,10569.3,484000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-09-17_14-28-52
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 487
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.4633710357877945
          entropy_coeff: 0.009999999999999998
          kl: 0.007538227922844953
          policy_loss: -0.024493449843592113
          total_loss: -0.04884784316851033
          vf_explained_var: -0.7779942750930786
          vf_loss: 6.147812873172775e-05
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  iterations_since_restore: 485
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,485,10588,485000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-09-17_14-29-13
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 488
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.659690613216824
          entropy_coeff: 0.009999999999999998
          kl: 0.008674489410090401
          policy_loss: -0.07953675190607706
          total_loss: -0.10586324623889393
          vf_explained_var: -0.642365574836731
          vf_loss: 1.9739901952359814e-05
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  iterations_since_restore: 486
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,486,10609,486000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-09-17_14-29-33
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 489
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.6575949986775718
          entropy_coeff: 0.009999999999999998
          kl: 0.005933227208472265
          policy_loss: -0.08475043657753202
          total_loss: -0.11113936818308301
          vf_explained_var: -0.7554783225059509
          vf_loss: 1.556075374840778e-05
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
  iterations_since_restore: 487
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,487,10629.3,487000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-09-17_14-29-50
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 490
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.6679013464185926
          entropy_coeff: 0.009999999999999998
          kl: 0.007346755615161705
          policy_loss: -0.08879726309743192
          total_loss: -0.11524526675542196
          vf_explained_var: -0.5384464263916016
          vf_loss: 1.870533336740563e-05
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iterations_since_restore: 488
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,488,10646.8,488000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-09-17_14-30-10
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 491
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.5624329646428428
          entropy_coeff: 0.009999999999999998
          kl: 0.006035462830244005
          policy_loss: -0.13362680342462327
          total_loss: -0.15904437344935204
          vf_explained_var: -0.5286230444908142
          vf_loss: 3.234958097285319e-05
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations_since_restore: 489
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,489,10666.1,489000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-09-17_14-30-31
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 492
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.574409286181132
          entropy_coeff: 0.009999999999999998
          kl: 0.005479410861412205
          policy_loss: -0.10474177218145794
          total_loss: -0.1302899176047908
          vf_explained_var: -1.0
          vf_loss: 3.760535637411522e-05
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterations_since_restore: 490
  node_ip: 10.55.229.87
  num_healthy_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,490,10686.9,490000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-09-17_14-30-51
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 493
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3967163032955594
          entropy_coeff: 0.009999999999999998
          kl: 0.009997724336196593
          policy_loss: -0.06847777428726355
          total_loss: -0.09212523508403037
          vf_explained_var: -0.4756556749343872
          vf_loss: 3.079117469850543e-05
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iterations_since_restore: 491
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,491,10706.8,491000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-09-17_14-31-11
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 494
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.43504974577162
          entropy_coeff: 0.009999999999999998
          kl: 0.014520948128394403
          policy_loss: -0.14249040939741664
          total_loss: -0.1663231227133009
          vf_explained_var: -0.9905728697776794
          vf_loss: 9.816317857864002e-05
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_since_restore: 492
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,492,10727,492000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-09-17_14-31-35
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 495
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.2952616307470532
          entropy_coeff: 0.009999999999999998
          kl: 0.010162124795809073
          policy_loss: -0.08760170488514835
          total_loss: -0.1100768692791462
          vf_explained_var: -0.9758238196372986
          vf_loss: 0.00018378891068601257
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_since_restore: 493
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,493,10750.9,493000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-09-17_14-31-58
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 496
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.3500713215933904
          entropy_coeff: 0.009999999999999998
          kl: 0.010675736513986919
          policy_loss: -0.07692220840189191
          total_loss: -0.09984098896384239
          vf_explained_var: -0.8298978805541992
          vf_loss: 0.0002734283760623334
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations_since_restore: 494
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,494,10773.8,494000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-09-17_14-32-22
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 497
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1240750352541604
          entropy_coeff: 0.009999999999999998
          kl: 0.00987833212644623
          policy_loss: -0.1602950721979141
          total_loss: -0.181168339567052
          vf_explained_var: -0.5964279174804688
          vf_loss: 8.202235488473283e-05
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since_restore: 495
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,495,10797.7,495000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-09-17_14-32-43
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 498
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5814486066500346
          entropy_coeff: 0.009999999999999998
          kl: 0.011007736970502396
          policy_loss: -0.18446284814013375
          total_loss: -0.1962224945425987
          vf_explained_var: -0.8123257756233215
          vf_loss: 0.0037367444210960657
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iterations_since_restore: 496
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,496,10819.1,496000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-09-17_14-33-05
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 499
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028897668793536002
          cur_lr: 5.000000000000001e-05
          entropy: 0.24283957423435318
          entropy_coeff: 0.009999999999999998
          kl: 0.0013429703280526913
          policy_loss: -0.09572512159744899
          total_loss: -0.09709762020243538
          vf_explained_var: -0.5334974527359009
          vf_loss: 0.001017086977588772
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_since_restore: 497
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,497,10840.5,497000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-09-17_14-33-25
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 500
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014448834396768001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8756245930989583
          entropy_coeff: 0.009999999999999998
          kl: 0.0265183360355597
          policy_loss: -0.03877437436539266
          total_loss: -0.056355342786345214
          vf_explained_var: -0.9598748087882996
          vf_loss: 0.0007921161420300551
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_since_restore: 498
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,498,10861.1,498000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-09-17_14-33-49
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 501
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021673251595152015
          cur_lr: 5.000000000000001e-05
          entropy: 0.1123921562400129
          entropy_coeff: 0.009999999999999998
          kl: 0.02409174821528434
          policy_loss: -0.1626317759354909
          total_loss: -0.16312926250199478
          vf_explained_var: 0.3735807240009308
          vf_loss: 0.00010429106647886025
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_since_restore: 499
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,499,10885.2,499000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-09-17_14-34-13
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 502
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03250987739272802
          cur_lr: 5.000000000000001e-05
          entropy: 2.2875504904323156
          entropy_coeff: 0.009999999999999998
          kl: 0.02089970023622314
          policy_loss: -0.16423510693841512
          total_loss: -0.1859818811217944
          vf_explained_var: -0.9391829371452332
          vf_loss: 0.00044928059930195255
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations_since_restore: 500
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,500,10909.2,500000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-09-17_14-34-36
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 503
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.048764816089092014
          cur_lr: 5.000000000000001e-05
          entropy: 2.0539753913879393
          entropy_coeff: 0.009999999999999998
          kl: 0.015339988381818397
          policy_loss: -0.062015817542042995
          total_loss: -0.08107718692885504
          vf_explained_var: -1.0
          vf_loss: 0.000730333416908656
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations_since_restore: 501
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,501,10932,501000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-09-17_14-34-57
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 504
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.048764816089092014
          cur_lr: 5.000000000000001e-05
          entropy: 2.3727987421883476
          entropy_coeff: 0.009999999999999998
          kl: 0.010604600789105727
          policy_loss: -0.09837819350262483
          total_loss: -0.12129114282627901
          vf_explained_var: -0.9954457879066467
          vf_loss: 0.0002979075215989724
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations_since_restore: 502
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,502,10952.4,502000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-09-17_14-35-16
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 505
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.048764816089092014
          cur_lr: 5.000000000000001e-05
          entropy: 2.2688973016209073
          entropy_coeff: 0.009999999999999998
          kl: 0.021660072181967395
          policy_loss: -0.07573002295361625
          total_loss: -0.09708800833258364
          vf_explained_var: -0.9270698428153992
          vf_loss: 0.00027473837237468817
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_since_restore: 503
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,503,10972.1,503000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-09-17_14-35-38
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 506
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 2.410827491018507
          entropy_coeff: 0.009999999999999998
          kl: 0.013777060229978186
          policy_loss: -0.047209196082419816
          total_loss: -0.06999300896293587
          vf_explained_var: -1.0
          vf_loss: 0.0003167087758710194
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iterations_since_restore: 504
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,504,10993.7,504000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-09-17_14-35-59
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 507
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 2.257451672024197
          entropy_coeff: 0.009999999999999998
          kl: 0.015691791195648227
          policy_loss: -0.039591306603203216
          total_loss: -0.0607847751946085
          vf_explained_var: -1.0
          vf_loss: 0.00023323815548792481
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations_since_restore: 505
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,505,11014.4,505000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-09-17_14-36-21
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 508
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 1.9233611947960323
          entropy_coeff: 0.009999999999999998
          kl: 0.013907272384975045
          policy_loss: 0.0033873151573869916
          total_loss: -0.014283562948306402
          vf_explained_var: -1.0
          vf_loss: 0.0005454546092854192
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterations_since_restore: 506
  node_ip: 10.55.229.87
  num_healt

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,506,11036.2,506000,0,0,0,996.24


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-09-17_14-36-43
  done: false
  episode_len_mean: 996.24
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 509
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 2.2816174387931825
          entropy_coeff: 0.009999999999999998
          kl: 0.014828978943188728
          policy_loss: -0.008391682048224741
          total_loss: -0.02972927795102199
          vf_explained_var: -1.0
          vf_loss: 0.0003938780385295912
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_since_restore: 507
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,507,11058.8,507000,0,0,0,996.24




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-09-17_14-37-21
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 510
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07314722413363806
          cur_lr: 5.000000000000001e-05
          entropy: 1.8791213572025298
          entropy_coeff: 0.009999999999999998
          kl: 0.02064562115323468
          policy_loss: -0.02596113748020596
          total_loss: -0.042469850844807096
          vf_explained_var: -0.5232135653495789
          vf_loss: 0.0007723312712591401
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterations_since_restore: 508
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,508,11096.3,508000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-09-17_14-37-43
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 511
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10972083620045707
          cur_lr: 5.000000000000001e-05
          entropy: 2.4790609147813587
          entropy_coeff: 0.009999999999999998
          kl: 0.006438130755853777
          policy_loss: -0.12469698049955898
          total_loss: -0.148728389872445
          vf_explained_var: -1.0
          vf_loss: 5.280212327407854e-05
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,509,11118.4,509000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-09-17_14-38-06
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 512
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10972083620045707
          cur_lr: 5.000000000000001e-05
          entropy: 2.4895880222320557
          entropy_coeff: 0.009999999999999998
          kl: 0.0049258923161618505
          policy_loss: -0.07473957373036279
          total_loss: -0.09902638809548484
          vf_explained_var: -1.0
          vf_loss: 6.859269940630636e-05
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,510,11141.1,510000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-09-17_14-38-27
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 513
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 2.414458303981357
          entropy_coeff: 0.009999999999999998
          kl: 0.013396531897963188
          policy_loss: -0.07797566051077512
          total_loss: -0.10100639752215809
          vf_explained_var: -1.0
          vf_loss: 0.0003789059021073626
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,511,11162.2,511000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-09-17_14-38-52
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 514
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 2.098707440164354
          entropy_coeff: 0.009999999999999998
          kl: 0.015318832278247789
          policy_loss: -0.0530853271484375
          total_loss: -0.07188038358257877
          vf_explained_var: -0.5699307322502136
          vf_loss: 0.0013516180747602548
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations_since_restore: 512
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,512,11187.1,512000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-09-17_14-39-13
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 515
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.054860418100228535
          cur_lr: 5.000000000000001e-05
          entropy: 1.8372570070955487
          entropy_coeff: 0.009999999999999998
          kl: 0.03093460881979912
          policy_loss: -0.17179959648185306
          total_loss: -0.18054712288495567
          vf_explained_var: -0.71117103099823
          vf_loss: 0.00792796040075498
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,513,11208,513000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-09-17_14-39-34
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 516
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08229062715034278
          cur_lr: 5.000000000000001e-05
          entropy: 2.523766507042779
          entropy_coeff: 0.009999999999999998
          kl: 0.008941336024957992
          policy_loss: -0.18787387824720805
          total_loss: -0.2122943252325058
          vf_explained_var: -1.0
          vf_loss: 8.142728818509366e-05
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  iterations_since_restore: 514
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,514,11229.4,514000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-09-17_14-39-56
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 517
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08229062715034278
          cur_lr: 5.000000000000001e-05
          entropy: 2.068241188261244
          entropy_coeff: 0.009999999999999998
          kl: 0.01085640508429893
          policy_loss: -0.26840681706865627
          total_loss: -0.2879281679375304
          vf_explained_var: -0.38491329550743103
          vf_loss: 0.0002676834096746461
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 515
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,515,11251.7,515000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-09-17_14-40-19
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 518
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08229062715034278
          cur_lr: 5.000000000000001e-05
          entropy: 2.1974939187367757
          entropy_coeff: 0.009999999999999998
          kl: 0.011563463167984513
          policy_loss: -0.18115492347213957
          total_loss: -0.20171380821201537
          vf_explained_var: -0.332089900970459
          vf_loss: 0.0004644889357425402
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 516
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,516,11274,516000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-09-17_14-40-41
  done: false
  episode_len_mean: 995.06
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 519
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08229062715034278
          cur_lr: 5.000000000000001e-05
          entropy: 2.4779151333702933
          entropy_coeff: 0.009999999999999998
          kl: 0.011497218053667603
          policy_loss: -0.12234580053223504
          total_loss: -0.14598980396986008
          vf_explained_var: -0.8401803970336914
          vf_loss: 0.00018903409630487052
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 517
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,517,11295.9,517000,0,0,0,995.06


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-09-17_14-41-02
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 520
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08229062715034278
          cur_lr: 5.000000000000001e-05
          entropy: 1.704946920606825
          entropy_coeff: 0.009999999999999998
          kl: 0.07124854204078235
          policy_loss: -0.12227423335942957
          total_loss: -0.1319421600550413
          vf_explained_var: -0.07987243682146072
          vf_loss: 0.0015184556028947959
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 518
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,518,11317.6,518000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-09-17_14-41-25
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 521
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 0.8973988455202845
          entropy_coeff: 0.009999999999999998
          kl: 0.015577599081932809
          policy_loss: -0.09629991501569748
          total_loss: -0.10259836945268842
          vf_explained_var: 0.16199597716331482
          vf_loss: 0.0007526995249160488
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 519
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,519,11340.5,519000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-09-17_14-41-46
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 522
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 2.195314470926921
          entropy_coeff: 0.009999999999999998
          kl: 0.01324005730149417
          policy_loss: -0.17414653673768044
          total_loss: -0.1940800810439719
          vf_explained_var: -1.0
          vf_loss: 0.0003853025142032291
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  iterations_since_restore: 520
  node_ip: 10.55.229.87
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,520,11361.5,520000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-09-17_14-42-08
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 523
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 0.8175113452805414
          entropy_coeff: 0.009999999999999998
          kl: 0.01860023109294894
          policy_loss: -0.07597437041501204
          total_loss: -0.08110989307363828
          vf_explained_var: -0.015234099701046944
          vf_loss: 0.0007436546587592198
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  iterations_since_restore: 521
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,521,11382.9,521000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-09-17_14-42-29
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 524
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 1.4047685994042292
          entropy_coeff: 0.009999999999999998
          kl: 0.014901117214772351
          policy_loss: -0.1150808156364494
          total_loss: -0.1264477574163013
          vf_explained_var: -0.7836671471595764
          vf_loss: 0.0008414103778906994
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  iterations_since_restore: 522
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,522,11404.1,522000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-09-17_14-42-51
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 525
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 2.4792133543226456
          entropy_coeff: 0.009999999999999998
          kl: 0.008815336229194868
          policy_loss: -0.27544548941983116
          total_loss: -0.2991116808520423
          vf_explained_var: -0.7397075891494751
          vf_loss: 3.7813520329008394e-05
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  iterations_since_restore: 523
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,523,11425.3,523000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-09-17_14-43-13
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 526
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 2.2648269799020557
          entropy_coeff: 0.009999999999999998
          kl: 0.014548087717880204
          policy_loss: -0.16275349772638745
          total_loss: -0.18338604552878274
          vf_explained_var: -0.9773444533348083
          vf_loss: 0.00021996668100554315
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
  iterations_since_restore: 524
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,524,11447.5,524000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-09-17_14-43-35
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 527
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1234359407255142
          cur_lr: 5.000000000000001e-05
          entropy: 1.1383634971247778
          entropy_coeff: 0.009999999999999998
          kl: 0.02017972596666038
          policy_loss: -0.1308349298934142
          total_loss: -0.1385013859305117
          vf_explained_var: -0.06490620225667953
          vf_loss: 0.0012262752027406047
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iterations_since_restore: 525
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,525,11469.7,525000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-09-17_14-43-57
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 528
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 1.3795015619860755
          entropy_coeff: 0.009999999999999998
          kl: 0.00855152556486891
          policy_loss: -0.01771056759688589
          total_loss: -0.028734240473972425
          vf_explained_var: -0.894822359085083
          vf_loss: 0.0011879969815102717
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  iterations_since_restore: 526
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,526,11491.6,526000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-09-17_14-44-20
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 529
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 0.9290863275527954
          entropy_coeff: 0.009999999999999998
          kl: 0.006387024646745942
          policy_loss: -0.09421847992473179
          total_loss: -0.10124279643512435
          vf_explained_var: -0.9773922562599182
          vf_loss: 0.0010839628670105917
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  iterations_since_restore: 527
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,527,11514.8,527000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-09-17_14-44-41
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 530
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 2.0623185250494216
          entropy_coeff: 0.009999999999999998
          kl: 0.010004336372164675
          policy_loss: -0.22488393800126183
          total_loss: -0.2432866154445542
          vf_explained_var: -0.9726977944374084
          vf_loss: 0.0003681669875732041
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
  iterations_since_restore: 528
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,528,11535.8,528000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-09-17_14-45-03
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 531
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 1.8973294019699096
          entropy_coeff: 0.009999999999999998
          kl: 0.006386706152951089
          policy_loss: -0.18379337506161797
          total_loss: -0.20128959599468443
          vf_explained_var: -0.913356602191925
          vf_loss: 0.00029454916449241054
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  iterations_since_restore: 529
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,529,11557,529000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-09-17_14-45-24
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 532
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 2.4488715012868245
          entropy_coeff: 0.009999999999999998
          kl: 0.005910731846372702
          policy_loss: -0.20014284915394254
          total_loss: -0.22351381066772674
          vf_explained_var: -1.0
          vf_loss: 2.335799504685888e-05
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  iterations_since_restore: 530
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,530,11578.1,530000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-09-17_14-45-47
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 533
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 2.0753524078263177
          entropy_coeff: 0.009999999999999998
          kl: 0.00798942462980467
          policy_loss: -0.08025503497984675
          total_loss: -0.09929667914079296
          vf_explained_var: -1.0
          vf_loss: 0.000232606830185331
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  iterations_since_restore: 531
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,531,11601.3,531000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-09-17_14-46-07
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 534
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 2.390712547302246
          entropy_coeff: 0.009999999999999998
          kl: 0.009250104669687372
          policy_loss: -0.11301558423373434
          total_loss: -0.13516692477795814
          vf_explained_var: -0.9830273985862732
          vf_loss: 4.309134158878199e-05
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  iterations_since_restore: 532
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,532,11621.1,532000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-09-17_14-46-25
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 535
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 0.885311077038447
          entropy_coeff: 0.009999999999999998
          kl: 0.012338240892783622
          policy_loss: -0.196664766387807
          total_loss: -0.20232960118187798
          vf_explained_var: -1.0
          vf_loss: 0.0009037997825847318
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  iterations_since_restore: 533
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,533,11639.4,533000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-09-17_14-46-42
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 536
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 2.0952178716659544
          entropy_coeff: 0.009999999999999998
          kl: 0.009276583804162868
          policy_loss: -0.1489173885020945
          total_loss: -0.1679145872592926
          vf_explained_var: -0.7203199863433838
          vf_loss: 0.00023738427694802846
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,534,11656.4,534000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-09-17_14-47-10
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 537
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18515391108827126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7617089119222429
          entropy_coeff: 0.009999999999999998
          kl: 0.004793290986213705
          policy_loss: -0.10736397732463147
          total_loss: -0.11350528146657679
          vf_explained_var: -1.0
          vf_loss: 0.0005882877057754538
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,535,11684.4,535000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-09-17_14-47-35
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 538
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09257695554413563
          cur_lr: 5.000000000000001e-05
          entropy: 2.351236253314548
          entropy_coeff: 0.009999999999999998
          kl: 0.014455971279027875
          policy_loss: -0.1067020133137703
          total_loss: -0.12879225611686707
          vf_explained_var: -0.8103535175323486
          vf_loss: 8.383099086333661e-05
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_since_restore: 536
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,536,11709.3,536000,0,0,0,996.29


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-09-17_14-47-57
  done: false
  episode_len_mean: 996.29
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 539
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09257695554413563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7377292288674249
          entropy_coeff: 0.009999999999999998
          kl: 0.006250222009695961
          policy_loss: -0.1487932365387678
          total_loss: -0.15487270669804679
          vf_explained_var: -0.7610600590705872
          vf_loss: 0.0007191960043403217
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000
  iterations_since_restore: 537
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,537,11731.1,537000,0,0,0,996.29




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-09-17_14-48-36
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 540
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09257695554413563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7678143733077579
          entropy_coeff: 0.009999999999999998
          kl: 0.02178269642830632
          policy_loss: -0.15460666401518716
          total_loss: -0.1592309546139505
          vf_explained_var: -0.13869254291057587
          vf_loss: 0.001037276129419398
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
  iterations_since_restore: 538
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,538,11770.3,538000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-09-17_14-48-58
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 541
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.7911307440863715
          entropy_coeff: 0.009999999999999998
          kl: 0.014709507420804225
          policy_loss: -0.07979834406740136
          total_loss: -0.08480490555779802
          vf_explained_var: -0.6022223234176636
          vf_loss: 0.0008621025772299618
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
  iterations_since_restore: 539
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,539,11792.6,539000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-09-17_14-49-19
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 542
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.7235541462898254
          entropy_coeff: 0.009999999999999998
          kl: 0.01860723252093616
          policy_loss: -0.014249851223495272
          total_loss: -0.017568881809711456
          vf_explained_var: -0.743344783782959
          vf_loss: 0.001332609424005366
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  iterations_since_restore: 540
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,540,11813.4,540000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-09-17_14-49-39
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 543
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.6689455648263295
          entropy_coeff: 0.009999999999999998
          kl: 0.015742677608566208
          policy_loss: -0.030821738060977724
          total_loss: -0.0342298588818974
          vf_explained_var: -0.6894143223762512
          vf_loss: 0.0010952198748580284
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  iterations_since_restore: 541
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,541,11833.3,541000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-09-17_14-49-59
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 544
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 1.8515688869688245
          entropy_coeff: 0.009999999999999998
          kl: 0.015258789845663203
          policy_loss: -0.028889907482597562
          total_loss: -0.04468412467588981
          vf_explained_var: -0.9466232061386108
          vf_loss: 0.0006025551268572195
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_since_restore: 542
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,542,11853.3,542000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-09-17_14-50-20
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 545
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.954607120487425
          entropy_coeff: 0.009999999999999998
          kl: 0.01497211437031691
          policy_loss: -0.08294960514952739
          total_loss: -0.08971437104046345
          vf_explained_var: -0.9520131349563599
          vf_loss: 0.0007021948643442657
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000
  iterations_since_restore: 543
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,543,11873.5,543000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-09-17_14-50-39
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 546
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.8373499797450171
          entropy_coeff: 0.009999999999999998
          kl: 0.011584133319510246
          policy_loss: -0.04968231775694423
          total_loss: -0.05561392286585437
          vf_explained_var: -0.7966041564941406
          vf_loss: 0.0008332585712196305
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  iterations_since_restore: 544
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,544,11893,544000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-09-17_14-51-00
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 547
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.8040902296702067
          entropy_coeff: 0.009999999999999998
          kl: 0.01367623891965053
          policy_loss: -0.020471386363108954
          total_loss: -0.02548679821193218
          vf_explained_var: -0.7668613195419312
          vf_loss: 0.0011263327514623395
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
  iterations_since_restore: 545
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,545,11913.8,545000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-09-17_14-51-20
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 548
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.6901408056418101
          entropy_coeff: 0.009999999999999998
          kl: 0.012269038215945353
          policy_loss: -0.03012615351213349
          total_loss: -0.03465963610344463
          vf_explained_var: -0.08161798119544983
          vf_loss: 0.0006641816212019573
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  iterations_since_restore: 546
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,546,11933.8,546000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-09-17_14-51-41
  done: false
  episode_len_mean: 995.0
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 549
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.6317944818072849
          entropy_coeff: 0.009999999999999998
          kl: 0.01215432860326781
          policy_loss: -0.03766509820189741
          total_loss: -0.0416075684544113
          vf_explained_var: -0.9124952554702759
          vf_loss: 0.0006876569703712852
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  iterations_since_restore: 547
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,547,11955.1,547000,0,0,0,995


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-09-17_14-52-01
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 550
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13886543331620343
          cur_lr: 5.000000000000001e-05
          entropy: 0.2654536149568028
          entropy_coeff: 0.009999999999999998
          kl: 0.003950761348992958
          policy_loss: -0.10433581781884034
          total_loss: -0.1063335683196783
          vf_explained_var: 0.646876335144043
          vf_loss: 0.00010816338976332594
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000
  iterations_since_restore: 548
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,548,11974.8,548000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-09-17_14-52-22
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 551
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06943271665810172
          cur_lr: 5.000000000000001e-05
          entropy: 0.8609856426715851
          entropy_coeff: 0.009999999999999998
          kl: 0.019660310190528482
          policy_loss: -0.03563034472366174
          total_loss: -0.04182898998260498
          vf_explained_var: -0.9549055099487305
          vf_loss: 0.001046142238839012
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000
  iterations_since_restore: 549
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,549,11995.6,549000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-09-17_14-52-42
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 552
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06943271665810172
          cur_lr: 5.000000000000001e-05
          entropy: 1.0520539187722735
          entropy_coeff: 0.009999999999999998
          kl: 0.01713677066602687
          policy_loss: -0.03643836176229848
          total_loss: -0.0450913673473729
          vf_explained_var: -1.0
          vf_loss: 0.0006776819008842318
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000
  iterations_since_restore: 550
  node_ip: 10.55.229.87
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,550,12016.2,550000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-09-17_14-53-02
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 553
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06943271665810172
          cur_lr: 5.000000000000001e-05
          entropy: 0.641075274017122
          entropy_coeff: 0.009999999999999998
          kl: 0.037220112427298425
          policy_loss: -0.056643428405125935
          total_loss: -0.059711064563857184
          vf_explained_var: 0.3616931438446045
          vf_loss: 0.0007588251616754051
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000
  iterations_since_restore: 551
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,551,12035.8,551000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-09-17_14-53-22
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 554
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10414907498715255
          cur_lr: 5.000000000000001e-05
          entropy: 0.5263188011116452
          entropy_coeff: 0.009999999999999998
          kl: 0.01387713567277468
          policy_loss: 0.019991359776920743
          total_loss: 0.01660028232468499
          vf_explained_var: 0.17517971992492676
          vf_loss: 0.0004268202887841552
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000
  iterations_since_restore: 552
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,552,12055.8,552000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-09-17_14-53-43
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 555
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10414907498715255
          cur_lr: 5.000000000000001e-05
          entropy: 1.7334414137734306
          entropy_coeff: 0.009999999999999998
          kl: 0.03657967502369408
          policy_loss: -0.031096112148629295
          total_loss: -0.043453501330481634
          vf_explained_var: -0.36016806960105896
          vf_loss: 0.001167285750206146
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  iterations_since_restore: 553
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,553,12077,553000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-09-17_14-54-04
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 556
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15622361248072886
          cur_lr: 5.000000000000001e-05
          entropy: 2.1225010368559096
          entropy_coeff: 0.009999999999999998
          kl: 0.024614808451999256
          policy_loss: -0.026257587348421416
          total_loss: -0.043419818000661005
          vf_explained_var: -0.6154891848564148
          vf_loss: 0.0002173658935741211
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  iterations_since_restore: 554
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,554,12097.2,554000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-09-17_14-54-24
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 557
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.003426965077718
          entropy_coeff: 0.009999999999999998
          kl: 0.015472207837929833
          policy_loss: -0.01391542282783323
          total_loss: -0.030122434265083736
          vf_explained_var: -0.25269582867622375
          vf_loss: 0.00020157211882987111
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iterations_since_restore: 555
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,555,12117.8,555000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-09-17_14-54-44
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 558
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.2122255947854783
          entropy_coeff: 0.009999999999999998
          kl: 0.010823173225033027
          policy_loss: 0.05427639186382294
          total_loss: 0.03507832280463642
          vf_explained_var: -0.273141086101532
          vf_loss: 0.0003879358381204333
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  iterations_since_restore: 556
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,556,12137.1,556000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-09-17_14-55-05
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 559
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.243946894009908
          entropy_coeff: 0.009999999999999998
          kl: 0.011401337344944472
          policy_loss: -0.021929761477642588
          total_loss: -0.030740551981661056
          vf_explained_var: -0.39025434851646423
          vf_loss: 0.000956939901677995
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  iterations_since_restore: 557
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,557,12158.6,557000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-09-17_14-55-25
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 560
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.470904509226481
          entropy_coeff: 0.009999999999999998
          kl: 0.01065743543801342
          policy_loss: 0.04823159151193168
          total_loss: 0.026112957919637362
          vf_explained_var: -0.6041109561920166
          vf_loss: 9.299604847304485e-05
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  iterations_since_restore: 558
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,558,12178.4,558000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-09-17_14-55-44
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 561
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.435788165198432
          entropy_coeff: 0.009999999999999998
          kl: 0.0144648268887157
          policy_loss: -0.0027323000443478427
          total_loss: -0.023533248321877587
          vf_explained_var: -0.810911238193512
          vf_loss: 0.00016731032554970524
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000
  iterations_since_restore: 559
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,559,12197.4,559000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-09-17_14-56-03
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 562
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.638985766304864
          entropy_coeff: 0.009999999999999998
          kl: 0.011118142319694138
          policy_loss: -0.03158053654349512
          total_loss: -0.05522711219059096
          vf_explained_var: -0.7559643387794495
          vf_loss: 0.0001379084200531603
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  iterations_since_restore: 560
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,560,12216.3,560000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-09-17_14-56-23
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 563
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6260332187016804
          entropy_coeff: 0.009999999999999998
          kl: 0.011774787196701435
          policy_loss: -0.12032005590283208
          total_loss: -0.14371976773771974
          vf_explained_var: -0.17836827039718628
          vf_loss: 0.00010137118519600739
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  iterations_since_restore: 561
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,561,12236.3,561000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-09-17_14-56-41
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 564
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6208938439687093
          entropy_coeff: 0.009999999999999998
          kl: 0.01687875997952533
          policy_loss: -0.06003773294182287
          total_loss: -0.08212081626471546
          vf_explained_var: -0.4520702660083771
          vf_loss: 0.00017056520960573432
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  iterations_since_restore: 562
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,562,12254.7,562000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-09-17_14-56-59
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 565
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.679516617457072
          entropy_coeff: 0.009999999999999998
          kl: 0.008578888717271797
          policy_loss: -0.12596239373087884
          total_loss: -0.150668649127086
          vf_explained_var: -0.4396516680717468
          vf_loss: 7.857323933219757e-05
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  iterations_since_restore: 563
  node_ip: 10.55.229.87
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,563,12272.7,563000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-09-17_14-57-19
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 566
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6522661871380278
          entropy_coeff: 0.009999999999999998
          kl: 0.007334050864540111
          policy_loss: 0.020488780964579848
          total_loss: -0.004192402751909362
          vf_explained_var: -0.24534155428409576
          vf_loss: 0.0001228506003422404
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_since_restore: 564
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,564,12292.2,564000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-09-17_14-57-41
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 567
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.61730444961124
          entropy_coeff: 0.009999999999999998
          kl: 0.010009049059739105
          policy_loss: -0.0854279124074512
          total_loss: -0.1091393085817496
          vf_explained_var: -0.673344612121582
          vf_loss: 0.00011617147324614053
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_since_restore: 565
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,565,12313.9,565000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-09-17_14-58-01
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 568
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.5829479161236021
          entropy_coeff: 0.009999999999999998
          kl: 0.015653202063300498
          policy_loss: 0.048568521646989715
          total_loss: 0.04717894546273682
          vf_explained_var: -0.5312014818191528
          vf_loss: 0.0007718025868396378
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterations_since_restore: 566
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,566,12334.4,566000,0,0,0,996.3


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-09-17_14-58-20
  done: false
  episode_len_mean: 996.3
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 569
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6968694819344416
          entropy_coeff: 0.009999999999999998
          kl: 0.006839514190083622
          policy_loss: -0.002756153502398067
          total_loss: -0.027967258335815537
          vf_explained_var: -0.30175158381462097
          vf_loss: 0.00015484710609143805
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iterations_since_restore: 567
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,567,12353.1,567000,0,0,0,996.3




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-09-17_14-58-57
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 570
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.662915439075894
          entropy_coeff: 0.009999999999999998
          kl: 0.00923913383267681
          policy_loss: -0.0557419040447308
          total_loss: -0.08005404944221178
          vf_explained_var: -0.4709133505821228
          vf_loss: 0.00015195438543034673
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iterations_since_restore: 568
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,568,12389.8,568000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-09-17_14-59-16
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 571
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.3286849008666146
          entropy_coeff: 0.009999999999999998
          kl: 0.00920355159561418
          policy_loss: -0.04772463823772139
          total_loss: -0.06861335424085459
          vf_explained_var: -0.648851215839386
          vf_loss: 0.0002414150358409491
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iterations_since_restore: 569
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,569,12408.7,569000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-09-17_14-59-34
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 572
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.4096210598945618
          entropy_coeff: 0.009999999999999998
          kl: 0.016860103145273798
          policy_loss: -0.05947778556081984
          total_loss: -0.07921777756677734
          vf_explained_var: 0.13797637820243835
          vf_loss: 0.0004052987065102166
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations_since_restore: 570
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,570,12426.9,570000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-09-17_14-59-52
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 573
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.2923766096433003
          entropy_coeff: 0.009999999999999998
          kl: 0.012703305511585149
          policy_loss: -0.05121866514285405
          total_loss: -0.07060125006569756
          vf_explained_var: -0.5271025896072388
          vf_loss: 0.0005643452199497763
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  iterations_since_restore: 571
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,571,12445.3,571000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-09-17_15-00-14
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 574
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.1066295597288343
          entropy_coeff: 0.009999999999999998
          kl: 0.011156854152626
          policy_loss: -0.05363951875931687
          total_loss: -0.07150725482238664
          vf_explained_var: -0.44370681047439575
          vf_loss: 0.0005841124371297863
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
  iterations_since_restore: 572
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,572,12467.4,572000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-09-17_15-00-35
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 575
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.437464944521586
          entropy_coeff: 0.009999999999999998
          kl: 0.009436071905337507
          policy_loss: 0.009918878310256535
          total_loss: -0.012006838454140557
          vf_explained_var: -0.5372171401977539
          vf_loss: 0.00023772601744869663
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000
  iterations_since_restore: 573
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,573,12487.9,573000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-09-17_15-00-54
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 576
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6806440035502117
          entropy_coeff: 0.009999999999999998
          kl: 0.009903574385635001
          policy_loss: -0.05665203873068094
          total_loss: -0.08098321214525236
          vf_explained_var: 0.2119816690683365
          vf_loss: 0.00015450532212134426
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  iterations_since_restore: 574
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,574,12506.9,574000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-09-17_15-01-17
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 577
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.3822740965419347
          entropy_coeff: 0.009999999999999998
          kl: 0.009018431425815425
          policy_loss: -0.029204171412210496
          total_loss: -0.05067782394277553
          vf_explained_var: -0.5335296392440796
          vf_loss: 0.00023575125695616509
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  iterations_since_restore: 575
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,575,12530,575000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-09-17_15-01-37
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 578
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.6236688799328274
          entropy_coeff: 0.009999999999999998
          kl: 0.013658974994022414
          policy_loss: -0.025104049717386563
          total_loss: -0.04792577007578479
          vf_explained_var: -0.0030788329895585775
          vf_loss: 0.00021418574868208552
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  iterations_since_restore: 576
  node_ip: 10.55.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,576,12549.7,576000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-09-17_15-01-56
  done: false
  episode_len_mean: 994.99
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 579
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.5879409896002876
          entropy_coeff: 0.009999999999999998
          kl: 0.012180231074549401
          policy_loss: -0.0639421786285109
          total_loss: -0.08679967768904236
          vf_explained_var: -0.7602620720863342
          vf_loss: 0.00016765052544845578
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  iterations_since_restore: 577
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,577,12569,577000,0,0,0,994.99


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-09-17_15-02-16
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 580
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.561847350332472
          entropy_coeff: 0.009999999999999998
          kl: 0.008449757957618962
          policy_loss: -0.06048896002272765
          total_loss: -0.08388709702218572
          vf_explained_var: -0.9057321548461914
          vf_loss: 0.00024025826066160033
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  iterations_since_restore: 578
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,578,12588.8,578000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-09-17_15-02-35
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 581
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.5544453620910645
          entropy_coeff: 0.009999999999999998
          kl: 0.014705247967498133
          policy_loss: -0.0020080693893962435
          total_loss: -0.023952976862589518
          vf_explained_var: -0.653937578201294
          vf_loss: 0.00015358297769125785
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  iterations_since_restore: 579
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,579,12607.2,579000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-09-17_15-02-54
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 582
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.4211009793811376
          entropy_coeff: 0.009999999999999998
          kl: 0.012393349810374982
          policy_loss: -0.06340384669601917
          total_loss: -0.08447545766830444
          vf_explained_var: -0.9444121718406677
          vf_loss: 0.00023519565274909837
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  iterations_since_restore: 580
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,580,12626.5,580000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-09-17_15-03-15
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 583
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.4844477732976276
          entropy_coeff: 0.009999999999999998
          kl: 0.011927712522387926
          policy_loss: -0.038445991981360644
          total_loss: -0.060356528953545624
          vf_explained_var: -0.8684854507446289
          vf_loss: 0.0001388530551796268
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  iterations_since_restore: 581
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,581,12647.8,581000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-09-17_15-03-35
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 584
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.1515368766254848
          entropy_coeff: 0.009999999999999998
          kl: 0.011797362284721362
          policy_loss: -0.04243981407748328
          total_loss: -0.06104813673430019
          vf_explained_var: -0.6377089619636536
          vf_loss: 0.00014250494568841533
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  iterations_since_restore: 582
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,582,12667.3,582000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-09-17_15-03-54
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 585
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.015105558766259
          entropy_coeff: 0.009999999999999998
          kl: 0.013210085631212584
          policy_loss: -0.014351967308256362
          total_loss: -0.0312975169883834
          vf_explained_var: -0.48095715045928955
          vf_loss: 0.00010991447124979459
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  iterations_since_restore: 583
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,583,12686.8,583000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-09-17_15-04-16
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 586
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.6115366134378646
          entropy_coeff: 0.009999999999999998
          kl: 0.011135177780480671
          policy_loss: -0.03881690005461375
          total_loss: -0.0453215077933338
          vf_explained_var: -0.8548425436019897
          vf_loss: 0.007001394313637219
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  iterations_since_restore: 584
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,584,12708.1,584000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-09-17_15-04-38
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 587
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.46764312761887494
          entropy_coeff: 0.009999999999999998
          kl: 0.00804734628477964
          policy_loss: -0.11979177089201079
          total_loss: -0.1224026215573152
          vf_explained_var: 0.08727512508630753
          vf_loss: 0.00017980172784493536
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  iterations_since_restore: 585
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,585,12730.3,585000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-09-17_15-05-00
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 588
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.937202090687222
          entropy_coeff: 0.009999999999999998
          kl: 0.013394588243448267
          policy_loss: -0.056467995021699205
          total_loss: -0.07255176478582952
          vf_explained_var: -0.989919900894165
          vf_loss: 0.00014942453041536888
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  iterations_since_restore: 586
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,586,12752.5,586000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-09-17_15-05-20
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 589
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.942072652445899
          entropy_coeff: 0.009999999999999998
          kl: 0.009602703024522501
          policy_loss: -0.01627119295299053
          total_loss: -0.033303797927995524
          vf_explained_var: -1.0
          vf_loss: 0.00013786977922589157
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterations_since_restore: 587
  node_ip: 10.55.229.87
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,587,12772.6,587000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-09-17_15-05-42
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 590
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.794098260667589
          entropy_coeff: 0.009999999999999998
          kl: 0.019870763640083014
          policy_loss: 0.04544288623664114
          total_loss: 0.03240635204646322
          vf_explained_var: -0.520483672618866
          vf_loss: 0.00024802348802445016
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  iterations_since_restore: 588
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,588,12793.9,588000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-09-17_15-06-02
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 591
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.9245053781403436
          entropy_coeff: 0.009999999999999998
          kl: 0.01201015139334416
          policy_loss: -0.008407964929938316
          total_loss: -0.024594831301106346
          vf_explained_var: -0.6329765915870667
          vf_loss: 0.0002437839453907347
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  iterations_since_restore: 589
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,589,12814.5,589000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-09-17_15-06-23
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 592
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.0191497166951495
          entropy_coeff: 0.009999999999999998
          kl: 0.013381965056140999
          policy_loss: -0.054703261372115876
          total_loss: -0.07157926445619928
          vf_explained_var: -0.42569002509117126
          vf_loss: 0.00017962583903378496
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  iterations_since_restore: 590
  node_ip: 10.55.22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,590,12835.4,590000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-09-17_15-06-44
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 593
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.8990597208340962
          entropy_coeff: 0.009999999999999998
          kl: 0.007496890559809937
          policy_loss: -0.192388563685947
          total_loss: -0.20940285589959887
          vf_explained_var: -0.4315352141857147
          vf_loss: 0.0002195154593007626
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
  iterations_since_restore: 591
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,591,12856.2,591000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-09-17_15-07-07
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 594
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.069639119837019
          entropy_coeff: 0.009999999999999998
          kl: 0.012086422710730155
          policy_loss: -0.07060315617256695
          total_loss: -0.08818884185618825
          vf_explained_var: -0.4247291684150696
          vf_loss: 0.0002784233813549185
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000
  iterations_since_restore: 592
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,592,12879.2,592000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-09-17_15-07-27
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 595
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.3776824686262343
          entropy_coeff: 0.009999999999999998
          kl: 0.010981608097891933
          policy_loss: -0.019446026699410545
          total_loss: -0.04057107518116633
          vf_explained_var: -1.0
          vf_loss: 7.83981899076025e-05
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000
  iterations_since_restore: 593
  node_ip: 10.55.229.87
  num_healthy

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,593,12899.2,593000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-09-17_15-07-47
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 596
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.5093060546451147
          entropy_coeff: 0.009999999999999998
          kl: 0.010385804305844026
          policy_loss: -0.0016360011365678575
          total_loss: -0.024176591663207445
          vf_explained_var: -0.5735158920288086
          vf_loss: 0.00011870783599887444
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  iterations_since_restore: 594
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,594,12918.6,594000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-09-17_15-08-05
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 597
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.553683869043986
          entropy_coeff: 0.009999999999999998
          kl: 0.00979498646881988
          policy_loss: -0.010744216665625573
          total_loss: -0.03385964184999466
          vf_explained_var: -0.09917036443948746
          vf_loss: 0.0001261012529413266
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
  iterations_since_restore: 595
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,595,12937.3,595000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-09-17_15-08-24
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 598
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.4570093340343897
          entropy_coeff: 0.009999999999999998
          kl: 0.008676989995587907
          policy_loss: -0.049535786815815504
          total_loss: -0.07181915177239312
          vf_explained_var: -0.5687093734741211
          vf_loss: 0.0002534004253094382
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  iterations_since_restore: 596
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,596,12956.2,596000,0,0,0,996.22


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-09-17_15-08-44
  done: false
  episode_len_mean: 996.22
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 599
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.595793525377909
          entropy_coeff: 0.009999999999999998
          kl: 0.007124383418420719
          policy_loss: -0.05374619174334738
          total_loss: -0.07776749456922213
          vf_explained_var: -0.36658981442451477
          vf_loss: 0.000267137118081943
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 597000
  iterations_since_restore: 597
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,597,12976.2,597000,0,0,0,996.22




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-09-17_15-09-22
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 600
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 2.2846919430626764
          entropy_coeff: 0.009999999999999998
          kl: 0.008957065748146873
          policy_loss: -0.0056526063217057125
          total_loss: -0.026175793260335922
          vf_explained_var: -0.5172091722488403
          vf_loss: 0.00022477210923170786
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_since_restore: 598
  node_ip: 10.55.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,598,13014.4,598000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-09-17_15-09-42
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 601
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.7717611650625864
          entropy_coeff: 0.009999999999999998
          kl: 0.007485471240383133
          policy_loss: -0.08361969300442272
          total_loss: -0.09821708343095249
          vf_explained_var: -0.45560699701309204
          vf_loss: 0.001366108347140956
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iterations_since_restore: 599
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,599,13033.4,599000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-09-17_15-10-02
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 602
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 0.6003358678685294
          entropy_coeff: 0.009999999999999998
          kl: 0.007843107760293656
          policy_loss: -0.049132282535235085
          total_loss: -0.052390221175220276
          vf_explained_var: 0.275616317987442
          vf_loss: 0.0009075035971666997
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_since_restore: 600
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,600,13053.4,600000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-09-17_15-10-22
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 603
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23433541872109329
          cur_lr: 5.000000000000001e-05
          entropy: 1.6099478383858998
          entropy_coeff: 0.009999999999999998
          kl: 0.027440828041624756
          policy_loss: 0.005270146330197652
          total_loss: -0.003962841692070166
          vf_explained_var: -0.2864294946193695
          vf_loss: 0.00043613168462697
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  iterations_since_restore: 601
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,601,13073.9,601000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-09-17_15-10-41
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 604
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8094873289267221
          entropy_coeff: 0.009999999999999998
          kl: 0.013238318592022028
          policy_loss: -0.014341400087707573
          total_loss: -0.02726730431119601
          vf_explained_var: -0.6744744181632996
          vf_loss: 0.000515656068071419
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  iterations_since_restore: 602
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,602,13093.2,602000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-09-17_15-11-01
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 605
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.343361417452494
          entropy_coeff: 0.009999999999999998
          kl: 0.010670575880834024
          policy_loss: -0.023000370628303953
          total_loss: -0.042472365436454614
          vf_explained_var: -0.9692794680595398
          vf_loss: 0.0002108774666238686
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000
  iterations_since_restore: 603
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,603,13112.2,603000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-09-17_15-11-20
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 606
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8239725510279337
          entropy_coeff: 0.009999999999999998
          kl: 0.010174691003151083
          policy_loss: -0.03002727230389913
          total_loss: -0.044353344498409164
          vf_explained_var: -1.0
          vf_loss: 0.0003372146250007467
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
  iterations_since_restore: 604
  node_ip: 10.55.229.87
  num_healthy_w

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,604,13131.7,604000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-09-17_15-11-40
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 607
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.9665326277414958
          entropy_coeff: 0.009999999999999998
          kl: 0.008023482379280366
          policy_loss: -0.10874253283772203
          total_loss: -0.1253662694659498
          vf_explained_var: -0.5337090492248535
          vf_loss: 0.00022130912798780223
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  iterations_since_restore: 605
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,605,13151.3,605000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-09-17_15-12-01
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 608
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8831373148494297
          entropy_coeff: 0.009999999999999998
          kl: 0.012034564292482959
          policy_loss: -0.05707103562437826
          total_loss: -0.07129117554674545
          vf_explained_var: -0.9624158143997192
          vf_loss: 0.00038104851733401627
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
  iterations_since_restore: 606
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,606,13172.1,606000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-09-17_15-12-21
  done: false
  episode_len_mean: 994.68
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 609
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.908911289109124
          entropy_coeff: 0.009999999999999998
          kl: 0.008700483171523013
          policy_loss: -0.02344395470701986
          total_loss: -0.039295391117533045
          vf_explained_var: -1.0
          vf_loss: 0.000179429244841837
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
  iterations_since_restore: 607
  node_ip: 10.55.229.87
  num_healthy_wor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,607,13192.8,607000,0,0,0,994.68


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-09-17_15-12-41
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 610
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.0050632264879016
          entropy_coeff: 0.009999999999999998
          kl: 0.010143041920149799
          policy_loss: -0.07034894915090667
          total_loss: -0.08659224038322767
          vf_explained_var: -0.47916820645332336
          vf_loss: 0.00024203125536183102
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000
  iterations_since_restore: 608
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,608,13213,608000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-09-17_15-13-00
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 611
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.95047347413169
          entropy_coeff: 0.009999999999999998
          kl: 0.0065267620502766814
          policy_loss: -0.02191620207288199
          total_loss: -0.03896803831060727
          vf_explained_var: -0.99717777967453
          vf_loss: 0.00015871861374358156
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000
  iterations_since_restore: 609
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,609,13231.6,609000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-09-17_15-13-21
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 612
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8743288199106851
          entropy_coeff: 0.009999999999999998
          kl: 0.008473264410027999
          policy_loss: -0.036567417076892324
          total_loss: -0.05216535493317578
          vf_explained_var: -0.8357949256896973
          vf_loss: 0.00016697000650714875
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  iterations_since_restore: 610
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,610,13252.7,610000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-09-17_15-13-41
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 613
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.558781698015001
          entropy_coeff: 0.009999999999999998
          kl: 0.011093133741892795
          policy_loss: -0.015492795366379949
          total_loss: -0.026833877795272403
          vf_explained_var: -0.3471444547176361
          vf_loss: 0.0003474629256137026
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000
  iterations_since_restore: 611
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,611,13272.3,611000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-09-17_15-14-01
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 614
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8025052044126721
          entropy_coeff: 0.009999999999999998
          kl: 0.007276749543781255
          policy_loss: -0.030085295252501965
          total_loss: -0.04530604863539338
          vf_explained_var: -0.9242734909057617
          vf_loss: 0.0002464956810096434
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000
  iterations_since_restore: 612
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,612,13292,612000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-09-17_15-14-20
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 615
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.143802983231015
          entropy_coeff: 0.009999999999999998
          kl: 0.010350396475879523
          policy_loss: -0.0073479376319381924
          total_loss: -0.024828801345494058
          vf_explained_var: -0.5854500532150269
          vf_loss: 0.00031896983077280716
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000
  iterations_since_restore: 613
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,613,13311.2,613000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-09-17_15-14-41
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 616
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.9065478218926324
          entropy_coeff: 0.009999999999999998
          kl: 0.00905052266634772
          policy_loss: -0.046612685815327695
          total_loss: -0.062254552356898786
          vf_explained_var: -0.376857727766037
          vf_loss: 0.00024232451849254884
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000
  iterations_since_restore: 614
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,614,13332.3,614000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-09-17_15-15-00
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 617
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.1047958029641047
          entropy_coeff: 0.009999999999999998
          kl: 0.0081212785927125
          policy_loss: -0.03937645718041394
          total_loss: -0.05742339655343029
          vf_explained_var: -1.0
          vf_loss: 0.0001463623731372839
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  iterations_since_restore: 615
  node_ip: 10.55.229.87
  num_healthy_work

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,615,13351.1,615000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-09-17_15-15-21
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 618
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.1661844624413384
          entropy_coeff: 0.009999999999999998
          kl: 0.008399159175235878
          policy_loss: -0.027757311736543975
          total_loss: -0.0460852497153812
          vf_explained_var: -0.7034638524055481
          vf_loss: 0.00038157408304060583
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iterations_since_restore: 616
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,616,13372.6,616000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-09-17_15-15-39
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 619
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.30189934041765
          entropy_coeff: 0.009999999999999998
          kl: 0.015487548235357895
          policy_loss: -0.053915222651428646
          total_loss: -0.07122652588619126
          vf_explained_var: -0.520687460899353
          vf_loss: 0.0002637674685198969
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  iterations_since_restore: 617
  node_ip: 10.55.229.87
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,617,13390.6,617000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-09-17_15-15-58
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 620
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.2906272093455
          entropy_coeff: 0.009999999999999998
          kl: 0.009003857999061655
          policy_loss: -0.029310744255781174
          total_loss: -0.0486683185522755
          vf_explained_var: 0.179457426071167
          vf_loss: 0.00038381497921970246
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  iterations_since_restore: 618
  node_ip: 10.55.229.87
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,618,13409.3,618000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-09-17_15-16-18
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 621
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.0467131866349115
          entropy_coeff: 0.009999999999999998
          kl: 0.014447031487912751
          policy_loss: -0.019892229346765412
          total_loss: -0.03506556269195345
          vf_explained_var: -0.5950038433074951
          vf_loss: 0.00021562114133202056
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  iterations_since_restore: 619
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,619,13428.7,619000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-09-17_15-16-38
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 622
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.6362840745184157
          entropy_coeff: 0.009999999999999998
          kl: 0.010366497405645663
          policy_loss: -0.05202982794079516
          total_loss: -0.06420402884897258
          vf_explained_var: -0.5723239779472351
          vf_loss: 0.0005447824217198003
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  iterations_since_restore: 620
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,620,13449.2,620000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-09-17_15-16-58
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 623
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.8079773730701871
          entropy_coeff: 0.009999999999999998
          kl: 0.008665497892807444
          policy_loss: 0.042749636206361984
          total_loss: 0.028195935984452566
          vf_explained_var: -0.7251244187355042
          vf_loss: 0.00048012418806643435
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iterations_since_restore: 621
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,621,13469.5,621000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-09-17_15-17-20
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 624
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.6903169631958008
          entropy_coeff: 0.009999999999999998
          kl: 0.00804577425236997
          policy_loss: -0.07216337685369784
          total_loss: -0.08588722712463803
          vf_explained_var: -0.5314889550209045
          vf_loss: 0.00035120366087034604
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  iterations_since_restore: 622
  node_ip: 10.55.229.87
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,622,13490.5,622000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-09-17_15-17-39
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 625
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 1.7924632840686374
          entropy_coeff: 0.009999999999999998
          kl: 0.008722540144668204
          policy_loss: -0.011481832298967574
          total_loss: -0.025997241752015218
          vf_explained_var: -0.815427303314209
          vf_loss: 0.00034322322870947473
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iterations_since_restore: 623
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,623,13510.4,623000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-09-17_15-17-58
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 626
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.2699043247434827
          entropy_coeff: 0.009999999999999998
          kl: 0.016940446363769564
          policy_loss: -0.022351605279578104
          total_loss: -0.038779724554883106
          vf_explained_var: -0.7463254928588867
          vf_loss: 0.0003163064607482132
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  iterations_since_restore: 624
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,624,13529,624000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-09-17_15-18-18
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 627
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.5248950242996218
          entropy_coeff: 0.009999999999999998
          kl: 0.00893357142409123
          policy_loss: -0.04565817545064622
          total_loss: -0.0676669912205802
          vf_explained_var: -0.2424076497554779
          vf_loss: 9.99552391640969e-05
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  iterations_since_restore: 625
  node_ip: 10.55.229.87
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,625,13548.5,625000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-09-17_15-18-36
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 628
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.35150312808164
          cur_lr: 5.000000000000001e-05
          entropy: 2.3442056205537583
          entropy_coeff: 0.009999999999999998
          kl: 0.02453553810416851
          policy_loss: -0.03145414027902815
          total_loss: -0.046011487560139765
          vf_explained_var: 0.16238680481910706
          vf_loss: 0.00026039058268704845
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterations_since_restore: 626
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,626,13567.1,626000,0,0,0,995.86


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-09-17_15-18-55
  done: false
  episode_len_mean: 995.86
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 629
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5272546921224599
          cur_lr: 5.000000000000001e-05
          entropy: 2.2957657231224906
          entropy_coeff: 0.009999999999999998
          kl: 0.008441367446275667
          policy_loss: 0.005048244363731808
          total_loss: -0.013256866816017362
          vf_explained_var: 0.10139671713113785
          vf_loss: 0.00020179340879167285
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations_since_restore: 627
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,627,13586.1,627000,0,0,0,995.86




Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-09-17_15-19-34
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 630
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5272546921224599
          cur_lr: 5.000000000000001e-05
          entropy: 2.39002615875668
          entropy_coeff: 0.009999999999999998
          kl: 0.007359042539799729
          policy_loss: -0.015869485752450094
          total_loss: -0.03552881677945455
          vf_explained_var: -0.9452159404754639
          vf_loss: 0.00036083914310691196
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iterations_since_restore: 628
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,628,13624.5,628000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-09-17_15-19-55
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 631
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5272546921224599
          cur_lr: 5.000000000000001e-05
          entropy: 2.265276512834761
          entropy_coeff: 0.009999999999999998
          kl: 0.010661316963738166
          policy_loss: -0.05249709515935845
          total_loss: -0.06898265373375681
          vf_explained_var: -0.36950933933258057
          vf_loss: 0.000545975045234728
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  iterations_since_restore: 629
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,629,13645.7,629000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-09-17_15-20-17
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 632
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5272546921224599
          cur_lr: 5.000000000000001e-05
          entropy: 2.3928192536036175
          entropy_coeff: 0.009999999999999998
          kl: 0.010182009176607526
          policy_loss: -0.02345834941499763
          total_loss: -0.04180133019884427
          vf_explained_var: -0.25337058305740356
          vf_loss: 0.000216701432923906
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iterations_since_restore: 630
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,630,13667.3,630000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-09-17_15-20-35
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 633
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5272546921224599
          cur_lr: 5.000000000000001e-05
          entropy: 2.5993638118108113
          entropy_coeff: 0.009999999999999998
          kl: 0.004293481761843094
          policy_loss: 0.0009138883091509342
          total_loss: -0.022762518272631697
          vf_explained_var: -0.990725040435791
          vf_loss: 5.347474131111893e-05
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations_since_restore: 631
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,631,13685.7,631000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-09-17_15-20-54
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 634
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.3587077842818367
          entropy_coeff: 0.009999999999999998
          kl: 0.01273201702745218
          policy_loss: -0.04231714105440511
          total_loss: -0.0623312340842353
          vf_explained_var: -0.4190864861011505
          vf_loss: 0.00021647656445919792
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_since_restore: 632
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,632,13705,632000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-09-17_15-21-13
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 635
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.4608979596032037
          entropy_coeff: 0.009999999999999998
          kl: 0.008634583544225628
          policy_loss: -0.03762772394758132
          total_loss: -0.05980458172659079
          vf_explained_var: -0.8246957659721375
          vf_loss: 0.00015581204417003391
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  iterations_since_restore: 633
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,633,13723.8,633000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-09-17_15-21-32
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 636
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.38253909084532
          entropy_coeff: 0.009999999999999998
          kl: 0.006870255911525814
          policy_loss: -0.04268805090751913
          total_loss: -0.06451757564726802
          vf_explained_var: -0.9140740036964417
          vf_loss: 0.00018467738618836745
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_since_restore: 634
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,634,13742.6,634000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-09-17_15-21-51
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 637
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.196476656860775
          entropy_coeff: 0.009999999999999998
          kl: 0.00816672773045929
          policy_loss: -0.04276911638961691
          total_loss: -0.06213736095362239
          vf_explained_var: -0.8443229794502258
          vf_loss: 0.0004435501694994552
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iterations_since_restore: 635
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,635,13761.3,635000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-09-17_15-22-10
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 638
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 1.8070839332209694
          entropy_coeff: 0.009999999999999998
          kl: 0.006263105699599652
          policy_loss: -0.05201959659655889
          total_loss: -0.06790085658431053
          vf_explained_var: -0.6799647212028503
          vf_loss: 0.0005384505777328741
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterations_since_restore: 636
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,636,13780.1,636000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-09-17_15-22-30
  done: false
  episode_len_mean: 994.28
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 639
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.2358662459585403
          entropy_coeff: 0.009999999999999998
          kl: 0.013344613066009951
          policy_loss: -0.03770637199696567
          total_loss: -0.0562845289396743
          vf_explained_var: -0.409164160490036
          vf_loss: 0.00026249919807115574
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_since_restore: 637
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,637,13800.3,637000,0,0,0,994.28


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-09-17_15-22-50
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 640
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.364290421538883
          entropy_coeff: 0.009999999999999998
          kl: 0.009694638870082489
          policy_loss: -0.0496038358244631
          total_loss: -0.07032453273940417
          vf_explained_var: -0.9922937154769897
          vf_loss: 0.0003664333167080258
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_since_restore: 638
  node_ip: 10.55.229.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,638,13820.1,638000,0,0,0,995.57


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-09-17_15-23-10
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 641
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.0288484162754483
          entropy_coeff: 0.009999999999999998
          kl: 0.011651414426192927
          policy_loss: -0.03863444566312763
          total_loss: -0.05554631436243653
          vf_explained_var: -0.5497144460678101
          vf_loss: 0.00030498205339123764
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_since_restore: 639
  node_ip: 10.55.229.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,639,13840,639000,0,0,0,995.57


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-09-17_15-23-28
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 642
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.374757209089067
          entropy_coeff: 0.009999999999999998
          kl: 0.006361447502823648
          policy_loss: -0.04299831498000357
          total_loss: -0.06485267180121607
          vf_explained_var: -0.9601516127586365
          vf_loss: 0.00021616196436298197
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_since_restore: 640
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,640,13858.6,640000,0,0,0,995.57


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-09-17_15-23-47
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 643
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.4976070006688436
          entropy_coeff: 0.009999999999999998
          kl: 0.006320048535288514
          policy_loss: -0.020636477041989565
          total_loss: -0.04383877117393745
          vf_explained_var: -0.7770454287528992
          vf_loss: 0.00010763930925653161
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_since_restore: 641
  node_ip: 10.55.229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,641,13876.8,641000,0,0,0,995.57


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-09-17_15-24-05
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 644
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 2.652866564856635
          entropy_coeff: 0.009999999999999998
          kl: 0.005862735403117369
          policy_loss: -0.05096826437446806
          total_loss: -0.07583956658426258
          vf_explained_var: -0.8788576126098633
          vf_loss: 0.0001117869686267012
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations_since_restore: 642
  node_ip: 10.55.229.87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,642,13895.4,642000,0,0,0,995.57


Result for PPO_my_env_d74d3_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-09-17_15-24-25
  done: false
  episode_len_mean: 995.57
  episode_media: {}
  episode_reward_max: 0.0
  episode_reward_mean: 0.0
  episode_reward_min: 0.0
  episodes_this_iter: 1
  episodes_total: 645
  experiment_id: 07f70cbd5fb847dfafeefd9a680181c0
  hostname: cds2
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.26362734606122995
          cur_lr: 5.000000000000001e-05
          entropy: 1.5354156348440382
          entropy_coeff: 0.009999999999999998
          kl: 0.011451674264380587
          policy_loss: -0.04258376900106668
          total_loss: -0.054573836751903096
          vf_explained_var: -0.7680838704109192
          vf_loss: 0.000345116782101387
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_since_restore: 643
  node_ip: 10.55.229.8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d74d3_00000,RUNNING,10.55.229.87:333,643,13915.4,643000,0,0,0,995.57
